aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-10-23 11:47:02 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-10-23 11:47:02 -0700
commit 5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0 (patch)
tree 7851ef1c93aa1aba7ef327ca4b75fd35e6d10f29 /net
parent 02f36038c568111ad4fc433f6fa760ff5e38fab4 (diff)
parent ec37a48d1d16c30b655ac5280209edf52a6775d4 (diff)
download vexpress-lsk-5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1699 commits)
  bnx2/bnx2x: Unsupported Ethtool operations should return -EINVAL.
  vlan: Calling vlan_hwaccel_do_receive() is always valid.
  tproxy: use the interface primary IP address as a default value for --on-ip
  tproxy: added IPv6 support to the socket match
  cxgb3: function namespace cleanup
  tproxy: added IPv6 support to the TPROXY target
  tproxy: added IPv6 socket lookup function to nf_tproxy_core
  be2net: Changes to use only priority codes allowed by f/w
  tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled
  tproxy: added tproxy sockopt interface in the IPV6 layer
  tproxy: added udp6_lib_lookup function
  tproxy: added const specifiers to udp lookup functions
  tproxy: split off ipv6 defragmentation to a separate module
  l2tp: small cleanup
  nf_nat: restrict ICMP translation for embedded header
  can: mcp251x: fix generation of error frames
  can: mcp251x: fix endless loop in interrupt handler if CANINTF_MERRF is set
  can-raw: add msg_flags to distinguish local traffic
  9p: client code cleanup
  rds: make local functions/variables static
  ...

Fix up conflicts in net/core/dev.c, drivers/net/pcmcia/smc91c92_cs.c and
drivers/net/wireless/ath/ath9k/debug.c as per David
Diffstat (limited to 'net')
-rw-r--r--net/802/fc.c2
-rw-r--r--net/802/fddi.c12
-rw-r--r--net/802/hippi.c2
-rw-r--r--net/802/tr.c2
-rw-r--r--net/8021q/vlan.c93
-rw-r--r--net/8021q/vlan.h17
-rw-r--r--net/8021q/vlan_core.c121
-rw-r--r--net/8021q/vlan_dev.c10
-rw-r--r--net/9p/client.c55
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/lec.c1
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_route.c4
-rw-r--r--net/bluetooth/af_bluetooth.c114
-rw-r--r--net/bluetooth/cmtp/core.c6
-rw-r--r--net/bluetooth/hci_core.c1
-rw-r--r--net/bluetooth/hci_sysfs.c21
-rw-r--r--net/bluetooth/hidp/core.c8
-rw-r--r--net/bluetooth/l2cap.c60
-rw-r--r--net/bluetooth/lib.c4
-rw-r--r--net/bluetooth/rfcomm/core.c43
-rw-r--r--net/bluetooth/rfcomm/sock.c104
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/br_if.c29
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_netfilter.c134
-rw-r--r--net/bridge/netfilter/ebt_vlan.c25
-rw-r--r--net/bridge/netfilter/ebtables.c15
-rw-r--r--net/caif/caif_dev.c24
-rw-r--r--net/caif/caif_socket.c27
-rw-r--r--net/caif/cfcnfg.c49
-rw-r--r--net/caif/cfctrl.c59
-rw-r--r--net/caif/cfdbgl.c4
-rw-r--r--net/caif/cfdgml.c11
-rw-r--r--net/caif/cffrml.c14
-rw-r--r--net/caif/cfmuxl.c14
-rw-r--r--net/caif/cfpkt_skbuff.c48
-rw-r--r--net/caif/cfrfml.c12
-rw-r--r--net/caif/cfserl.c4
-rw-r--r--net/caif/cfsrvl.c17
-rw-r--r--net/caif/cfutill.c12
-rw-r--r--net/caif/cfveil.c11
-rw-r--r--net/caif/cfvidl.c6
-rw-r--r--net/caif/chnl_net.c47
-rw-r--r--net/can/raw.c37
-rw-r--r--net/core/datagram.c5
-rw-r--r--net/core/dev.c597
-rw-r--r--net/core/dst.c39
-rw-r--r--net/core/ethtool.c91
-rw-r--r--net/core/fib_rules.c16
-rw-r--r--net/core/filter.c10
-rw-r--r--net/core/flow.c82
-rw-r--r--net/core/gen_estimator.c4
-rw-r--r--net/core/iovec.c6
-rw-r--r--net/core/neighbour.c486
-rw-r--r--net/core/net-sysfs.c39
-rw-r--r--net/core/net-sysfs.h4
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/pktgen.c12
-rw-r--r--net/core/rtnetlink.c39
-rw-r--r--net/core/skbuff.c105
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/utils.c15
-rw-r--r--net/dccp/ccid.h52
-rw-r--r--net/dccp/ccids/Kconfig31
-rw-r--r--net/dccp/ccids/ccid2.c289
-rw-r--r--net/dccp/ccids/ccid2.h35
-rw-r--r--net/dccp/ccids/ccid3.c256
-rw-r--r--net/dccp/ccids/ccid3.h51
-rw-r--r--net/dccp/ccids/lib/loss_interval.c2
-rw-r--r--net/dccp/ccids/lib/packet_history.c39
-rw-r--r--net/dccp/ccids/lib/packet_history.h22
-rw-r--r--net/dccp/ccids/lib/tfrc.h1
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c14
-rw-r--r--net/dccp/dccp.h46
-rw-r--r--net/dccp/feat.c10
-rw-r--r--net/dccp/feat.h1
-rw-r--r--net/dccp/input.c20
-rw-r--r--net/dccp/ipv4.c10
-rw-r--r--net/dccp/ipv6.c10
-rw-r--r--net/dccp/minisocks.c30
-rw-r--r--net/dccp/options.c31
-rw-r--r--net/dccp/output.c20
-rw-r--r--net/dccp/proto.c50
-rw-r--r--net/decnet/dn_neigh.c13
-rw-r--r--net/decnet/dn_nsp_out.c8
-rw-r--r--net/decnet/dn_route.c3
-rw-r--r--net/econet/af_econet.c6
-rw-r--r--net/ethernet/eth.c8
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/arp.c245
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c11
-rw-r--r--net/ipv4/fib_frontend.c192
-rw-r--r--net/ipv4/fib_hash.c291
-rw-r--r--net/ipv4/fib_lookup.h11
-rw-r--r--net/ipv4/fib_rules.c13
-rw-r--r--net/ipv4/fib_semantics.c297
-rw-r--r--net/ipv4/fib_trie.c84
-rw-r--r--net/ipv4/gre.c151
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/igmp.c22
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_hashtables.c28
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c237
-rw-r--r--net/ipv4/ip_options.c3
-rw-r--r--net/ipv4/ip_output.c24
-rw-r--r--net/ipv4/ipip.c212
-rw-r--r--net/ipv4/ipmr.c428
-rw-r--r--net/ipv4/netfilter/Kconfig4
-rw-r--r--net/ipv4/netfilter/arp_tables.c64
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c84
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c31
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c145
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c51
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c53
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c76
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c9
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c17
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c27
-rw-r--r--net/ipv4/protocol.c31
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c190
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c55
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c31
-rw-r--r--net/ipv4/tcp_timer.c50
-rw-r--r--net/ipv4/tcp_westwood.c2
-rw-r--r--net/ipv4/tunnel4.c19
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/xfrm4_policy.c4
-rw-r--r--net/ipv4/xfrm4_tunnel.c4
-rw-r--r--net/ipv6/addrconf.c8
-rw-r--r--net/ipv6/addrlabel.c5
-rw-r--r--net/ipv6/af_inet6.c9
-rw-r--r--net/ipv6/datagram.c19
-rw-r--r--net/ipv6/exthdrs_core.c4
-rw-r--r--net/ipv6/fib6_rules.c3
-rw-r--r--net/ipv6/ip6_fib.c9
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_tunnel.c157
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c23
-rw-r--r--net/ipv6/ndisc.c36
-rw-r--r--net/ipv6/netfilter/Kconfig4
-rw-r--r--net/ipv6/netfilter/Makefile5
-rw-r--r--net/ipv6/netfilter/ip6_tables.c98
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c157
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c78
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c16
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c131
-rw-r--r--net/ipv6/protocol.c32
-rw-r--r--net/ipv6/raw.c12
-rw-r--r--net/ipv6/reassembly.c2
-rw-r--r--net/ipv6/route.c50
-rw-r--r--net/ipv6/sit.c165
-rw-r--r--net/ipv6/tcp_ipv6.c14
-rw-r--r--net/ipv6/tunnel6.c17
-rw-r--r--net/ipv6/udp.c16
-rw-r--r--net/ipv6/xfrm6_policy.c10
-rw-r--r--net/ipv6/xfrm6_tunnel.c8
-rw-r--r--net/irda/af_irda.c380
-rw-r--r--net/irda/discovery.c2
-rw-r--r--net/irda/ircomm/ircomm_tty.c4
-rw-r--r--net/irda/iriap.c3
-rw-r--r--net/irda/irlan/irlan_eth.c32
-rw-r--r--net/irda/irlan/irlan_event.c2
-rw-r--r--net/irda/irlmp.c2
-rw-r--r--net/irda/irlmp_frame.c2
-rw-r--r--net/irda/irnet/irnet.h2
-rw-r--r--net/irda/irnet/irnet_irda.c22
-rw-r--r--net/irda/irnet/irnet_ppp.c69
-rw-r--r--net/irda/irnet/irnet_ppp.h3
-rw-r--r--net/irda/parameters.c4
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/l2tp/l2tp_eth.c1
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ppp.c2
-rw-r--r--net/mac80211/aes_ccm.c6
-rw-r--r--net/mac80211/aes_cmac.c6
-rw-r--r--net/mac80211/agg-rx.c30
-rw-r--r--net/mac80211/agg-tx.c14
-rw-r--r--net/mac80211/cfg.c244
-rw-r--r--net/mac80211/chan.c2
-rw-r--r--net/mac80211/debugfs.c7
-rw-r--r--net/mac80211/debugfs_key.c55
-rw-r--r--net/mac80211/debugfs_netdev.c3
-rw-r--r--net/mac80211/debugfs_sta.c5
-rw-r--r--net/mac80211/driver-ops.h14
-rw-r--r--net/mac80211/driver-trace.h42
-rw-r--r--net/mac80211/ht.c47
-rw-r--r--net/mac80211/ibss.c77
-rw-r--r--net/mac80211/ieee80211_i.h133
-rw-r--r--net/mac80211/iface.c460
-rw-r--r--net/mac80211/key.c168
-rw-r--r--net/mac80211/key.h13
-rw-r--r--net/mac80211/main.c194
-rw-r--r--net/mac80211/mesh_plink.c17
-rw-r--r--net/mac80211/mlme.c173
-rw-r--r--net/mac80211/offchannel.c26
-rw-r--r--net/mac80211/pm.c2
-rw-r--r--net/mac80211/rate.c11
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c7
-rw-r--r--net/mac80211/rc80211_pid_debugfs.c2
-rw-r--r--net/mac80211/rx.c819
-rw-r--r--net/mac80211/scan.c179
-rw-r--r--net/mac80211/sta_info.c52
-rw-r--r--net/mac80211/sta_info.h24
-rw-r--r--net/mac80211/status.c14
-rw-r--r--net/mac80211/tx.c73
-rw-r--r--net/mac80211/util.c102
-rw-r--r--net/mac80211/wep.c10
-rw-r--r--net/mac80211/work.c39
-rw-r--r--net/mac80211/wpa.c34
-rw-r--r--net/netfilter/core.c6
-rw-r--r--net/netfilter/ipvs/Kconfig20
-rw-r--r--net/netfilter/ipvs/Makefile10
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c286
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c819
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c392
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c194
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c292
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c147
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c169
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c99
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c27
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c52
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c51
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c47
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c46
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c696
-rw-r--r--net/netfilter/nf_conntrack_core.c131
-rw-r--r--net/netfilter/nf_conntrack_expect.c68
-rw-r--r--net/netfilter/nf_conntrack_netlink.c77
-rw-r--r--net/netfilter/nf_conntrack_sip.c42
-rw-r--r--net/netfilter/nf_tproxy_core.c35
-rw-r--r--net/netfilter/x_tables.c12
-rw-r--r--net/netfilter/xt_TPROXY.c366
-rw-r--r--net/netfilter/xt_hashlimit.c15
-rw-r--r--net/netfilter/xt_ipvs.c1
-rw-r--r--net/netfilter/xt_socket.c167
-rw-r--r--net/netlink/genetlink.c14
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/phonet/Kconfig12
-rw-r--r--net/phonet/af_phonet.c17
-rw-r--r--net/phonet/datagram.c13
-rw-r--r--net/phonet/pep.c388
-rw-r--r--net/phonet/pn_dev.c5
-rw-r--r--net/phonet/socket.c289
-rw-r--r--net/rds/af_rds.c26
-rw-r--r--net/rds/bind.c82
-rw-r--r--net/rds/cong.c8
-rw-r--r--net/rds/connection.c159
-rw-r--r--net/rds/ib.c200
-rw-r--r--net/rds/ib.h104
-rw-r--r--net/rds/ib_cm.c184
-rw-r--r--net/rds/ib_rdma.c318
-rw-r--r--net/rds/ib_recv.c549
-rw-r--r--net/rds/ib_send.c682
-rw-r--r--net/rds/ib_stats.c2
-rw-r--r--net/rds/ib_sysctl.c19
-rw-r--r--net/rds/info.c12
-rw-r--r--net/rds/iw.c8
-rw-r--r--net/rds/iw.h15
-rw-r--r--net/rds/iw_cm.c14
-rw-r--r--net/rds/iw_rdma.c8
-rw-r--r--net/rds/iw_recv.c24
-rw-r--r--net/rds/iw_send.c93
-rw-r--r--net/rds/iw_sysctl.c6
-rw-r--r--net/rds/loop.c31
-rw-r--r--net/rds/message.c142
-rw-r--r--net/rds/page.c8
-rw-r--r--net/rds/rdma.c339
-rw-r--r--net/rds/rdma.h85
-rw-r--r--net/rds/rdma_transport.c44
-rw-r--r--net/rds/rdma_transport.h4
-rw-r--r--net/rds/rds.h192
-rw-r--r--net/rds/recv.c12
-rw-r--r--net/rds/send.c548
-rw-r--r--net/rds/stats.c6
-rw-r--r--net/rds/sysctl.c4
-rw-r--r--net/rds/tcp.c12
-rw-r--r--net/rds/tcp.h9
-rw-r--r--net/rds/tcp_connect.c2
-rw-r--r--net/rds/tcp_listen.c6
-rw-r--r--net/rds/tcp_recv.c17
-rw-r--r--net/rds/tcp_send.c68
-rw-r--r--net/rds/threads.c69
-rw-r--r--net/rds/transport.c19
-rw-r--r--net/rds/xlist.h80
-rw-r--r--net/rfkill/input.c2
-rw-r--r--net/rose/rose_link.c4
-rw-r--r--net/sched/Kconfig10
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_csum.c595
-rw-r--r--net/sched/act_ipt.c14
-rw-r--r--net/sched/cls_flow.c74
-rw-r--r--net/sched/em_meta.c6
-rw-r--r--net/sched/sch_api.c44
-rw-r--r--net/sched/sch_atm.c5
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fifo.c3
-rw-r--r--net/sched/sch_generic.c24
-rw-r--r--net/sched/sch_hfsc.c8
-rw-r--r--net/sched/sch_htb.c12
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c3
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_sfq.c33
-rw-r--r--net/sched/sch_teql.c8
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/inqueue.c2
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/objcnt.c5
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c34
-rw-r--r--net/sctp/probe.c4
-rw-r--r--net/sctp/protocol.c19
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_sideeffect.c21
-rw-r--r--net/sctp/sm_statefuns.c20
-rw-r--r--net/sctp/sm_statetable.c42
-rw-r--r--net/sctp/socket.c85
-rw-r--r--net/sctp/transport.c9
-rw-r--r--net/socket.c37
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c2
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c44
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c2
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c2
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/tipc/addr.c7
-rw-r--r--net/tipc/bcast.c51
-rw-r--r--net/tipc/bcast.h3
-rw-r--r--net/tipc/bearer.c42
-rw-r--r--net/tipc/cluster.c21
-rw-r--r--net/tipc/cluster.h2
-rw-r--r--net/tipc/config.c148
-rw-r--r--net/tipc/config.h6
-rw-r--r--net/tipc/core.c38
-rw-r--r--net/tipc/core.h9
-rw-r--r--net/tipc/dbg.c17
-rw-r--r--net/tipc/dbg.h3
-rw-r--r--net/tipc/discover.c44
-rw-r--r--net/tipc/discover.h5
-rw-r--r--net/tipc/eth_media.c48
-rw-r--r--net/tipc/link.c188
-rw-r--r--net/tipc/link.h24
-rw-r--r--net/tipc/msg.c2
-rw-r--r--net/tipc/msg.h6
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/name_table.c67
-rw-r--r--net/tipc/net.c10
-rw-r--r--net/tipc/node.c73
-rw-r--r--net/tipc/node.h3
-rw-r--r--net/tipc/port.c295
-rw-r--r--net/tipc/port.h4
-rw-r--r--net/tipc/ref.c17
-rw-r--r--net/tipc/ref.h1
-rw-r--r--net/tipc/socket.c83
-rw-r--r--net/tipc/subscr.c77
-rw-r--r--net/tipc/subscr.h2
-rw-r--r--net/tipc/zone.c11
-rw-r--r--net/tipc/zone.h1
-rw-r--r--net/unix/af_unix.c10
-rw-r--r--net/wireless/core.c66
-rw-r--r--net/wireless/core.h34
-rw-r--r--net/wireless/ibss.c21
-rw-r--r--net/wireless/mlme.c225
-rw-r--r--net/wireless/nl80211.c2189
-rw-r--r--net/wireless/nl80211.h14
-rw-r--r--net/wireless/radiotap.c61
-rw-r--r--net/wireless/reg.c22
-rw-r--r--net/wireless/scan.c12
-rw-r--r--net/wireless/sme.c11
-rw-r--r--net/wireless/sysfs.c18
-rw-r--r--net/wireless/util.c40
-rw-r--r--net/wireless/wext-compat.c42
-rw-r--r--net/wireless/wext-core.c2
-rw-r--r--net/wireless/wext-sme.c2
-rw-r--r--net/x25/af_x25.c34
-rw-r--r--net/xfrm/xfrm_policy.c7
398 files changed, 15632 insertions, 10876 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 34cf1ee014b..1e49f2d4ea9 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -70,7 +70,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
if(daddr)
{
memcpy(fch->daddr,daddr,dev->addr_len);
- return(hdr_len);
+ return hdr_len;
}
return -hdr_len;
}
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 3ef0ab0a543..94b3ad08f39 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -82,10 +82,10 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
if (daddr != NULL)
{
memcpy(fddi->daddr, daddr, dev->addr_len);
- return(hl);
+ return hl;
}
- return(-hl);
+ return -hl;
}
@@ -108,7 +108,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
{
printk("%s: Don't know how to resolve type %04X addresses.\n",
skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
- return(0);
+ return 0;
}
}
@@ -162,7 +162,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
/* Assume 802.2 SNAP frames, for now */
- return(type);
+ return type;
}
EXPORT_SYMBOL(fddi_type_trans);
@@ -170,9 +170,9 @@ EXPORT_SYMBOL(fddi_type_trans);
int fddi_change_mtu(struct net_device *dev, int new_mtu)
{
if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
- return(-EINVAL);
+ return -EINVAL;
dev->mtu = new_mtu;
- return(0);
+ return 0;
}
EXPORT_SYMBOL(fddi_change_mtu);
diff --git a/net/802/hippi.c b/net/802/hippi.c
index cd3e8e92952..91aca8780fd 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -152,7 +152,7 @@ int hippi_change_mtu(struct net_device *dev, int new_mtu)
if ((new_mtu < 68) || (new_mtu > 65280))
return -EINVAL;
dev->mtu = new_mtu;
- return(0);
+ return 0;
}
EXPORT_SYMBOL(hippi_change_mtu);
diff --git a/net/802/tr.c b/net/802/tr.c
index 1c6e596074d..5e20cf8a074 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -145,7 +145,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev,
{
memcpy(trh->daddr,daddr,dev->addr_len);
tr_source_route(skb, trh, dev);
- return(hdr_len);
+ return hdr_len;
}
return -hdr_len;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a2ad1525057..05b867e4375 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -44,9 +44,6 @@
int vlan_net_id __read_mostly;
-/* Our listing of VLAN group(s) */
-static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
-
const char vlan_fullname[] = "802.1Q VLAN Support";
const char vlan_version[] = DRV_VERSION;
static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
@@ -59,40 +56,6 @@ static struct packet_type vlan_packet_type __read_mostly = {
/* End of global variables definitions. */
-static inline unsigned int vlan_grp_hashfn(unsigned int idx)
-{
- return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
-}
-
-/* Must be invoked with RCU read lock (no preempt) */
-static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
-{
- struct vlan_group *grp;
- struct hlist_node *n;
- int hash = vlan_grp_hashfn(real_dev->ifindex);
-
- hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
- if (grp->real_dev == real_dev)
- return grp;
- }
-
- return NULL;
-}
-
-/* Find the protocol handler. Assumes VID < VLAN_VID_MASK.
- *
- * Must be invoked with RCU read lock (no preempt)
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
-{
- struct vlan_group *grp = __vlan_find_group(real_dev);
-
- if (grp)
- return vlan_group_get_device(grp, vlan_id);
-
- return NULL;
-}
-
static void vlan_group_free(struct vlan_group *grp)
{
int i;
@@ -111,8 +74,6 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
return NULL;
grp->real_dev = real_dev;
- hlist_add_head_rcu(&grp->hlist,
- &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
return grp;
}
@@ -151,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
ASSERT_RTNL();
- grp = __vlan_find_group(real_dev);
+ grp = real_dev->vlgrp;
BUG_ON(!grp);
/* Take it out of our own structures, but be sure to interlock with
@@ -173,11 +134,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
if (grp->nr_vlans == 0) {
vlan_gvrp_uninit_applicant(real_dev);
- if (real_dev->features & NETIF_F_HW_VLAN_RX)
+ rcu_assign_pointer(real_dev->vlgrp, NULL);
+ if (ops->ndo_vlan_rx_register)
ops->ndo_vlan_rx_register(real_dev, NULL);
- hlist_del_rcu(&grp->hlist);
-
/* Free the group, after all cpu's are done. */
call_rcu(&grp->rcu, vlan_rcu_free);
}
@@ -196,18 +156,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
return -EOPNOTSUPP;
}
- if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) {
- pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
- return -EOPNOTSUPP;
- }
-
if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
(!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
return -EOPNOTSUPP;
}
- if (__find_vlan_dev(real_dev, vlan_id) != NULL)
+ if (vlan_find_dev(real_dev, vlan_id) != NULL)
return -EEXIST;
return 0;
@@ -222,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
struct vlan_group *grp, *ngrp = NULL;
int err;
- grp = __vlan_find_group(real_dev);
+ grp = real_dev->vlgrp;
if (!grp) {
ngrp = grp = vlan_group_alloc(real_dev);
if (!grp)
@@ -252,8 +207,11 @@ int register_vlan_dev(struct net_device *dev)
vlan_group_set_device(grp, vlan_id, dev);
grp->nr_vlans++;
- if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
- ops->ndo_vlan_rx_register(real_dev, ngrp);
+ if (ngrp) {
+ if (ops->ndo_vlan_rx_register)
+ ops->ndo_vlan_rx_register(real_dev, ngrp);
+ rcu_assign_pointer(real_dev->vlgrp, ngrp);
+ }
if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
@@ -264,7 +222,6 @@ out_uninit_applicant:
vlan_gvrp_uninit_applicant(real_dev);
out_free_group:
if (ngrp) {
- hlist_del_rcu(&ngrp->hlist);
/* Free the group, after all cpu's are done. */
call_rcu(&ngrp->rcu, vlan_rcu_free);
}
@@ -321,7 +278,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
if (new_dev == NULL)
return -ENOBUFS;
- new_dev->real_num_tx_queues = real_dev->real_num_tx_queues;
+ netif_copy_real_num_queues(new_dev, real_dev);
dev_net_set(new_dev, net);
/* need 4 bytes for extra VLAN header info,
* hope the underlying device can handle it.
@@ -428,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
}
- grp = __vlan_find_group(dev);
+ grp = dev->vlgrp;
if (!grp)
goto out;
@@ -439,7 +396,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
switch (event) {
case NETDEV_CHANGE:
/* Propagate real device state to vlan devices */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -450,7 +407,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_CHANGEADDR:
/* Adjust unicast filters on underlying device */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -464,7 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
break;
case NETDEV_CHANGEMTU:
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -478,7 +435,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_FEAT_CHANGE:
/* Propagate device features to underlying device */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -490,7 +447,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_DOWN:
/* Put all VLANs for this dev in the down state too. */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -508,7 +465,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -525,10 +482,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
break;
case NETDEV_UNREGISTER:
+ /* twiddle thumbs on netns device moves */
+ if (dev->reg_state != NETREG_UNREGISTERING)
+ break;
+
/* Delete all VLANs for this dev. */
grp->killall = 1;
- for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ for (i = 0; i < VLAN_N_VID; i++) {
vlandev = vlan_group_get_device(grp, i);
if (!vlandev)
continue;
@@ -536,7 +497,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
/* unregistration of last vlan destroys group, abort
* afterwards */
if (grp->nr_vlans == 1)
- i = VLAN_GROUP_ARRAY_LEN;
+ i = VLAN_N_VID;
unregister_vlan_dev(vlandev, &list);
}
@@ -742,8 +703,6 @@ err0:
static void __exit vlan_cleanup_module(void)
{
- unsigned int i;
-
vlan_ioctl_set(NULL);
vlan_netlink_fini();
@@ -751,10 +710,6 @@ static void __exit vlan_cleanup_module(void)
dev_remove_pack(&vlan_packet_type);
- /* This table must be empty if there are no module references left. */
- for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
- BUG_ON(!hlist_empty(&vlan_group_hash[i]));
-
unregister_pernet_subsys(&vlan_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 8d9503ad01d..db01b3181fd 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -72,23 +72,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
return netdev_priv(dev);
}
-#define VLAN_GRP_HASH_SHIFT 5
-#define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT)
-#define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1)
-
-/* Find a VLAN device by the MAC address of its Ethernet device, and
- * it's VLAN ID. The default configuration is to have VLAN's scope
- * to be box-wide, so the MAC will be ignored. The mac will only be
- * looked at if we are configured to have a separate set of VLANs per
- * each MAC addressable interface. Note that this latter option does
- * NOT follow the spec for VLANs, but may be useful for doing very
- * large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs.
- *
- * Must be invoked with rcu_read_lock (ie preempt disabled)
- * or with RTNL.
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
-
/* found in vlan_dev.c */
int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype, struct net_device *orig_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 0eb96f7e44b..69b2f79800a 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,53 +4,29 @@
#include <linux/netpoll.h>
#include "vlan.h"
-/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
-int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
- u16 vlan_tci, int polling)
+bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
{
+ struct sk_buff *skb = *skbp;
+ u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
struct net_device *vlan_dev;
- u16 vlan_id;
-
- if (netpoll_rx(skb))
- return NET_RX_DROP;
-
- if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
- skb->deliver_no_wcard = 1;
+ struct vlan_rx_stats *rx_stats;
- skb->skb_iif = skb->dev->ifindex;
- __vlan_hwaccel_put_tag(skb, vlan_tci);
- vlan_id = vlan_tci & VLAN_VID_MASK;
- vlan_dev = vlan_group_get_device(grp, vlan_id);
-
- if (vlan_dev)
- skb->dev = vlan_dev;
- else if (vlan_id) {
- if (!(skb->dev->flags & IFF_PROMISC))
- goto drop;
- skb->pkt_type = PACKET_OTHERHOST;
+ vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+ if (!vlan_dev) {
+ if (vlan_id)
+ skb->pkt_type = PACKET_OTHERHOST;
+ return false;
}
- return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+ skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return false;
-drop:
- dev_kfree_skb_any(skb);
- return NET_RX_DROP;
-}
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
-
-int vlan_hwaccel_do_receive(struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- struct vlan_rx_stats *rx_stats;
-
- skb->dev = vlan_dev_info(dev)->real_dev;
- netif_nit_deliver(skb);
-
- skb->dev = dev;
- skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
+ skb->dev = vlan_dev;
+ skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
skb->vlan_tci = 0;
- rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
+ rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
@@ -67,12 +43,13 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
* This allows the VLAN to have a different MAC than the
* underlying device, and still route correctly. */
if (!compare_ether_addr(eth_hdr(skb)->h_dest,
- dev->dev_addr))
+ vlan_dev->dev_addr))
skb->pkt_type = PACKET_HOST;
break;
}
u64_stats_update_end(&rx_stats->syncp);
- return 0;
+
+ return true;
}
struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -87,71 +64,27 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
}
EXPORT_SYMBOL(vlan_dev_vlan_id);
-static gro_result_t
-vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
- unsigned int vlan_tci, struct sk_buff *skb)
+/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
+int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+ u16 vlan_tci, int polling)
{
- struct sk_buff *p;
- struct net_device *vlan_dev;
- u16 vlan_id;
-
- if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
- skb->deliver_no_wcard = 1;
-
- skb->skb_iif = skb->dev->ifindex;
__vlan_hwaccel_put_tag(skb, vlan_tci);
- vlan_id = vlan_tci & VLAN_VID_MASK;
- vlan_dev = vlan_group_get_device(grp, vlan_id);
-
- if (vlan_dev)
- skb->dev = vlan_dev;
- else if (vlan_id) {
- if (!(skb->dev->flags & IFF_PROMISC))
- goto drop;
- skb->pkt_type = PACKET_OTHERHOST;
- }
-
- for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow =
- p->dev == skb->dev && !compare_ether_header(
- skb_mac_header(p), skb_gro_mac_header(skb));
- NAPI_GRO_CB(p)->flush = 0;
- }
-
- return dev_gro_receive(napi, skb);
-
-drop:
- return GRO_DROP;
+ return polling ? netif_receive_skb(skb) : netif_rx(skb);
}
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
unsigned int vlan_tci, struct sk_buff *skb)
{
- if (netpoll_rx_on(skb))
- return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
- ? GRO_DROP : GRO_NORMAL;
-
- skb_gro_reset_offset(skb);
-
- return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
+ __vlan_hwaccel_put_tag(skb, vlan_tci);
+ return napi_gro_receive(napi, skb);
}
EXPORT_SYMBOL(vlan_gro_receive);
gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
unsigned int vlan_tci)
{
- struct sk_buff *skb = napi_frags_skb(napi);
-
- if (!skb)
- return GRO_DROP;
-
- if (netpoll_rx_on(skb)) {
- skb->protocol = eth_type_trans(skb, skb->dev);
- return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
- ? GRO_DROP : GRO_NORMAL;
- }
-
- return napi_frags_finish(napi, skb,
- vlan_gro_common(napi, grp, vlan_tci, skb));
+ __vlan_hwaccel_put_tag(napi->skb, vlan_tci);
+ return napi_gro_frags(napi);
}
EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3bccdd12a26..14e3d1fa07a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -158,7 +158,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
vlan_id = vlan_tci & VLAN_VID_MASK;
rcu_read_lock();
- vlan_dev = __find_vlan_dev(dev, vlan_id);
+ vlan_dev = vlan_find_dev(dev, vlan_id);
/* If the VLAN device is defined, we use it.
* If not, and the VID is 0, it is a 802.1p packet (not
@@ -177,8 +177,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
} else {
skb->dev = vlan_dev;
- rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
- smp_processor_id());
+ rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
rx_stats->rx_bytes += skb->len;
@@ -226,12 +226,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
}
netif_rx(skb);
+
rcu_read_unlock();
return NET_RX_SUCCESS;
err_unlock:
rcu_read_unlock();
err_free:
+ atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -843,7 +845,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
accum.rx_packets += rxpackets;
accum.rx_bytes += rxbytes;
accum.rx_multicast += rxmulticast;
- /* rx_errors is an ulong, not protected by syncp */
+ /* rx_errors is ulong, not protected by syncp */
accum.rx_errors += p->rx_errors;
}
stats->rx_packets = accum.rx_packets;
diff --git a/net/9p/client.c b/net/9p/client.c
index 9eb72505308..83bf0541d66 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -61,13 +61,13 @@ static const match_table_t tokens = {
inline int p9_is_proto_dotl(struct p9_client *clnt)
{
- return (clnt->proto_version == p9_proto_2000L);
+ return clnt->proto_version == p9_proto_2000L;
}
EXPORT_SYMBOL(p9_is_proto_dotl);
inline int p9_is_proto_dotu(struct p9_client *clnt)
{
- return (clnt->proto_version == p9_proto_2000u);
+ return clnt->proto_version == p9_proto_2000u;
}
EXPORT_SYMBOL(p9_is_proto_dotu);
@@ -671,7 +671,7 @@ static void p9_fid_destroy(struct p9_fid *fid)
kfree(fid);
}
-int p9_client_version(struct p9_client *c)
+static int p9_client_version(struct p9_client *c)
{
int err = 0;
struct p9_req_t *req;
@@ -730,7 +730,6 @@ error:
return err;
}
-EXPORT_SYMBOL(p9_client_version);
struct p9_client *p9_client_create(const char *dev_name, char *options)
{
@@ -887,54 +886,6 @@ error:
}
EXPORT_SYMBOL(p9_client_attach);
-struct p9_fid *
-p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
-{
- int err;
- struct p9_req_t *req;
- struct p9_qid qid;
- struct p9_fid *afid;
-
- P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname);
- err = 0;
-
- afid = p9_fid_create(clnt);
- if (IS_ERR(afid)) {
- err = PTR_ERR(afid);
- afid = NULL;
- goto error;
- }
-
- req = p9_client_rpc(clnt, P9_TAUTH, "dss?d",
- afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
- if (IS_ERR(req)) {
- err = PTR_ERR(req);
- goto error;
- }
-
- err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
- if (err) {
- p9pdu_dump(1, req->rc);
- p9_free_req(clnt, req);
- goto error;
- }
-
- P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
- qid.type,
- (unsigned long long)qid.path,
- qid.version);
-
- memmove(&afid->qid, &qid, sizeof(struct p9_qid));
- p9_free_req(clnt, req);
- return afid;
-
-error:
- if (afid)
- p9_fid_destroy(afid);
- return ERR_PTR(err);
-}
-EXPORT_SYMBOL(p9_client_auth);
-
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
int clone)
{
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c85109d809c..078eb162d9b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -222,7 +222,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
}
}
-static unsigned int
+static int
p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
{
int ret, n;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 95fdd118506..ff956d1115b 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -310,9 +310,9 @@ static int clip_constructor(struct neighbour *neigh)
return 0;
}
-static u32 clip_hash(const void *pkey, const struct net_device *dev)
+static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd)
{
- return jhash_2words(*(u32 *) pkey, dev->ifindex, clip_tbl.hash_rnd);
+ return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd);
}
static struct neigh_table clip_tbl = {
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3..1b9c52a02cd 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
default:
if (level == SOL_SOCKET)
return -EINVAL;
- break;
+ break;
}
if (!vcc->dev || !vcc->dev->ops->getsockopt)
return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac..181d70c73d7 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
static int lec_open(struct net_device *dev)
{
netif_start_queue(dev);
- memset(&dev->stats, 0, sizeof(struct net_device_stats));
return 0;
}
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a17..26eaebf4aaa 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
out:
release_sock(sk);
- return 0;
+ return err;
}
/*
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd..a1690845dc6 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
{
ax25_uid_assoc *user;
ax25_route *ax25_rt;
- int err;
+ int err = 0;
if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
put:
ax25_put_route(ax25_rt);
- return 0;
+ return err;
}
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 421c45bd1b9..c4cf3f59500 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -265,6 +265,115 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
}
EXPORT_SYMBOL(bt_sock_recvmsg);
+static long bt_sock_data_wait(struct sock *sk, long timeo)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ break;
+
+ if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN))
+ break;
+
+ if (signal_pending(current) || !timeo)
+ break;
+
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ release_sock(sk);
+ timeo = schedule_timeout(timeo);
+ lock_sock(sk);
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ }
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return timeo;
+}
+
+int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *msg, size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+ int err = 0;
+ size_t target, copied = 0;
+ long timeo;
+
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ msg->msg_namelen = 0;
+
+ BT_DBG("sk %p size %zu", sk, size);
+
+ lock_sock(sk);
+
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ do {
+ struct sk_buff *skb;
+ int chunk;
+
+ skb = skb_dequeue(&sk->sk_receive_queue);
+ if (!skb) {
+ if (copied >= target)
+ break;
+
+ if ((err = sock_error(sk)) != 0)
+ break;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ break;
+
+ err = -EAGAIN;
+ if (!timeo)
+ break;
+
+ timeo = bt_sock_data_wait(sk, timeo);
+
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeo);
+ goto out;
+ }
+ continue;
+ }
+
+ chunk = min_t(unsigned int, skb->len, size);
+ if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+ skb_queue_head(&sk->sk_receive_queue, skb);
+ if (!copied)
+ copied = -EFAULT;
+ break;
+ }
+ copied += chunk;
+ size -= chunk;
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ if (!(flags & MSG_PEEK)) {
+ skb_pull(skb, chunk);
+ if (skb->len) {
+ skb_queue_head(&sk->sk_receive_queue, skb);
+ break;
+ }
+ kfree_skb(skb);
+
+ } else {
+ /* put message back and return */
+ skb_queue_head(&sk->sk_receive_queue, skb);
+ break;
+ }
+ } while (size);
+
+out:
+ release_sock(sk);
+ return copied ? : err;
+}
+EXPORT_SYMBOL(bt_sock_stream_recvmsg);
+
static inline unsigned int bt_accept_poll(struct sock *parent)
{
struct list_head *p, *n;
@@ -297,13 +406,12 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
mask |= POLLERR;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
if (sk->sk_state == BT_CLOSED)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index d4c6af082d4..ec0a1347f93 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -321,14 +321,10 @@ static int cmtp_session(void *arg)
int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
{
struct cmtp_session *session, *s;
- bdaddr_t src, dst;
int i, err;
BT_DBG("");
- baswap(&src, &bt_sk(sock->sk)->src);
- baswap(&dst, &bt_sk(sock->sk)->dst);
-
session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL);
if (!session)
return -ENOMEM;
@@ -347,7 +343,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
BT_DBG("mtu %d", session->mtu);
- sprintf(session->name, "%s", batostr(&dst));
+ sprintf(session->name, "%s", batostr(&bt_sk(sock->sk)->dst));
session->sock = sock;
session->state = BT_CONFIG;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c52f091ee6d..bc2a052e518 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,7 +562,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_dev_lock_bh(hdev);
inquiry_cache_flush(hdev);
hci_conn_hash_flush(hdev);
- hci_blacklist_clear(hdev);
hci_dev_unlock_bh(hdev);
hci_notify(hdev, HCI_DEV_DOWN);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 8fb967beee8..5fce3d6d07b 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -37,9 +37,7 @@ static ssize_t show_link_type(struct device *dev, struct device_attribute *attr,
static ssize_t show_link_address(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hci_conn *conn = dev_get_drvdata(dev);
- bdaddr_t bdaddr;
- baswap(&bdaddr, &conn->dst);
- return sprintf(buf, "%s\n", batostr(&bdaddr));
+ return sprintf(buf, "%s\n", batostr(&conn->dst));
}
static ssize_t show_link_features(struct device *dev, struct device_attribute *attr, char *buf)
@@ -196,8 +194,8 @@ static inline char *host_typetostr(int type)
switch (type) {
case HCI_BREDR:
return "BR/EDR";
- case HCI_80211:
- return "802.11";
+ case HCI_AMP:
+ return "AMP";
default:
return "UNKNOWN";
}
@@ -238,9 +236,7 @@ static ssize_t show_class(struct device *dev, struct device_attribute *attr, cha
static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf)
{
struct hci_dev *hdev = dev_get_drvdata(dev);
- bdaddr_t bdaddr;
- baswap(&bdaddr, &hdev->bdaddr);
- return sprintf(buf, "%s\n", batostr(&bdaddr));
+ return sprintf(buf, "%s\n", batostr(&hdev->bdaddr));
}
static ssize_t show_features(struct device *dev, struct device_attribute *attr, char *buf)
@@ -408,10 +404,8 @@ static int inquiry_cache_show(struct seq_file *f, void *p)
for (e = cache->list; e; e = e->next) {
struct inquiry_data *data = &e->data;
- bdaddr_t bdaddr;
- baswap(&bdaddr, &data->bdaddr);
seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
- batostr(&bdaddr),
+ batostr(&data->bdaddr),
data->pscan_rep_mode, data->pscan_period_mode,
data->pscan_mode, data->dev_class[2],
data->dev_class[1], data->dev_class[0],
@@ -445,13 +439,10 @@ static int blacklist_show(struct seq_file *f, void *p)
list_for_each(l, &hdev->blacklist) {
struct bdaddr_list *b;
- bdaddr_t bdaddr;
b = list_entry(l, struct bdaddr_list, list);
- baswap(&bdaddr, &b->bdaddr);
-
- seq_printf(f, "%s\n", batostr(&bdaddr));
+ seq_printf(f, "%s\n", batostr(&b->bdaddr));
}
hci_dev_unlock_bh(hdev);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index bfe641b7dfa..c0ee8b3928e 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -758,7 +758,6 @@ static int hidp_setup_hid(struct hidp_session *session,
struct hidp_connadd_req *req)
{
struct hid_device *hid;
- bdaddr_t src, dst;
int err;
session->rd_data = kzalloc(req->rd_size, GFP_KERNEL);
@@ -781,9 +780,6 @@ static int hidp_setup_hid(struct hidp_session *session,
hid->driver_data = session;
- baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
- baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
-
hid->bus = BUS_BLUETOOTH;
hid->vendor = req->vendor;
hid->product = req->product;
@@ -791,8 +787,8 @@ static int hidp_setup_hid(struct hidp_session *session,
hid->country = req->country;
strncpy(hid->name, req->name, 128);
- strncpy(hid->phys, batostr(&src), 64);
- strncpy(hid->uniq, batostr(&dst), 64);
+ strncpy(hid->phys, batostr(&bt_sk(session->ctrl_sock->sk)->src), 64);
+ strncpy(hid->uniq, batostr(&bt_sk(session->ctrl_sock->sk)->dst), 64);
hid->dev.parent = hidp_get_device(session);
hid->ll_driver = &hidp_hid_driver;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 0b54b7dd840..daa7a988d9a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1008,10 +1008,20 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
goto done;
}
- if (la.l2_psm && __le16_to_cpu(la.l2_psm) < 0x1001 &&
- !capable(CAP_NET_BIND_SERVICE)) {
- err = -EACCES;
- goto done;
+ if (la.l2_psm) {
+ __u16 psm = __le16_to_cpu(la.l2_psm);
+
+ /* PSM must be odd and lsb of upper byte must be 0 */
+ if ((psm & 0x0101) != 0x0001) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ /* Restrict usage of well-known PSMs */
+ if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
+ err = -EACCES;
+ goto done;
+ }
}
write_lock_bh(&l2cap_sk_list.lock);
@@ -1190,6 +1200,13 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
goto done;
}
+ /* PSM must be odd and lsb of upper byte must be 0 */
+ if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
+ sk->sk_type != SOCK_RAW) {
+ err = -EINVAL;
+ goto done;
+ }
+
/* Set destination address and psm */
bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
l2cap_pi(sk)->psm = la.l2_psm;
@@ -1635,7 +1652,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
*frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err);
if (!*frag)
- return -EFAULT;
+ return err;
if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count))
return -EFAULT;
@@ -1661,7 +1678,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr
skb = bt_skb_send_alloc(sk, count + hlen,
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1690,7 +1707,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms
skb = bt_skb_send_alloc(sk, count + hlen,
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1727,7 +1744,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
skb = bt_skb_send_alloc(sk, count + hlen,
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
/* Create L2CAP header */
lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1934,6 +1951,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
release_sock(sk);
+ if (sock->type == SOCK_STREAM)
+ return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
+
return bt_sock_recvmsg(iocb, sock, msg, len, flags);
}
@@ -2891,7 +2911,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
struct l2cap_chan_list *list = &conn->chan_list;
struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
struct l2cap_conn_rsp rsp;
- struct sock *parent, *uninitialized_var(sk);
+ struct sock *parent, *sk = NULL;
int result, status = L2CAP_CS_NO_INFO;
u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -3000,7 +3020,7 @@ sendresp:
L2CAP_INFO_REQ, sizeof(info), &info);
}
- if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
+ if (sk && !(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
result == L2CAP_CR_SUCCESS) {
u8 buf[128];
l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
@@ -3151,6 +3171,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) {
u8 buf[64];
+ l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
l2cap_build_conf_req(sk, buf), buf);
l2cap_pi(sk)->num_conf_req++;
@@ -4643,6 +4664,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
if (flags & ACL_START) {
struct l2cap_hdr *hdr;
+ struct sock *sk;
+ u16 cid;
int len;
if (conn->rx_len) {
@@ -4653,7 +4676,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
l2cap_conn_unreliable(conn, ECOMM);
}
- if (skb->len < 2) {
+ /* A start fragment always begins with the Basic L2CAP header */
+ if (skb->len < L2CAP_HDR_SIZE) {
BT_ERR("Frame is too short (len %d)", skb->len);
l2cap_conn_unreliable(conn, ECOMM);
goto drop;
@@ -4661,6 +4685,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
hdr = (struct l2cap_hdr *) skb->data;
len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
+ cid = __le16_to_cpu(hdr->cid);
if (len == skb->len) {
/* Complete frame received */
@@ -4677,6 +4702,19 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
goto drop;
}
+ sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
+
+ if (sk && l2cap_pi(sk)->imtu < len - L2CAP_HDR_SIZE) {
+ BT_ERR("Frame exceeding recv MTU (len %d, MTU %d)",
+ len, l2cap_pi(sk)->imtu);
+ bh_unlock_sock(sk);
+ l2cap_conn_unreliable(conn, ECOMM);
+ goto drop;
+ }
+
+ if (sk)
+ bh_unlock_sock(sk);
+
/* Allocate skb for the complete frame (with header) */
conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC);
if (!conn->rx_skb)
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index ad2af5814e4..b826d1bf10d 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -51,8 +51,8 @@ char *batostr(bdaddr_t *ba)
i ^= 1;
sprintf(str[i], "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X",
- ba->b[0], ba->b[1], ba->b[2],
- ba->b[3], ba->b[4], ba->b[5]);
+ ba->b[5], ba->b[4], ba->b[3],
+ ba->b[2], ba->b[1], ba->b[0]);
return str[i];
}
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7dca91bb8c5..39a5d87e33b 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -113,11 +113,10 @@ static void rfcomm_session_del(struct rfcomm_session *s);
#define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1)
#define __get_rpn_parity(line) (((line) >> 3) & 0x7)
-static inline void rfcomm_schedule(uint event)
+static inline void rfcomm_schedule(void)
{
if (!rfcomm_thread)
return;
- //set_bit(event, &rfcomm_event);
set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
wake_up_process(rfcomm_thread);
}
@@ -179,13 +178,13 @@ static unsigned char rfcomm_crc_table[256] = {
/* FCS on 2 bytes */
static inline u8 __fcs(u8 *data)
{
- return (0xff - __crc(data));
+ return 0xff - __crc(data);
}
/* FCS on 3 bytes */
static inline u8 __fcs2(u8 *data)
{
- return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]);
+ return 0xff - rfcomm_crc_table[__crc(data) ^ data[2]];
}
/* Check FCS */
@@ -203,13 +202,13 @@ static inline int __check_fcs(u8 *data, int type, u8 fcs)
static void rfcomm_l2state_change(struct sock *sk)
{
BT_DBG("%p state %d", sk, sk->sk_state);
- rfcomm_schedule(RFCOMM_SCHED_STATE);
+ rfcomm_schedule();
}
static void rfcomm_l2data_ready(struct sock *sk, int bytes)
{
BT_DBG("%p bytes %d", sk, bytes);
- rfcomm_schedule(RFCOMM_SCHED_RX);
+ rfcomm_schedule();
}
static int rfcomm_l2sock_create(struct socket **sock)
@@ -255,7 +254,7 @@ static void rfcomm_session_timeout(unsigned long arg)
BT_DBG("session %p state %ld", s, s->state);
set_bit(RFCOMM_TIMED_OUT, &s->flags);
- rfcomm_schedule(RFCOMM_SCHED_TIMEO);
+ rfcomm_schedule();
}
static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
@@ -283,7 +282,7 @@ static void rfcomm_dlc_timeout(unsigned long arg)
set_bit(RFCOMM_TIMED_OUT, &d->flags);
rfcomm_dlc_put(d);
- rfcomm_schedule(RFCOMM_SCHED_TIMEO);
+ rfcomm_schedule();
}
static void rfcomm_dlc_set_timer(struct rfcomm_dlc *d, long timeout)
@@ -465,7 +464,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
case BT_CONFIG:
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
- rfcomm_schedule(RFCOMM_SCHED_AUTH);
+ rfcomm_schedule();
break;
}
/* Fall through */
@@ -485,7 +484,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
case BT_CONNECT2:
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
set_bit(RFCOMM_AUTH_REJECT, &d->flags);
- rfcomm_schedule(RFCOMM_SCHED_AUTH);
+ rfcomm_schedule();
break;
}
/* Fall through */
@@ -533,7 +532,7 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
skb_queue_tail(&d->tx_queue, skb);
if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
- rfcomm_schedule(RFCOMM_SCHED_TX);
+ rfcomm_schedule();
return len;
}
@@ -545,7 +544,7 @@ void __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
d->v24_sig |= RFCOMM_V24_FC;
set_bit(RFCOMM_MSC_PENDING, &d->flags);
}
- rfcomm_schedule(RFCOMM_SCHED_TX);
+ rfcomm_schedule();
}
void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
@@ -556,7 +555,7 @@ void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
d->v24_sig &= ~RFCOMM_V24_FC;
set_bit(RFCOMM_MSC_PENDING, &d->flags);
}
- rfcomm_schedule(RFCOMM_SCHED_TX);
+ rfcomm_schedule();
}
/*
@@ -577,7 +576,7 @@ int rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig)
d->v24_sig = v24_sig;
if (!test_and_set_bit(RFCOMM_MSC_PENDING, &d->flags))
- rfcomm_schedule(RFCOMM_SCHED_TX);
+ rfcomm_schedule();
return 0;
}
@@ -816,7 +815,7 @@ static int rfcomm_queue_disc(struct rfcomm_dlc *d)
cmd->fcs = __fcs2((u8 *) cmd);
skb_queue_tail(&d->tx_queue, skb);
- rfcomm_schedule(RFCOMM_SCHED_TX);
+ rfcomm_schedule();
return 0;
}
@@ -1415,8 +1414,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
return 0;
if (len == 1) {
- /* This is a request, return default settings */
- bit_rate = RFCOMM_RPN_BR_115200;
+ /* This is a request; return the default settings (per ETSI TS 07.10) */
+ bit_rate = RFCOMM_RPN_BR_9600;
data_bits = RFCOMM_RPN_DATA_8;
stop_bits = RFCOMM_RPN_STOP_1;
parity = RFCOMM_RPN_PARITY_NONE;
@@ -1431,9 +1430,9 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_BITRATE)) {
bit_rate = rpn->bit_rate;
- if (bit_rate != RFCOMM_RPN_BR_115200) {
+ if (bit_rate > RFCOMM_RPN_BR_230400) {
BT_DBG("RPN bit rate mismatch 0x%x", bit_rate);
- bit_rate = RFCOMM_RPN_BR_115200;
+ bit_rate = RFCOMM_RPN_BR_9600;
rpn_mask ^= RFCOMM_RPN_PM_BITRATE;
}
}
@@ -1698,7 +1697,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
break;
default:
- BT_ERR("Unknown packet type 0x%02x\n", type);
+ BT_ERR("Unknown packet type 0x%02x", type);
break;
}
kfree_skb(skb);
@@ -1884,7 +1883,7 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
* L2CAP MTU minus UIH header and FCS. */
s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5;
- rfcomm_schedule(RFCOMM_SCHED_RX);
+ rfcomm_schedule();
} else
sock_release(nsock);
}
@@ -2093,7 +2092,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
rfcomm_session_put(s);
- rfcomm_schedule(RFCOMM_SCHED_AUTH);
+ rfcomm_schedule();
}
static struct hci_cb rfcomm_cb = {
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 194b3a04cfd..aec505f934d 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -621,121 +621,29 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return sent;
}
-static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(sk_sleep(sk), &wait);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- sk->sk_err ||
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
- signal_pending(current) ||
- !timeo)
- break;
-
- set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- release_sock(sk);
- timeo = schedule_timeout(timeo);
- lock_sock(sk);
- clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
- }
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(sk_sleep(sk), &wait);
- return timeo;
-}
-
static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock *sk = sock->sk;
struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
- int err = 0;
- size_t target, copied = 0;
- long timeo;
+ int len;
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
rfcomm_dlc_accept(d);
return 0;
}
- if (flags & MSG_OOB)
- return -EOPNOTSUPP;
-
- msg->msg_namelen = 0;
-
- BT_DBG("sk %p size %zu", sk, size);
+ len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags);
lock_sock(sk);
+ if (!(flags & MSG_PEEK) && len > 0)
+ atomic_sub(len, &sk->sk_rmem_alloc);
- target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- do {
- struct sk_buff *skb;
- int chunk;
-
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (!skb) {
- if (copied >= target)
- break;
-
- if ((err = sock_error(sk)) != 0)
- break;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- break;
-
- err = -EAGAIN;
- if (!timeo)
- break;
-
- timeo = rfcomm_sock_data_wait(sk, timeo);
-
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto out;
- }
- continue;
- }
-
- chunk = min_t(unsigned int, skb->len, size);
- if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
- skb_queue_head(&sk->sk_receive_queue, skb);
- if (!copied)
- copied = -EFAULT;
- break;
- }
- copied += chunk;
- size -= chunk;
-
- sock_recv_ts_and_drops(msg, sk, skb);
-
- if (!(flags & MSG_PEEK)) {
- atomic_sub(chunk, &sk->sk_rmem_alloc);
-
- skb_pull(skb, chunk);
- if (skb->len) {
- skb_queue_head(&sk->sk_receive_queue, skb);
- break;
- }
- kfree_skb(skb);
-
- } else {
- /* put message back and return */
- skb_queue_head(&sk->sk_receive_queue, skb);
- break;
- }
- } while (size);
-
-out:
if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2))
rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc);
-
release_sock(sk);
- return copied ? : err;
+
+ return len;
}
static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 84c2a4d013c..a9b81f5dacd 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -183,9 +183,7 @@ static struct device *rfcomm_get_device(struct rfcomm_dev *dev)
static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf)
{
struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
- bdaddr_t bdaddr;
- baswap(&bdaddr, &dev->dst);
- return sprintf(buf, "%s\n", batostr(&bdaddr));
+ return sprintf(buf, "%s\n", batostr(&dev->dst));
}
static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index cf09fe591fc..17cb0b63357 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -212,6 +212,11 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
return 0;
}
+static int br_set_flags(struct net_device *netdev, u32 data)
+{
+ return ethtool_op_set_flags(netdev, data, ETH_FLAG_TXVLAN);
+}
+
#ifdef CONFIG_NET_POLL_CONTROLLER
static void br_poll_controller(struct net_device *br_dev)
{
@@ -304,6 +309,7 @@ static const struct ethtool_ops br_ethtool_ops = {
.get_ufo = ethtool_op_get_ufo,
.set_ufo = ethtool_op_set_ufo,
.get_flags = ethtool_op_get_flags,
+ .set_flags = br_set_flags,
};
static const struct net_device_ops br_netdev_ops = {
@@ -343,5 +349,5 @@ void br_dev_setup(struct net_device *dev)
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
- NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
+ NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c03d2c3ff03..89ad25a7620 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -61,30 +61,27 @@ static int port_cost(struct net_device *dev)
}
-/*
- * Check for port carrier transistions.
- * Called from work queue to allow for calling functions that
- * might sleep (such as speed check), and to debounce.
- */
+/* Check for port carrier transitions. */
void br_port_carrier_check(struct net_bridge_port *p)
{
struct net_device *dev = p->dev;
struct net_bridge *br = p->br;
- if (netif_carrier_ok(dev))
+ if (netif_running(dev) && netif_carrier_ok(dev))
p->path_cost = port_cost(dev);
- if (netif_running(br->dev)) {
- spin_lock_bh(&br->lock);
- if (netif_carrier_ok(dev)) {
- if (p->state == BR_STATE_DISABLED)
- br_stp_enable_port(p);
- } else {
- if (p->state != BR_STATE_DISABLED)
- br_stp_disable_port(p);
- }
- spin_unlock_bh(&br->lock);
+ if (!netif_running(br->dev))
+ return;
+
+ spin_lock_bh(&br->lock);
+ if (netif_running(dev) && netif_carrier_ok(dev)) {
+ if (p->state == BR_STATE_DISABLED)
+ br_stp_enable_port(p);
+ } else {
+ if (p->state != BR_STATE_DISABLED)
+ br_stp_disable_port(p);
}
+ spin_unlock_bh(&br->lock);
}
static void release_nbp(struct kobject *kobj)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 826cd522153..25207a1f182 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -141,7 +141,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
const unsigned char *dest = eth_hdr(skb)->h_dest;
int (*rhook)(struct sk_buff *skb);
- if (skb->pkt_type == PACKET_LOOPBACK)
+ if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
return skb;
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
@@ -159,7 +159,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
goto drop;
/* If STP is turned off, then forward */
- if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
+ if (p->br->stp_enabled == BR_NO_STP)
goto forward;
if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 137f23259a9..865fd7634b6 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -64,22 +64,24 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
- return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+ if (vlan_tx_tag_present(skb))
+ return skb->protocol;
+ else if (skb->protocol == htons(ETH_P_8021Q))
+ return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+ else
+ return 0;
}
#define IS_VLAN_IP(skb) \
- (skb->protocol == htons(ETH_P_8021Q) && \
- vlan_proto(skb) == htons(ETH_P_IP) && \
+ (vlan_proto(skb) == htons(ETH_P_IP) && \
brnf_filter_vlan_tagged)
#define IS_VLAN_IPV6(skb) \
- (skb->protocol == htons(ETH_P_8021Q) && \
- vlan_proto(skb) == htons(ETH_P_IPV6) &&\
+ (vlan_proto(skb) == htons(ETH_P_IPV6) && \
brnf_filter_vlan_tagged)
#define IS_VLAN_ARP(skb) \
- (skb->protocol == htons(ETH_P_8021Q) && \
- vlan_proto(skb) == htons(ETH_P_ARP) && \
+ (vlan_proto(skb) == htons(ETH_P_ARP) && \
brnf_filter_vlan_tagged)
static inline __be16 pppoe_proto(const struct sk_buff *skb)
@@ -106,7 +108,6 @@ static struct dst_ops fake_dst_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
.update_pmtu = fake_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
/*
@@ -209,6 +210,72 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
skb->protocol = htons(ETH_P_PPP_SES);
}
+/*
+ * br_parse_ip_options - sanitize an IPv4 skb before it enters the IP layer
+ * @skb: packet whose network header points at the IPv4 header
+ *
+ * Verifies that the skb is in the format the IP stack expects and fills
+ * in IPCB(skb):
+ *  - the fixed header, then the full header (ihl * 4 bytes), must be
+ *    pullable into the linear area;
+ *  - ihl >= 5, version == 4 and the header checksum must be valid;
+ *  - tot_len must fit inside the skb and cover at least the header;
+ *    the skb is trimmed to tot_len;
+ *  - IPCB(skb) is zeroed, then any IP options are compiled into it and
+ *    a source-route option is checked against the device policy.
+ *
+ * Returns 0 if the packet may be handed on, -1 if it must be dropped.
+ * The matching IPSTATS_MIB_* counter is bumped for header errors,
+ * truncated packets and trim failures.
+ */
+static int br_parse_ip_options(struct sk_buff *skb)
+{
+	struct ip_options *opt;
+	struct iphdr *iph;
+	struct net_device *dev = skb->dev;
+	u32 len;
+
+	/* The fixed header must be linear before ihl/version are read. */
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		goto inhdr_error;
+
+	iph = ip_hdr(skb);
+	opt = &(IPCB(skb)->opt);
+
+	/* Basic sanity checks */
+	if (iph->ihl < 5 || iph->version != 4)
+		goto inhdr_error;
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		goto inhdr_error;
+
+	/* pskb_may_pull() may have reallocated the header: reload it. */
+	iph = ip_hdr(skb);
+	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+		goto inhdr_error;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len) {
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
+		goto drop;
+	} else if (len < (iph->ihl*4))
+		goto inhdr_error;
+
+	if (pskb_trim_rcsum(skb, len)) {
+		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+		goto drop;
+	}
+
+	/*
+	 * Always start from a clean control block: skb->cb may still hold
+	 * data from the previous layer, and ip_options_compile() must not
+	 * see it.
+	 */
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+	if (iph->ihl == 5)
+		return 0;
+
+	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
+	if (ip_options_compile(dev_net(dev), opt, skb))
+		goto inhdr_error;
+
+	/* Check correct handling of SRR option */
+	if (unlikely(opt->srr)) {
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
+		if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
+			goto drop;
+
+		if (ip_options_rcv_srr(skb))
+			goto drop;
+	}
+
+	return 0;
+
+inhdr_error:
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
+drop:
+	return -1;
+}
+
/* Fill in the header for fragmented IP packets handled by
* the IPv4 connection tracking code.
*/
@@ -549,7 +616,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
{
struct net_bridge_port *p;
struct net_bridge *br;
- struct iphdr *iph;
__u32 len = nf_bridge_encap_header_len(skb);
if (unlikely(!pskb_may_pull(skb, len)))
@@ -578,28 +644,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
nf_bridge_pull_encap_header_rcsum(skb);
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto inhdr_error;
-
- iph = ip_hdr(skb);
- if (iph->ihl < 5 || iph->version != 4)
- goto inhdr_error;
-
- if (!pskb_may_pull(skb, 4 * iph->ihl))
- goto inhdr_error;
-
- iph = ip_hdr(skb);
- if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
- goto inhdr_error;
-
- len = ntohs(iph->tot_len);
- if (skb->len < len || len < 4 * iph->ihl)
- goto inhdr_error;
-
- pskb_trim_rcsum(skb, len);
-
- /* BUG: Should really parse the IP options here. */
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ if (br_parse_ip_options(skb))
+ /* Drop invalid packet */
+ goto out;
nf_bridge_put(skb->nf_bridge);
if (!nf_bridge_alloc(skb))
@@ -614,8 +661,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
return NF_STOLEN;
-inhdr_error:
-// IP_INC_STATS_BH(IpInHdrErrors);
out:
return NF_DROP;
}
@@ -759,14 +804,19 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
+ int ret;
+
if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
!skb_is_gso(skb)) {
- /* BUG: Should really parse the IP options here. */
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- return ip_fragment(skb, br_dev_queue_push_xmit);
+ if (br_parse_ip_options(skb))
+ /* Drop invalid packet */
+ return NF_DROP;
+ ret = ip_fragment(skb, br_dev_queue_push_xmit);
} else
- return br_dev_queue_push_xmit(skb);
+ ret = br_dev_queue_push_xmit(skb);
+
+ return ret;
}
#else
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
@@ -954,15 +1004,22 @@ int __init br_netfilter_init(void)
{
int ret;
- ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ ret = dst_entries_init(&fake_dst_ops);
if (ret < 0)
return ret;
+
+ ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ if (ret < 0) {
+ dst_entries_destroy(&fake_dst_ops);
+ return ret;
+ }
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ dst_entries_destroy(&fake_dst_ops);
return -ENOMEM;
}
#endif
@@ -976,4 +1033,5 @@ void br_netfilter_fini(void)
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(brnf_sysctl_header);
#endif
+ dst_entries_destroy(&fake_dst_ops);
}
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 87b53b3a921..eae67bf0446 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -39,8 +39,6 @@ static bool
ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct ebt_vlan_info *info = par->matchinfo;
- const struct vlan_hdr *fp;
- struct vlan_hdr _frame;
unsigned short TCI; /* Whole TCI, given from parsed frame */
unsigned short id; /* VLAN ID, given from frame TCI */
@@ -48,9 +46,20 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* VLAN encapsulated Type/Length field, given from orig frame */
__be16 encap;
- fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
- if (fp == NULL)
- return false;
+ if (vlan_tx_tag_present(skb)) {
+ TCI = vlan_tx_tag_get(skb);
+ encap = skb->protocol;
+ } else {
+ const struct vlan_hdr *fp;
+ struct vlan_hdr _frame;
+
+ fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
+ if (fp == NULL)
+ return false;
+
+ TCI = ntohs(fp->h_vlan_TCI);
+ encap = fp->h_vlan_encapsulated_proto;
+ }
/* Tag Control Information (TCI) consists of the following elements:
* - User_priority. The user_priority field is three bits in length,
@@ -59,10 +68,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
* (CFI) is a single bit flag value. Currently ignored.
* - VLAN Identifier (VID). The VID is encoded as
* an unsigned binary number. */
- TCI = ntohs(fp->h_vlan_TCI);
id = TCI & VLAN_VID_MASK;
prio = (TCI >> 13) & 0x7;
- encap = fp->h_vlan_encapsulated_proto;
/* Checking VLAN Identifier (VID) */
if (GET_BITMASK(EBT_VLAN_ID))
@@ -111,10 +118,10 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
* 0 - The null VLAN ID.
* 1 - The default Port VID (PVID)
* 0x0FFF - Reserved for implementation use.
- * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */
+ * if_vlan.h: VLAN_N_VID 4096. */
if (GET_BITMASK(EBT_VLAN_ID)) {
if (!!info->id) { /* if id!=0 => check vid range */
- if (info->id > VLAN_GROUP_ARRAY_LEN) {
+ if (info->id > VLAN_N_VID) {
pr_debug("id %d is out of range (1-4096)\n",
info->id);
return -EINVAL;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bcc102e3be4..a1dcf83f0d5 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -124,16 +124,23 @@ ebt_dev_check(const char *entry, const struct net_device *device)
#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg))
/* process standard matches */
static inline int
-ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
+ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
const struct net_device *in, const struct net_device *out)
{
+ const struct ethhdr *h = eth_hdr(skb);
+ __be16 ethproto;
int verdict, i;
+ if (vlan_tx_tag_present(skb))
+ ethproto = htons(ETH_P_8021Q);
+ else
+ ethproto = h->h_proto;
+
if (e->bitmask & EBT_802_3) {
- if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO))
+ if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
- FWINV2(e->ethproto != h->h_proto, EBT_IPROTO))
+ FWINV2(e->ethproto != ethproto, EBT_IPROTO))
return 1;
if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
@@ -213,7 +220,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
base = private->entries;
i = 0;
while (i < nentries) {
- if (ebt_basic_match(point, eth_hdr(skb), in, out))
+ if (ebt_basic_match(point, skb, in, out))
goto letscontinue;
if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0b586e9d137..b99369a055d 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -9,6 +9,8 @@
* and Sakari Ailus <sakari.ailus@nokia.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -171,7 +173,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
net = dev_net(dev);
pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
caifd = caif_get(dev);
- if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
+ if (!caifd || !caifd->layer.up || !caifd->layer.up->receive)
return NET_RX_DROP;
if (caifd->layer.up->receive(caifd->layer.up, pkt))
@@ -214,7 +216,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
switch (what) {
case NETDEV_REGISTER:
- pr_info("CAIF: %s():register %s\n", __func__, dev->name);
+ netdev_info(dev, "register\n");
caifd = caif_device_alloc(dev);
if (caifd == NULL)
break;
@@ -225,14 +227,13 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
break;
case NETDEV_UP:
- pr_info("CAIF: %s(): up %s\n", __func__, dev->name);
+ netdev_info(dev, "up\n");
caifd = caif_get(dev);
if (caifd == NULL)
break;
caifdev = netdev_priv(dev);
if (atomic_read(&caifd->state) == NETDEV_UP) {
- pr_info("CAIF: %s():%s already up\n",
- __func__, dev->name);
+ netdev_info(dev, "already up\n");
break;
}
atomic_set(&caifd->state, what);
@@ -273,7 +274,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
caifd = caif_get(dev);
if (caifd == NULL)
break;
- pr_info("CAIF: %s():going down %s\n", __func__, dev->name);
+ netdev_info(dev, "going down\n");
if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
atomic_read(&caifd->state) == NETDEV_DOWN)
@@ -295,11 +296,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
caifd = caif_get(dev);
if (caifd == NULL)
break;
- pr_info("CAIF: %s(): down %s\n", __func__, dev->name);
+ netdev_info(dev, "down\n");
if (atomic_read(&caifd->in_use))
- pr_warning("CAIF: %s(): "
- "Unregistering an active CAIF device: %s\n",
- __func__, dev->name);
+ netdev_warn(dev,
+ "Unregistering an active CAIF device\n");
cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
dev_put(dev);
atomic_set(&caifd->state, what);
@@ -307,7 +307,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
case NETDEV_UNREGISTER:
caifd = caif_get(dev);
- pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name);
+ netdev_info(dev, "unregister\n");
atomic_set(&caifd->state, what);
caif_device_destroy(dev);
break;
@@ -391,7 +391,7 @@ static int __init caif_device_init(void)
int result;
cfg = cfcnfg_create();
if (!cfg) {
- pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__);
+ pr_warn("can't create cfcnfg\n");
goto err_cfcnfg_create_failed;
}
result = register_pernet_device(&caif_net_ops);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 4bf28f25f36..2eca2dd0000 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -15,7 +17,6 @@
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/uaccess.h>
-#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/caif/caif_socket.h>
#include <asm/atomic.h>
@@ -28,9 +29,6 @@
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(AF_CAIF);
-#define CAIF_DEF_SNDBUF (4096*10)
-#define CAIF_DEF_RCVBUF (4096*100)
-
/*
* CAIF state is re-using the TCP socket states.
* caif_states stored in sk_state reflect the state as reported by
@@ -157,9 +155,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
- trace_printk("CAIF: %s():"
- " sending flow OFF (queue len = %d %d)\n",
- __func__,
+ pr_debug("sending flow OFF (queue len = %d %d)\n",
atomic_read(&cf_sk->sk.sk_rmem_alloc),
sk_rcvbuf_lowwater(cf_sk));
set_rx_flow_off(cf_sk);
@@ -172,9 +168,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return err;
if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
set_rx_flow_off(cf_sk);
- trace_printk("CAIF: %s():"
- " sending flow OFF due to rmem_schedule\n",
- __func__);
+ pr_debug("sending flow OFF due to rmem_schedule\n");
dbfs_atomic_inc(&cnt.num_rx_flow_off);
caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
}
@@ -275,8 +269,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
break;
default:
- pr_debug("CAIF: %s(): Unexpected flow command %d\n",
- __func__, flow);
+ pr_debug("Unexpected flow command %d\n", flow);
}
}
@@ -536,8 +529,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
/* Slight paranoia, probably not needed. */
if (unlikely(loopcnt++ > 1000)) {
- pr_warning("CAIF: %s(): transmit retries failed,"
- " error = %d\n", __func__, ret);
+ pr_warn("transmit retries failed, error = %d\n", ret);
break;
}
@@ -912,8 +904,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
cf_sk->tailroom = tailroom;
cf_sk->maxframe = mtu - (headroom + tailroom);
if (cf_sk->maxframe < 1) {
- pr_warning("CAIF: %s(): CAIF Interface MTU too small (%u)\n",
- __func__, mtu);
+ pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu);
goto out;
}
@@ -1132,10 +1123,6 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
/* Store the protocol */
sk->sk_protocol = (unsigned char) protocol;
- /* Sendbuf dictates the amount of outbound packets not yet sent */
- sk->sk_sndbuf = CAIF_DEF_SNDBUF;
- sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
-
/*
* Lock in order to try to stop someone from opening the socket
* too early.
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 1c29189b344..41adafd1891 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -3,6 +3,9 @@
* Author: Sjur Brendeland/sjur.brandeland@stericsson.com
* License terms: GNU General Public License (GPL) version 2
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/slab.h>
@@ -78,7 +81,7 @@ struct cfcnfg *cfcnfg_create(void)
/* Initiate this layer */
this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
if (!this) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
this->mux = cfmuxl_create();
@@ -106,7 +109,7 @@ struct cfcnfg *cfcnfg_create(void)
layer_set_up(this->ctrl, this);
return this;
out_of_mem:
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
kfree(this->mux);
kfree(this->ctrl);
kfree(this);
@@ -194,7 +197,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
caif_assert(adap_layer != NULL);
channel_id = adap_layer->id;
if (adap_layer->dn == NULL || channel_id == 0) {
- pr_err("CAIF: %s():adap_layer->id is 0\n", __func__);
+ pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n");
ret = -ENOTCONN;
goto end;
}
@@ -204,9 +207,8 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
layer_set_up(servl, NULL);
ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
if (servl == NULL) {
- pr_err("CAIF: %s(): PROTOCOL ERROR "
- "- Error removing service_layer Channel_Id(%d)",
- __func__, channel_id);
+ pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
+ channel_id);
ret = -EINVAL;
goto end;
}
@@ -216,18 +218,14 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
if (phyinfo == NULL) {
- pr_warning("CAIF: %s(): "
- "No interface to send disconnect to\n",
- __func__);
+ pr_warn("No interface to send disconnect to\n");
ret = -ENODEV;
goto end;
}
if (phyinfo->id != phyid ||
phyinfo->phy_layer->id != phyid ||
phyinfo->frm_layer->id != phyid) {
- pr_err("CAIF: %s(): "
- "Inconsistency in phy registration\n",
- __func__);
+ pr_err("Inconsistency in phy registration\n");
ret = -EINVAL;
goto end;
}
@@ -276,21 +274,20 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
{
struct cflayer *frml;
if (adap_layer == NULL) {
- pr_err("CAIF: %s(): adap_layer is zero", __func__);
+ pr_err("adap_layer is zero\n");
return -EINVAL;
}
if (adap_layer->receive == NULL) {
- pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__);
+ pr_err("adap_layer->receive is NULL\n");
return -EINVAL;
}
if (adap_layer->ctrlcmd == NULL) {
- pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__);
+ pr_err("adap_layer->ctrlcmd == NULL\n");
return -EINVAL;
}
frml = cnfg->phy_layers[param->phyid].frm_layer;
if (frml == NULL) {
- pr_err("CAIF: %s(): Specified PHY type does not exist!",
- __func__);
+ pr_err("Specified PHY type does not exist!\n");
return -ENODEV;
}
caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
@@ -330,9 +327,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
struct net_device *netdev;
if (adapt_layer == NULL) {
- pr_debug("CAIF: %s(): link setup response "
- "but no client exist, send linkdown back\n",
- __func__);
+ pr_debug("link setup response but no client exist, send linkdown back\n");
cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
return;
}
@@ -374,13 +369,11 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
break;
default:
- pr_err("CAIF: %s(): Protocol error. "
- "Link setup response - unknown channel type\n",
- __func__);
+ pr_err("Protocol error. Link setup response - unknown channel type\n");
return;
}
if (!servicel) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
layer_set_dn(servicel, cnfg->mux);
@@ -418,7 +411,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
}
}
if (*phyid == 0) {
- pr_err("CAIF: %s(): No Available PHY ID\n", __func__);
+ pr_err("No Available PHY ID\n");
return;
}
@@ -427,7 +420,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
phy_driver =
cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
if (!phy_driver) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
@@ -436,7 +429,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
phy_driver = NULL;
break;
default:
- pr_err("CAIF: %s(): %d", __func__, phy_type);
+ pr_err("%d\n", phy_type);
return;
break;
}
@@ -455,7 +448,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
phy_layer->type = phy_type;
frml = cffrml_create(*phyid, fcs);
if (!frml) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
cnfg->phy_layers[*phyid].frm_layer = frml;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 563145fdc4c..08f267a109a 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -36,7 +38,7 @@ struct cflayer *cfctrl_create(void)
struct cfctrl *this =
kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
if (!this) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -132,9 +134,7 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
if (cfctrl_req_eq(req, p)) {
if (p != first)
- pr_warning("CAIF: %s(): Requests are not "
- "received in order\n",
- __func__);
+ pr_warn("Requests are not received in order\n");
atomic_set(&ctrl->rsp_seq_no,
p->sequence_no);
@@ -177,7 +177,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
int ret;
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -189,8 +189,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
ret =
cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
if (ret < 0) {
- pr_err("CAIF: %s(): Could not transmit enum message\n",
- __func__);
+ pr_err("Could not transmit enum message\n");
cfpkt_destroy(pkt);
}
}
@@ -208,7 +207,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
char utility_name[16];
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return -ENOMEM;
}
cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
@@ -253,13 +252,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
param->u.utility.paramlen);
break;
default:
- pr_warning("CAIF: %s():Request setup of bad link type = %d\n",
- __func__, param->linktype);
+ pr_warn("Request setup of bad link type = %d\n",
+ param->linktype);
return -EINVAL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return -ENOMEM;
}
req->client_layer = user_layer;
@@ -276,8 +275,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
ret =
cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
if (ret < 0) {
- pr_err("CAIF: %s(): Could not transmit linksetup request\n",
- __func__);
+ pr_err("Could not transmit linksetup request\n");
cfpkt_destroy(pkt);
return -ENODEV;
}
@@ -291,7 +289,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
struct cfctrl *cfctrl = container_obj(layer);
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return -ENOMEM;
}
cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
@@ -300,8 +298,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
ret =
cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
if (ret < 0) {
- pr_err("CAIF: %s(): Could not transmit link-down request\n",
- __func__);
+ pr_err("Could not transmit link-down request\n");
cfpkt_destroy(pkt);
}
return ret;
@@ -313,7 +310,7 @@ void cfctrl_sleep_req(struct cflayer *layer)
struct cfctrl *cfctrl = container_obj(layer);
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
@@ -330,7 +327,7 @@ void cfctrl_wake_req(struct cflayer *layer)
struct cfctrl *cfctrl = container_obj(layer);
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
@@ -347,7 +344,7 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
struct cfctrl *cfctrl = container_obj(layer);
struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return;
}
cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
@@ -364,12 +361,11 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
struct cfctrl_request_info *p, *tmp;
struct cfctrl *ctrl = container_obj(layr);
spin_lock(&ctrl->info_list_lock);
- pr_warning("CAIF: %s(): enter\n", __func__);
+ pr_warn("enter\n");
list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
if (p->client_layer == adap_layer) {
- pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
- p->sequence_no);
+ pr_warn("cancel req :%d\n", p->sequence_no);
list_del(&p->list);
kfree(p);
}
@@ -520,9 +516,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
cfpkt_extr_head(pkt, &param, len);
break;
default:
- pr_warning("CAIF: %s(): Request setup "
- "- invalid link type (%d)",
- __func__, serv);
+ pr_warn("Request setup - invalid link type (%d)\n",
+ serv);
goto error;
}
@@ -532,9 +527,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
cfpkt_erroneous(pkt)) {
- pr_err("CAIF: %s(): Invalid O/E bit or parse "
- "error on CAIF control channel",
- __func__);
+ pr_err("Invalid O/E bit or parse error on CAIF control channel\n");
cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
0,
req ? req->client_layer
@@ -556,8 +549,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
break;
case CFCTRL_CMD_LINK_ERR:
- pr_err("CAIF: %s(): Frame Error Indication received\n",
- __func__);
+ pr_err("Frame Error Indication received\n");
cfctrl->res.linkerror_ind();
break;
case CFCTRL_CMD_ENUM:
@@ -576,7 +568,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
cfctrl->res.radioset_rsp();
break;
default:
- pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__);
+ pr_err("Unrecognized Control Frame\n");
goto error;
break;
}
@@ -595,8 +587,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
case CAIF_CTRLCMD_FLOW_OFF_IND:
spin_lock(&this->info_list_lock);
if (!list_empty(&this->list)) {
- pr_debug("CAIF: %s(): Received flow off in "
- "control layer", __func__);
+ pr_debug("Received flow off in control layer\n");
}
spin_unlock(&this->info_list_lock);
break;
@@ -620,7 +611,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
if (!ctrl->loop_linkused[linkid])
goto found;
spin_unlock(&ctrl->loop_linkid_lock);
- pr_err("CAIF: %s(): Out of link-ids\n", __func__);
+ pr_err("Out of link-ids\n");
return -EINVAL;
found:
if (!ctrl->loop_linkused[linkid])
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 676648cac8d..496fda9ac66 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/slab.h>
#include <net/caif/caif_layer.h>
@@ -17,7 +19,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
{
struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
if (!dbg) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfsrvl, layer) == 0);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index ed9d53aff28..d3ed264ad6c 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
{
struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
if (!dgm) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -49,14 +51,14 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
caif_assert(layr->ctrlcmd != NULL);
if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
if ((cmd & DGM_CMD_BIT) == 0) {
if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -75,8 +77,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
return 0;
default:
cfpkt_destroy(pkt);
- pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n",
- __func__, cmd, cmd);
+ pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
return -EPROTO;
}
}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index e86a4ca3b21..a445043931a 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -6,6 +6,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -32,7 +34,7 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
{
struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
if (!this) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cffrml, layer) == 0);
@@ -83,7 +85,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
if (cfpkt_setlen(pkt, len) < 0) {
++cffrml_rcv_error;
- pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len);
+ pr_err("Framing length error (%d)\n", len);
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -99,14 +101,14 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
cfpkt_add_trail(pkt, &tmp, 2);
++cffrml_rcv_error;
++cffrml_rcv_checsum_error;
- pr_info("CAIF: %s(): Frame checksum error "
- "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks);
+ pr_info("Frame checksum error (0x%x != 0x%x)\n",
+ hdrchks, pktchks);
return -EILSEQ;
}
}
if (cfpkt_erroneous(pkt)) {
++cffrml_rcv_error;
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -132,7 +134,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
cfpkt_add_head(pkt, &tmp, 2);
cfpkt_info(pkt)->hdr_len += 2;
if (cfpkt_erroneous(pkt)) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
return -EPROTO;
}
ret = layr->dn->transmit(layr->dn, pkt);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 80c8d332b25..46f34b2e047 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -3,6 +3,9 @@
* Author: Sjur Brendeland/sjur.brandeland@stericsson.com
* License terms: GNU General Public License (GPL) version 2
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -190,7 +193,7 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
u8 id;
struct cflayer *up;
if (cfpkt_extr_head(pkt, &id, 1) < 0) {
- pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__);
+ pr_err("erroneous Caif Packet\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -199,8 +202,8 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
up = get_up(muxl, id);
spin_unlock(&muxl->receive_lock);
if (up == NULL) {
- pr_info("CAIF: %s():Received data on unknown link ID = %d "
- "(0x%x) up == NULL", __func__, id, id);
+ pr_info("Received data on unknown link ID = %d (0x%x) up == NULL",
+ id, id);
cfpkt_destroy(pkt);
/*
* Don't return ERROR, since modem misbehaves and sends out
@@ -223,9 +226,8 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
struct caif_payload_info *info = cfpkt_info(pkt);
dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
if (dn == NULL) {
- pr_warning("CAIF: %s(): Send data on unknown phy "
- "ID = %d (0x%x)\n",
- __func__, info->dev_info->id, info->dev_info->id);
+ pr_warn("Send data on unknown phy ID = %d (0x%x)\n",
+ info->dev_info->id, info->dev_info->id);
return -ENOTCONN;
}
info->hdr_len += 1;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index c49a6695793..d7e865e2ff6 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/hardirq.h>
@@ -12,11 +14,12 @@
#define PKT_PREFIX 48
#define PKT_POSTFIX 2
#define PKT_LEN_WHEN_EXTENDING 128
-#define PKT_ERROR(pkt, errmsg) do { \
- cfpkt_priv(pkt)->erronous = true; \
- skb_reset_tail_pointer(&pkt->skb); \
- pr_warning("CAIF: " errmsg);\
- } while (0)
+#define PKT_ERROR(pkt, errmsg) \
+do { \
+ cfpkt_priv(pkt)->erronous = true; \
+ skb_reset_tail_pointer(&pkt->skb); \
+ pr_warn(errmsg); \
+} while (0)
struct cfpktq {
struct sk_buff_head head;
@@ -130,13 +133,13 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
return -EPROTO;
if (unlikely(len > skb->len)) {
- PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n");
+ PKT_ERROR(pkt, "read beyond end of packet\n");
return -EPROTO;
}
if (unlikely(len > skb_headlen(skb))) {
if (unlikely(skb_linearize(skb) != 0)) {
- PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n");
+ PKT_ERROR(pkt, "linearize failed\n");
return -EPROTO;
}
}
@@ -156,11 +159,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
return -EPROTO;
if (unlikely(skb_linearize(skb) != 0)) {
- PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n");
+ PKT_ERROR(pkt, "linearize failed\n");
return -EPROTO;
}
if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
- PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n");
+ PKT_ERROR(pkt, "read beyond end of packet\n");
return -EPROTO;
}
from = skb_tail_pointer(skb) - len;
@@ -202,7 +205,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
/* Make sure data is writable */
if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
- PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n");
+ PKT_ERROR(pkt, "cow failed\n");
return -EPROTO;
}
/*
@@ -211,8 +214,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
* lengths of the top SKB.
*/
if (lastskb != skb) {
- pr_warning("CAIF: %s(): Packet is non-linear\n",
- __func__);
+ pr_warn("Packet is non-linear\n");
skb->len += len;
skb->data_len += len;
}
@@ -242,14 +244,14 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
if (unlikely(is_erronous(pkt)))
return -EPROTO;
if (unlikely(skb_headroom(skb) < len)) {
- PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n");
+ PKT_ERROR(pkt, "no headroom\n");
return -EPROTO;
}
/* Make sure data is writable */
ret = skb_cow_data(skb, 0, &lastskb);
if (unlikely(ret < 0)) {
- PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
+ PKT_ERROR(pkt, "cow failed\n");
return ret;
}
@@ -283,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
if (unlikely(is_erronous(pkt)))
return -EPROTO;
if (unlikely(skb_linearize(&pkt->skb) != 0)) {
- PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n");
+ PKT_ERROR(pkt, "linearize failed\n");
return -EPROTO;
}
return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
@@ -309,7 +311,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
/* Need to expand SKB */
if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
- PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n");
+ PKT_ERROR(pkt, "skb_pad_trail failed\n");
return cfpkt_getlen(pkt);
}
@@ -380,8 +382,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
return NULL;
if (skb->data + pos > skb_tail_pointer(skb)) {
- PKT_ERROR(pkt,
- "cfpkt_split: trying to split beyond end of packet");
+ PKT_ERROR(pkt, "trying to split beyond end of packet\n");
return NULL;
}
@@ -455,17 +456,17 @@ int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
return -EPROTO;
/* Make sure SKB is writable */
if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
- PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n");
+ PKT_ERROR(pkt, "skb_cow_data failed\n");
return -EPROTO;
}
if (unlikely(skb_linearize(skb) != 0)) {
- PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n");
+ PKT_ERROR(pkt, "linearize failed\n");
return -EPROTO;
}
if (unlikely(skb_tailroom(skb) < buflen)) {
- PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n");
+ PKT_ERROR(pkt, "buffer too short - failed\n");
return -EPROTO;
}
@@ -483,14 +484,13 @@ int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
return -EPROTO;
if (unlikely(buflen > skb->len)) {
- PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large "
- "- failed\n");
+ PKT_ERROR(pkt, "buflen too large - failed\n");
return -EPROTO;
}
if (unlikely(buflen > skb_headlen(skb))) {
if (unlikely(skb_linearize(skb) != 0)) {
- PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n");
+ PKT_ERROR(pkt, "linearize failed\n");
return -EPROTO;
}
}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 9a699242d10..bde8481e8d2 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -48,7 +50,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
if (!this) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
@@ -178,9 +180,7 @@ out:
cfpkt_destroy(rfml->incomplete_frm);
rfml->incomplete_frm = NULL;
- pr_info("CAIF: %s(): "
- "Connection error %d triggered on RFM link\n",
- __func__, err);
+ pr_info("Connection error %d triggered on RFM link\n", err);
/* Trigger connection error upon failure.*/
layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
@@ -280,9 +280,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
out:
if (err != 0) {
- pr_info("CAIF: %s(): "
- "Connection error %d triggered on RFM link\n",
- __func__, err);
+ pr_info("Connection error %d triggered on RFM link\n", err);
/* Trigger connection error upon failure.*/
layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index a11fbd68a13..9297f7dea9d 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -34,7 +36,7 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
{
struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
if (!this) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfserl, layer) == 0);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index f40939a9121..ab5e542526b 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -79,8 +81,7 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
layr->up->ctrlcmd(layr->up, ctrl, phyid);
break;
default:
- pr_warning("CAIF: %s(): "
- "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
+ pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
/* We have both modem and phy flow on, send flow on */
layr->up->ctrlcmd(layr->up, ctrl, phyid);
service->phy_flow_on = true;
@@ -107,14 +108,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
u8 flow_on = SRVL_FLOW_ON;
pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n",
- __func__);
+ pr_warn("Out of memory\n");
return -ENOMEM;
}
if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n",
- __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -131,14 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
u8 flow_off = SRVL_FLOW_OFF;
pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
if (!pkt) {
- pr_warning("CAIF: %s(): Out of memory\n",
- __func__);
+ pr_warn("Out of memory\n");
return -ENOMEM;
}
if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n",
- __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 02795aff57a..efad410e4c8 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
{
struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
if (!util) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
caif_assert(layr->up->receive != NULL);
caif_assert(layr->up->ctrlcmd != NULL);
if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -64,16 +66,14 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
cfpkt_destroy(pkt);
return 0;
case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
- pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n",
- __func__);
+ pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
service->open = false;
cfpkt_destroy(pkt);
return 0;
default:
cfpkt_destroy(pkt);
- pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n",
- __func__, cmd, cmd);
+ pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
return -EPROTO;
}
}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 77cc09faac9..3b425b189a9 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/stddef.h>
#include <linux/slab.h>
#include <net/caif/caif_layer.h>
@@ -25,7 +27,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
{
struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
if (!vei) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -67,8 +69,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
cfpkt_destroy(pkt);
return 0;
default: /* SET RS232 PIN */
- pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n",
- __func__, cmd, cmd);
+ pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
cfpkt_destroy(pkt);
return -EPROTO;
}
@@ -86,7 +87,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
caif_assert(layr->dn->transmit != NULL);
if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
return -EPROTO;
}
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index ada6ee2d48f..bf6fef2a0ef 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -4,6 +4,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -21,7 +23,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
{
struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
if (!vid) {
- pr_warning("CAIF: %s(): Out of memory\n", __func__);
+ pr_warn("Out of memory\n");
return NULL;
}
caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -38,7 +40,7 @@ static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
{
u32 videoheader;
if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
- pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+ pr_err("Packet is erroneous!\n");
cfpkt_destroy(pkt);
return -EPROTO;
}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4293e190ec5..84a422c9894 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -5,6 +5,8 @@
* License terms: GNU General Public License (GPL) version 2
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
+
#include <linux/version.h>
#include <linux/fs.h>
#include <linux/init.h>
@@ -28,9 +30,6 @@
#define CONNECT_TIMEOUT (5 * HZ)
#define CAIF_NET_DEFAULT_QUEUE_LEN 500
-#undef pr_debug
-#define pr_debug pr_warning
-
/*This list is protected by the rtnl lock. */
static LIST_HEAD(chnl_net_list);
@@ -142,8 +141,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
int phyid)
{
struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
- pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n",
- __func__,
+ pr_debug("NET flowctrl func called flow: %s\n",
flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
@@ -196,12 +194,12 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
priv = netdev_priv(dev);
if (skb->len > priv->netdev->mtu) {
- pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__);
+ pr_warn("Size of skb exceeded MTU\n");
return -ENOSPC;
}
if (!priv->flowenabled) {
- pr_debug("CAIF: %s(): dropping packets flow off\n", __func__);
+ pr_debug("dropping packets flow off\n");
return NETDEV_TX_BUSY;
}
@@ -237,7 +235,7 @@ static int chnl_net_open(struct net_device *dev)
ASSERT_RTNL();
priv = netdev_priv(dev);
if (!priv) {
- pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__);
+ pr_debug("chnl_net_open: no priv\n");
return -ENODEV;
}
@@ -246,18 +244,17 @@ static int chnl_net_open(struct net_device *dev)
result = caif_connect_client(&priv->conn_req, &priv->chnl,
&llifindex, &headroom, &tailroom);
if (result != 0) {
- pr_debug("CAIF: %s(): err: "
- "Unable to register and open device,"
- " Err:%d\n",
- __func__,
- result);
+ pr_debug("err: "
+ "Unable to register and open device,"
+ " Err:%d\n",
+ result);
goto error;
}
lldev = dev_get_by_index(dev_net(dev), llifindex);
if (lldev == NULL) {
- pr_debug("CAIF: %s(): no interface?\n", __func__);
+ pr_debug("no interface?\n");
result = -ENODEV;
goto error;
}
@@ -279,9 +276,7 @@ static int chnl_net_open(struct net_device *dev)
dev_put(lldev);
if (mtu < 100) {
- pr_warning("CAIF: %s(): "
- "CAIF Interface MTU too small (%d)\n",
- __func__, mtu);
+ pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
result = -ENODEV;
goto error;
}
@@ -296,33 +291,32 @@ static int chnl_net_open(struct net_device *dev)
rtnl_lock();
if (result == -ERESTARTSYS) {
- pr_debug("CAIF: %s(): wait_event_interruptible"
- " woken by a signal\n", __func__);
+ pr_debug("wait_event_interruptible woken by a signal\n");
result = -ERESTARTSYS;
goto error;
}
if (result == 0) {
- pr_debug("CAIF: %s(): connect timeout\n", __func__);
+ pr_debug("connect timeout\n");
caif_disconnect_client(&priv->chnl);
priv->state = CAIF_DISCONNECTED;
- pr_debug("CAIF: %s(): state disconnected\n", __func__);
+ pr_debug("state disconnected\n");
result = -ETIMEDOUT;
goto error;
}
if (priv->state != CAIF_CONNECTED) {
- pr_debug("CAIF: %s(): connect failed\n", __func__);
+ pr_debug("connect failed\n");
result = -ECONNREFUSED;
goto error;
}
- pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
+ pr_debug("CAIF Netdevice connected\n");
return 0;
error:
caif_disconnect_client(&priv->chnl);
priv->state = CAIF_DISCONNECTED;
- pr_debug("CAIF: %s(): state disconnected\n", __func__);
+ pr_debug("state disconnected\n");
return result;
}
@@ -413,7 +407,7 @@ static void caif_netlink_parms(struct nlattr *data[],
struct caif_connect_request *conn_req)
{
if (!data) {
- pr_warning("CAIF: %s: no params data found\n", __func__);
+ pr_warn("no params data found\n");
return;
}
if (data[IFLA_CAIF_IPV4_CONNID])
@@ -442,8 +436,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
ret = register_netdevice(dev);
if (ret)
- pr_warning("CAIF: %s(): device rtml registration failed\n",
- __func__);
+ pr_warn("device rtml registration failed\n");
return ret;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f08..e88f610fdb7 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -90,23 +90,39 @@ struct raw_sock {
can_err_mask_t err_mask;
};
+/*
+ * Return pointer to store the extra msg flags for raw_recvmsg().
+ * We use the space of one unsigned int beyond the 'struct sockaddr_can'
+ * in skb->cb.
+ */
+static inline unsigned int *raw_flags(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
+ sizeof(unsigned int)));
+
+ /* return pointer after struct sockaddr_can */
+ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
+}
+
static inline struct raw_sock *raw_sk(const struct sock *sk)
{
return (struct raw_sock *)sk;
}
-static void raw_rcv(struct sk_buff *skb, void *data)
+static void raw_rcv(struct sk_buff *oskb, void *data)
{
struct sock *sk = (struct sock *)data;
struct raw_sock *ro = raw_sk(sk);
struct sockaddr_can *addr;
+ struct sk_buff *skb;
+ unsigned int *pflags;
/* check the received tx sock reference */
- if (!ro->recv_own_msgs && skb->sk == sk)
+ if (!ro->recv_own_msgs && oskb->sk == sk)
return;
/* clone the given skb to be able to enqueue it into the rcv queue */
- skb = skb_clone(skb, GFP_ATOMIC);
+ skb = skb_clone(oskb, GFP_ATOMIC);
if (!skb)
return;
@@ -123,6 +139,14 @@ static void raw_rcv(struct sk_buff *skb, void *data)
addr->can_family = AF_CAN;
addr->can_ifindex = skb->dev->ifindex;
+ /* add CAN specific message flags for raw_recvmsg() */
+ pflags = raw_flags(skb);
+ *pflags = 0;
+ if (oskb->sk)
+ *pflags |= MSG_DONTROUTE;
+ if (oskb->sk == sk)
+ *pflags |= MSG_CONFIRM;
+
if (sock_queue_rcv_skb(sk, skb) < 0)
kfree_skb(skb);
}
@@ -647,12 +671,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
if (err < 0)
goto free_skb;
- err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+ err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (err < 0)
goto free_skb;
/* to be able to check the received tx sock reference in raw_rcv() */
- skb_tx(skb)->prevent_sk_orphan = 1;
+ skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
skb->dev = dev;
skb->sk = sk;
@@ -707,6 +731,9 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
}
+ /* assign the flags that have been recorded in raw_rcv() */
+ msg->msg_flags |= *(raw_flags(skb));
+
skb_free_datagram(sk, skb);
return size;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 282806ba7a5..cd1e039c875 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -747,13 +747,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP;
+ mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ if (!skb_queue_empty(&sk->sk_receive_queue))
mask |= POLLIN | POLLRDNORM;
/* Connection-based need to check for termination and startup */
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ec85e27bee..78b5a89b0f4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
+#include <linux/inetdevice.h>
#include "net-sysfs.h"
@@ -373,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
* --ANK (980803)
*/
+static inline struct list_head *ptype_head(const struct packet_type *pt)
+{
+ if (pt->type == htons(ETH_P_ALL))
+ return &ptype_all;
+ else
+ return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+}
+
/**
* dev_add_pack - add packet handler
* @pt: packet type declaration
@@ -388,16 +397,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
void dev_add_pack(struct packet_type *pt)
{
- int hash;
+ struct list_head *head = ptype_head(pt);
- spin_lock_bh(&ptype_lock);
- if (pt->type == htons(ETH_P_ALL))
- list_add_rcu(&pt->list, &ptype_all);
- else {
- hash = ntohs(pt->type) & PTYPE_HASH_MASK;
- list_add_rcu(&pt->list, &ptype_base[hash]);
- }
- spin_unlock_bh(&ptype_lock);
+ spin_lock(&ptype_lock);
+ list_add_rcu(&pt->list, head);
+ spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
@@ -416,15 +420,10 @@ EXPORT_SYMBOL(dev_add_pack);
*/
void __dev_remove_pack(struct packet_type *pt)
{
- struct list_head *head;
+ struct list_head *head = ptype_head(pt);
struct packet_type *pt1;
- spin_lock_bh(&ptype_lock);
-
- if (pt->type == htons(ETH_P_ALL))
- head = &ptype_all;
- else
- head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+ spin_lock(&ptype_lock);
list_for_each_entry(pt1, head, list) {
if (pt == pt1) {
@@ -435,7 +434,7 @@ void __dev_remove_pack(struct packet_type *pt)
printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
- spin_unlock_bh(&ptype_lock);
+ spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);
@@ -1486,8 +1485,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_orphan(skb);
nf_reset(skb);
- if (!(dev->flags & IFF_UP) ||
- (skb->len > (dev->mtu + dev->hard_header_len))) {
+ if (unlikely(!(dev->flags & IFF_UP) ||
+ (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+ atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -1555,21 +1555,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
* Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
* greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
*/
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
- unsigned int real_num = dev->real_num_tx_queues;
+ if (txq < 1 || txq > dev->num_tx_queues)
+ return -EINVAL;
- if (unlikely(txq > dev->num_tx_queues))
- ;
- else if (txq > real_num)
- dev->real_num_tx_queues = txq;
- else if (txq < real_num) {
- dev->real_num_tx_queues = txq;
- qdisc_reset_all_tx_gt(dev, txq);
+ if (dev->reg_state == NETREG_REGISTERED) {
+ ASSERT_RTNL();
+
+ if (txq < dev->real_num_tx_queues)
+ qdisc_reset_all_tx_gt(dev, txq);
}
+
+ dev->real_num_tx_queues = txq;
+ return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);
+#ifdef CONFIG_RPS
+/**
+ * netif_set_real_num_rx_queues - set actual number of RX queues used
+ * @dev: Network device
+ * @rxq: Actual number of RX queues
+ *
+ * This must be called either with the rtnl_lock held or before
+ * registration of the net device. Returns 0 on success, or a
+ * negative error code. If called before registration, it always
+ * succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+ int rc;
+
+ if (rxq < 1 || rxq > dev->num_rx_queues)
+ return -EINVAL;
+
+ if (dev->reg_state == NETREG_REGISTERED) {
+ ASSERT_RTNL();
+
+ rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+ rxq);
+ if (rc)
+ return rc;
+ }
+
+ dev->real_num_rx_queues = rxq;
+ return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
+
static inline void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
@@ -1661,7 +1696,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
{
- if (can_checksum_protocol(dev->features, skb->protocol))
+ int features = dev->features;
+
+ if (vlan_tx_tag_present(skb))
+ features &= dev->vlan_features;
+
+ if (can_checksum_protocol(features, skb->protocol))
return true;
if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -1760,6 +1800,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
__be16 type = skb->protocol;
int err;
+ if (type == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veh;
+
+ if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+ return ERR_PTR(-EINVAL);
+
+ veh = (struct vlan_ethhdr *)skb->data;
+ type = veh->h_vlan_encapsulated_proto;
+ }
+
skb_reset_mac_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
__skb_pull(skb, skb->mac_len);
@@ -1904,14 +1954,14 @@ static int dev_gso_segment(struct sk_buff *skb)
/*
* Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested, since
- * drivers need to call skb_tstamp_tx() to send the timestamp.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
*/
static inline void skb_orphan_try(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- if (sk && !skb_tx(skb)->flags) {
+ if (sk && !skb_shinfo(skb)->tx_flags) {
/* skb_tx_hash() wont be able to get sk.
* We copy sk_hash into skb->rxhash
*/
@@ -1931,9 +1981,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
static inline int skb_needs_linearize(struct sk_buff *skb,
struct net_device *dev)
{
+ int features = dev->features;
+
+ if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
+ features &= dev->vlan_features;
+
return skb_is_nonlinear(skb) &&
- ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
- (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+ ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
+ (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
illegal_highdma(dev, skb))));
}
@@ -1956,6 +2011,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb_orphan_try(skb);
+ if (vlan_tx_tag_present(skb) &&
+ !(dev->features & NETIF_F_HW_VLAN_TX)) {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ goto out;
+
+ skb->vlan_tci = 0;
+ }
+
if (netif_needs_gso(dev, skb)) {
if (unlikely(dev_gso_segment(skb)))
goto out_kfree_skb;
@@ -2019,6 +2083,7 @@ out_kfree_gso_skb:
skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
kfree_skb(skb);
+out:
return rc;
}
@@ -2147,6 +2212,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return rc;
}
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 3
+
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -2213,10 +2281,15 @@ int dev_queue_xmit(struct sk_buff *skb)
if (txq->xmit_lock_owner != cpu) {
+ if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+ goto recursion_alert;
+
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_tx_queue_stopped(txq)) {
+ __this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
+ __this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
goto out;
@@ -2228,7 +2301,9 @@ int dev_queue_xmit(struct sk_buff *skb)
"queue packet!\n", dev->name);
} else {
/* Recursion is detected! It is possible,
- * unfortunately */
+ * unfortunately
+ */
+recursion_alert:
if (net_ratelimit())
printk(KERN_CRIT "Dead loop on virtual device "
"%s, fix it urgently!\n", dev->name);
@@ -2264,69 +2339,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
/*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
*/
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
- struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
{
+ int nhoff, hash = 0, poff;
struct ipv6hdr *ip6;
struct iphdr *ip;
- struct netdev_rx_queue *rxqueue;
- struct rps_map *map;
- struct rps_dev_flow_table *flow_table;
- struct rps_sock_flow_table *sock_flow_table;
- int cpu = -1;
u8 ip_proto;
- u16 tcpu;
u32 addr1, addr2, ihl;
union {
u32 v32;
u16 v16[2];
} ports;
- if (skb_rx_queue_recorded(skb)) {
- u16 index = skb_get_rx_queue(skb);
- if (unlikely(index >= dev->num_rx_queues)) {
- WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
- "on queue %u, but number of RX queues is %u\n",
- dev->name, index, dev->num_rx_queues);
- goto done;
- }
- rxqueue = dev->_rx + index;
- } else
- rxqueue = dev->_rx;
-
- if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
- goto done;
-
- if (skb->rxhash)
- goto got_hash; /* Skip hash computation on packet header */
+ nhoff = skb_network_offset(skb);
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
- if (!pskb_may_pull(skb, sizeof(*ip)))
+ if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
goto done;
- ip = (struct iphdr *) skb->data;
- ip_proto = ip->protocol;
+ ip = (struct iphdr *) (skb->data + nhoff);
+ if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+ ip_proto = 0;
+ else
+ ip_proto = ip->protocol;
addr1 = (__force u32) ip->saddr;
addr2 = (__force u32) ip->daddr;
ihl = ip->ihl;
break;
case __constant_htons(ETH_P_IPV6):
- if (!pskb_may_pull(skb, sizeof(*ip6)))
+ if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
goto done;
- ip6 = (struct ipv6hdr *) skb->data;
+ ip6 = (struct ipv6hdr *) (skb->data + nhoff);
ip_proto = ip6->nexthdr;
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2335,33 +2385,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
default:
goto done;
}
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- if (pskb_may_pull(skb, (ihl * 4) + 4)) {
- ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+ ports.v32 = 0;
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ nhoff += ihl * 4 + poff;
+ if (pskb_may_pull(skb, nhoff + 4)) {
+ ports.v32 = * (__force u32 *) (skb->data + nhoff);
if (ports.v16[1] < ports.v16[0])
swap(ports.v16[0], ports.v16[1]);
- break;
}
- default:
- ports.v32 = 0;
- break;
}
/* get a consistent hash (same value on both flow directions) */
if (addr2 < addr1)
swap(addr1, addr2);
- skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
- if (!skb->rxhash)
- skb->rxhash = 1;
-got_hash:
+ hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+ if (!hash)
+ hash = 1;
+
+done:
+ return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+ struct rps_dev_flow **rflowp)
+{
+ struct netdev_rx_queue *rxqueue;
+ struct rps_map *map = NULL;
+ struct rps_dev_flow_table *flow_table;
+ struct rps_sock_flow_table *sock_flow_table;
+ int cpu = -1;
+ u16 tcpu;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+ goto done;
+ }
+ rxqueue = dev->_rx + index;
+ } else
+ rxqueue = dev->_rx;
+
+ if (rxqueue->rps_map) {
+ map = rcu_dereference(rxqueue->rps_map);
+ if (map && map->len == 1) {
+ tcpu = map->cpus[0];
+ if (cpu_online(tcpu))
+ cpu = tcpu;
+ goto done;
+ }
+ } else if (!rxqueue->rps_flow_table) {
+ goto done;
+ }
+
+ skb_reset_network_header(skb);
+ if (!skb_get_rxhash(skb))
+ goto done;
+
flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
@@ -2401,7 +2499,6 @@ got_hash:
}
}
- map = rcu_dereference(rxqueue->rps_map);
if (map) {
tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
@@ -2487,6 +2584,7 @@ enqueue:
local_irq_restore(flags);
+ atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -2643,11 +2741,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
* the ingress scheduler, you just cant add policies on ingress.
*
*/
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
{
struct net_device *dev = skb->dev;
u32 ttl = G_TC_RTTL(skb->tc_verd);
- struct netdev_queue *rxq;
int result = TC_ACT_OK;
struct Qdisc *q;
@@ -2661,8 +2758,6 @@ static int ing_filter(struct sk_buff *skb)
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- rxq = &dev->rx_queue;
-
q = rxq->qdisc;
if (q != &noop_qdisc) {
spin_lock(qdisc_lock(q));
@@ -2678,7 +2773,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+ struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+ if (!rxq || rxq->qdisc == &noop_qdisc)
goto out;
if (*pt_prev) {
@@ -2686,7 +2783,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
*pt_prev = NULL;
}
- switch (ing_filter(skb)) {
+ switch (ing_filter(skb, rxq)) {
case TC_ACT_SHOT:
case TC_ACT_STOLEN:
kfree_skb(skb);
@@ -2699,33 +2796,6 @@ out:
}
#endif
-/*
- * netif_nit_deliver - deliver received packets to network taps
- * @skb: buffer
- *
- * This function is used to deliver incoming packets to network
- * taps. It should be used when the normal netif_receive_skb path
- * is bypassed, for example because of VLAN acceleration.
- */
-void netif_nit_deliver(struct sk_buff *skb)
-{
- struct packet_type *ptype;
-
- if (list_empty(&ptype_all))
- return;
-
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb->mac_len = skb->network_header - skb->mac_header;
-
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev)
- deliver_skb(skb, ptype, skb->dev);
- }
- rcu_read_unlock();
-}
-
/**
* netdev_rx_handler_register - register receive handler
* @dev: device to register a handler for
@@ -2836,8 +2906,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
net_timestamp_check(skb);
trace_netif_receive_skb(skb);
- if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
- return NET_RX_SUCCESS;
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
@@ -2851,8 +2919,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
* be delivered to pkt handlers that are exact matches. Also
* the deliver_no_wcard flag will be set. If packet handlers
* are sensitive to duplicate packets these skbs will need to
- * be dropped at the handler. The vlan accel path may have
- * already set the deliver_no_wcard flag.
+ * be dropped at the handler.
*/
null_or_orig = NULL;
orig_dev = skb->dev;
@@ -2911,6 +2978,18 @@ ncls:
goto out;
}
+ if (vlan_tx_tag_present(skb)) {
+ if (pt_prev) {
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = NULL;
+ }
+ if (vlan_hwaccel_do_receive(&skb)) {
+ ret = __netif_receive_skb(skb);
+ goto out;
+ } else if (unlikely(!skb))
+ goto out;
+ }
+
/*
* Make sure frames received on VLAN interfaces stacked on
* bonding interfaces still make their way to any base bonding
@@ -2938,6 +3017,7 @@ ncls:
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
+ atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
@@ -3058,7 +3138,7 @@ out:
return netif_receive_skb(skb);
}
-static void napi_gro_flush(struct napi_struct *napi)
+inline void napi_gro_flush(struct napi_struct *napi)
{
struct sk_buff *skb, *next;
@@ -3071,6 +3151,7 @@ static void napi_gro_flush(struct napi_struct *napi)
napi->gro_count = 0;
napi->gro_list = NULL;
}
+EXPORT_SYMBOL(napi_gro_flush);
enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
@@ -3085,7 +3166,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
goto normal;
- if (skb_is_gso(skb) || skb_has_frags(skb))
+ if (skb_is_gso(skb) || skb_has_frag_list(skb))
goto normal;
rcu_read_lock();
@@ -3164,16 +3245,19 @@ normal:
}
EXPORT_SYMBOL(dev_gro_receive);
-static gro_result_t
+static inline gro_result_t
__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff *p;
for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow =
- (p->dev == skb->dev) &&
- !compare_ether_header(skb_mac_header(p),
+ unsigned long diffs;
+
+ diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= compare_ether_header(skb_mac_header(p),
skb_gro_mac_header(skb));
+ NAPI_GRO_CB(p)->same_flow = !diffs;
NAPI_GRO_CB(p)->flush = 0;
}
@@ -3226,14 +3310,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_gro_receive);
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
__skb_pull(skb, skb_headlen(skb));
skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+ skb->vlan_tci = 0;
napi->skb = skb;
}
-EXPORT_SYMBOL(napi_reuse_skb);
struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
@@ -4867,21 +4951,6 @@ static void rollback_registered(struct net_device *dev)
rollback_registered_many(&single);
}
-static void __netdev_init_queue_locks_one(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *_unused)
-{
- spin_lock_init(&dev_queue->_xmit_lock);
- netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
- dev_queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queue_locks(struct net_device *dev)
-{
- netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
- __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
-}
-
unsigned long netdev_fix_features(unsigned long features, const char *name)
{
/* Fix illegal SG+CSUM combinations. */
@@ -4949,6 +5018,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
+static int netif_alloc_rx_queues(struct net_device *dev)
+{
+#ifdef CONFIG_RPS
+ unsigned int i, count = dev->num_rx_queues;
+ struct netdev_rx_queue *rx;
+
+ BUG_ON(count < 1);
+
+ rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+ if (!rx) {
+ pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+ return -ENOMEM;
+ }
+ dev->_rx = rx;
+
+ /*
+ * Set a pointer to first element in the array which holds the
+ * reference count.
+ */
+ for (i = 0; i < count; i++)
+ rx[i].first = rx;
+#endif
+ return 0;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+ unsigned int count = dev->num_tx_queues;
+ struct netdev_queue *tx;
+
+ BUG_ON(count < 1);
+
+ tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+ if (!tx) {
+ pr_err("netdev: Unable to allocate %u tx queues.\n",
+ count);
+ return -ENOMEM;
+ }
+ dev->_tx = tx;
+ return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue,
+ void *_unused)
+{
+ queue->dev = dev;
+
+ /* Initialize queue lock */
+ spin_lock_init(&queue->_xmit_lock);
+ netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+ queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+ spin_lock_init(&dev->tx_global_lock);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -4982,28 +5111,19 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
- netdev_init_queue_locks(dev);
dev->iflink = -1;
-#ifdef CONFIG_RPS
- if (!dev->num_rx_queues) {
- /*
- * Allocate a single RX queue if driver never called
- * alloc_netdev_mq
- */
+ ret = netif_alloc_rx_queues(dev);
+ if (ret)
+ goto out;
- dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!dev->_rx) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = netif_alloc_netdev_queues(dev);
+ if (ret)
+ goto out;
+
+ netdev_init_queues(dev);
- dev->_rx->first = dev->_rx;
- atomic_set(&dev->_rx->count, 1);
- dev->num_rx_queues = 1;
- }
-#endif
/* Init, if this function is available */
if (dev->netdev_ops->ndo_init) {
ret = dev->netdev_ops->ndo_init(dev);
@@ -5043,6 +5163,12 @@ int register_netdevice(struct net_device *dev)
if (dev->features & NETIF_F_SG)
dev->features |= NETIF_F_GSO;
+ /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
+ * vlan_dev_init() will do the dev->features check, so these features
+ * are enabled only if supported by underlying device.
+ */
+ dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
if (ret)
@@ -5113,9 +5239,6 @@ int init_dummy_netdev(struct net_device *dev)
*/
dev->reg_state = NETREG_DUMMY;
- /* initialize the ref count */
- atomic_set(&dev->refcnt, 1);
-
/* NAPI wants this */
INIT_LIST_HEAD(&dev->napi_list);
@@ -5123,6 +5246,11 @@ int init_dummy_netdev(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
set_bit(__LINK_STATE_START, &dev->state);
+ /* Note : We dont allocate pcpu_refcnt for dummy devices,
+ * because users of this 'device' dont need to change
+ * its refcount.
+ */
+
return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5164,6 +5292,16 @@ out:
}
EXPORT_SYMBOL(register_netdev);
+int netdev_refcnt_read(const struct net_device *dev)
+{
+ int i, refcnt = 0;
+
+ for_each_possible_cpu(i)
+ refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+ return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
+
/*
* netdev_wait_allrefs - wait until all references are gone.
*
@@ -5178,11 +5316,14 @@ EXPORT_SYMBOL(register_netdev);
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
+ int refcnt;
linkwatch_forget_dev(dev);
rebroadcast_time = warning_time = jiffies;
- while (atomic_read(&dev->refcnt) != 0) {
+ refcnt = netdev_refcnt_read(dev);
+
+ while (refcnt != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
rtnl_lock();
@@ -5209,11 +5350,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
msleep(250);
+ refcnt = netdev_refcnt_read(dev);
+
if (time_after(jiffies, warning_time + 10 * HZ)) {
printk(KERN_EMERG "unregister_netdevice: "
"waiting for %s to become free. Usage "
"count = %d\n",
- dev->name, atomic_read(&dev->refcnt));
+ dev->name, refcnt);
warning_time = jiffies;
}
}
@@ -5271,8 +5414,8 @@ void netdev_run_todo(void)
netdev_wait_allrefs(dev);
/* paranoia */
- BUG_ON(atomic_read(&dev->refcnt));
- WARN_ON(dev->ip_ptr);
+ BUG_ON(netdev_refcnt_read(dev));
+ WARN_ON(rcu_dereference_raw(dev->ip_ptr));
WARN_ON(dev->ip6_ptr);
WARN_ON(dev->dn_ptr);
@@ -5350,30 +5493,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
if (ops->ndo_get_stats64) {
memset(storage, 0, sizeof(*storage));
- return ops->ndo_get_stats64(dev, storage);
- }
- if (ops->ndo_get_stats) {
+ ops->ndo_get_stats64(dev, storage);
+ } else if (ops->ndo_get_stats) {
netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
- return storage;
+ } else {
+ netdev_stats_to_stats64(storage, &dev->stats);
+ dev_txq_stats_fold(dev, storage);
}
- netdev_stats_to_stats64(storage, &dev->stats);
- dev_txq_stats_fold(dev, storage);
+ storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
-static void netdev_init_one_queue(struct net_device *dev,
- struct netdev_queue *queue,
- void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
- queue->dev = dev;
-}
+ struct netdev_queue *queue = dev_ingress_queue(dev);
-static void netdev_init_queues(struct net_device *dev)
-{
- netdev_init_one_queue(dev, &dev->rx_queue, NULL);
- netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
- spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+ if (queue)
+ return queue;
+ queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+ if (!queue)
+ return NULL;
+ netdev_init_one_queue(dev, queue, NULL);
+ queue->qdisc = &noop_qdisc;
+ queue->qdisc_sleeping = &noop_qdisc;
+ rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+ return queue;
}
/**
@@ -5390,17 +5537,18 @@ static void netdev_init_queues(struct net_device *dev)
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
- struct netdev_queue *tx;
struct net_device *dev;
size_t alloc_size;
struct net_device *p;
-#ifdef CONFIG_RPS
- struct netdev_rx_queue *rx;
- int i;
-#endif
BUG_ON(strlen(name) >= sizeof(dev->name));
+ if (queue_count < 1) {
+ pr_err("alloc_netdev: Unable to allocate device "
+ "with zero queues.\n");
+ return NULL;
+ }
+
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
@@ -5416,55 +5564,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
return NULL;
}
- tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
- if (!tx) {
- printk(KERN_ERR "alloc_netdev: Unable to allocate "
- "tx qdiscs.\n");
- goto free_p;
- }
-
-#ifdef CONFIG_RPS
- rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!rx) {
- printk(KERN_ERR "alloc_netdev: Unable to allocate "
- "rx queues.\n");
- goto free_tx;
- }
-
- atomic_set(&rx->count, queue_count);
-
- /*
- * Set a pointer to first element in the array which holds the
- * reference count.
- */
- for (i = 0; i < queue_count; i++)
- rx[i].first = rx;
-#endif
-
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
+ dev->pcpu_refcnt = alloc_percpu(int);
+ if (!dev->pcpu_refcnt)
+ goto free_p;
+
if (dev_addr_init(dev))
- goto free_rx;
+ goto free_pcpu;
dev_mc_init(dev);
dev_uc_init(dev);
dev_net_set(dev, &init_net);
- dev->_tx = tx;
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
#ifdef CONFIG_RPS
- dev->_rx = rx;
dev->num_rx_queues = queue_count;
+ dev->real_num_rx_queues = queue_count;
#endif
dev->gso_max_size = GSO_MAX_SIZE;
- netdev_init_queues(dev);
-
INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
@@ -5475,12 +5599,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
strcpy(dev->name, name);
return dev;
-free_rx:
-#ifdef CONFIG_RPS
- kfree(rx);
-free_tx:
-#endif
- kfree(tx);
+free_pcpu:
+ free_percpu(dev->pcpu_refcnt);
free_p:
kfree(p);
return NULL;
@@ -5503,6 +5623,8 @@ void free_netdev(struct net_device *dev)
kfree(dev->_tx);
+ kfree(rcu_dereference_raw(dev->ingress_queue));
+
/* Flush device addresses */
dev_addr_flush(dev);
@@ -5512,6 +5634,9 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+ free_percpu(dev->pcpu_refcnt);
+ dev->pcpu_refcnt = NULL;
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -5666,6 +5791,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
+
+ Note that dev->reg_state stays at NETREG_REGISTERED.
+ This is wanted because this way 8021q and macvlan know
+ the device is just moving and can keep their slaves up.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6c41b1fac3d..8abe628b79f 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops)
{
struct dst_entry *dst;
- if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+ if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
if (ops->gc(ops))
return NULL;
}
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops)
#if RT_CACHE_DEBUG >= 2
atomic_inc(&dst_total);
#endif
- atomic_inc(&ops->entries);
+ dst_entries_add(ops, 1);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
@@ -228,15 +228,15 @@ again:
child = dst->child;
dst->hh = NULL;
- if (hh && atomic_dec_and_test(&hh->hh_refcnt))
- kfree(hh);
+ if (hh)
+ hh_cache_put(hh);
if (neigh) {
dst->neighbour = NULL;
neigh_release(neigh);
}
- atomic_dec(&dst->ops->entries);
+ dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
if (dst) {
int newrefcnt;
- smp_mb__before_atomic_dec();
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+ }
}
}
EXPORT_SYMBOL(dst_release);
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+ WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ /* If dst not in cache, we must take a reference, because
+ * dst_release() will destroy dst as soon as its refcount becomes zero
+ */
+ if (unlikely(dst->flags & DST_NOCACHE)) {
+ dst_hold(dst);
+ skb_dst_set(skb, dst);
+ } else {
+ skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+ }
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8451ab48109..956a9f4971c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -19,6 +19,7 @@
#include <linux/netdevice.h>
#include <linux/bitops.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
/*
@@ -131,7 +132,8 @@ EXPORT_SYMBOL(ethtool_op_set_ufo);
* NETIF_F_xxx values in include/linux/netdevice.h
*/
static const u32 flags_dup_features =
- (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+ (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+ ETH_FLAG_RXHASH);
u32 ethtool_op_get_flags(struct net_device *dev)
{
@@ -205,18 +207,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo info;
const struct ethtool_ops *ops = dev->ethtool_ops;
- if (!ops->get_drvinfo)
- return -EOPNOTSUPP;
-
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GDRVINFO;
- ops->get_drvinfo(dev, &info);
+ if (ops && ops->get_drvinfo) {
+ ops->get_drvinfo(dev, &info);
+ } else if (dev->dev.parent && dev->dev.parent->driver) {
+ strlcpy(info.bus_info, dev_name(dev->dev.parent),
+ sizeof(info.bus_info));
+ strlcpy(info.driver, dev->dev.parent->driver->name,
+ sizeof(info.driver));
+ } else {
+ return -EOPNOTSUPP;
+ }
/*
* this method of obtaining string set info is deprecated;
* Use ETHTOOL_GSSET_INFO instead.
*/
- if (ops->get_sset_count) {
+ if (ops && ops->get_sset_count) {
int rc;
rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +237,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
if (rc >= 0)
info.n_priv_flags = rc;
}
- if (ops->get_regs_len)
+ if (ops && ops->get_regs_len)
info.regdump_len = ops->get_regs_len(dev);
- if (ops->get_eeprom_len)
+ if (ops && ops->get_eeprom_len)
info.eedump_len = ops->get_eeprom_len(dev);
if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -479,6 +487,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
list->count++;
}
+/*
+ * ethtool does not (or did not) set masks for flow parameters that are
+ * not specified, so if both value and mask are 0 then this must be
+ * treated as equivalent to a mask with all bits set. Implement that
+ * here rather than in drivers.
+ */
+static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
+{
+ struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
+ struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
+
+ if (fs->flow_type != TCP_V4_FLOW &&
+ fs->flow_type != UDP_V4_FLOW &&
+ fs->flow_type != SCTP_V4_FLOW)
+ return;
+
+ if (!(entry->ip4src | mask->ip4src))
+ mask->ip4src = htonl(0xffffffff);
+ if (!(entry->ip4dst | mask->ip4dst))
+ mask->ip4dst = htonl(0xffffffff);
+ if (!(entry->psrc | mask->psrc))
+ mask->psrc = htons(0xffff);
+ if (!(entry->pdst | mask->pdst))
+ mask->pdst = htons(0xffff);
+ if (!(entry->tos | mask->tos))
+ mask->tos = 0xff;
+ if (!(fs->vlan_tag | fs->vlan_tag_mask))
+ fs->vlan_tag_mask = 0xffff;
+ if (!(fs->data | fs->data_mask))
+ fs->data_mask = 0xffffffffffffffffULL;
+}
+
static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
void __user *useraddr)
{
@@ -493,6 +533,8 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
+ rx_ntuple_fix_masks(&cmd.fs);
+
/*
* Cache filter in dev struct for GET operation only if
* the underlying driver doesn't have its own GET operation, and
@@ -667,19 +709,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
break;
case IP_USER_FLOW:
sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4src);
+ fsc->fs.h_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4src);
+ fsc->fs.m_u.usr_ip4_spec.ip4src);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.raw_ip4_spec.ip4dst);
+ fsc->fs.h_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.raw_ip4_spec.ip4dst);
+ fsc->fs.m_u.usr_ip4_spec.ip4dst);
p += ETH_GSTRING_LEN;
num_strings++;
break;
@@ -775,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (regs.len > reglen)
regs.len = reglen;
- regbuf = kzalloc(reglen, GFP_USER);
+ regbuf = vmalloc(reglen);
if (!regbuf)
return -ENOMEM;
@@ -790,7 +832,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
ret = 0;
out:
- kfree(regbuf);
+ vfree(regbuf);
return ret;
}
@@ -1175,8 +1217,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
return -EFAULT;
if (edata.data) {
- if (!dev->ethtool_ops->get_rx_csum ||
- !dev->ethtool_ops->get_rx_csum(dev))
+ u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
+ dev->ethtool_ops->get_rx_csum(dev) :
+ ethtool_op_get_rx_csum(dev);
+
+ if (!rxcsum)
return -EINVAL;
dev->features |= NETIF_F_GRO;
} else
@@ -1402,14 +1447,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (!dev || !netif_device_present(dev))
return -ENODEV;
- if (!dev->ethtool_ops)
- return -EOPNOTSUPP;
-
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
+ if (!dev->ethtool_ops) {
+ /* ETHTOOL_GDRVINFO does not require any driver support.
+ * It is also unprivileged and does not change anything,
+ * so we can take a shortcut to it. */
+ if (ethcmd == ETHTOOL_GDRVINFO)
+ return ethtool_get_drvinfo(dev, useraddr);
+ else
+ return -EOPNOTSUPP;
+ }
+
/* Allow some commands to be done by anyone */
switch (ethcmd) {
+ case ETHTOOL_GSET:
case ETHTOOL_GDRVINFO:
case ETHTOOL_GMSGLVL:
case ETHTOOL_GCOALESCE:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 42e84e08a1b..1bc3f253ba6 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -144,7 +144,7 @@ fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
}
EXPORT_SYMBOL_GPL(fib_rules_register);
-void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
+static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
struct fib_rule *rule, *tmp;
@@ -153,7 +153,6 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
fib_rule_put(rule);
}
}
-EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
static void fib_rules_put_rcu(struct rcu_head *head)
{
@@ -182,7 +181,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
{
int ret = 0;
- if (rule->iifindex && (rule->iifindex != fl->iif))
+ if (rule->iifindex && (rule->iifindex != fl->iif) &&
+ !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
goto out;
if (rule->oifindex && (rule->oifindex != fl->oif))
@@ -225,9 +225,12 @@ jumped:
err = ops->action(rule, fl, flags, arg);
if (err != -EAGAIN) {
- fib_rule_get(rule);
- arg->rule = rule;
- goto out;
+ if ((arg->flags & FIB_LOOKUP_NOREF) ||
+ likely(atomic_inc_not_zero(&rule->refcnt))) {
+ arg->rule = rule;
+ goto out;
+ }
+ break;
}
}
@@ -491,7 +494,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
}
- synchronize_rcu();
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).pid);
fib_rule_put(rule);
diff --git a/net/core/filter.c b/net/core/filter.c
index 52b051f82a0..7adf5035291 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -638,10 +638,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
return err;
}
- rcu_read_lock_bh();
- old_fp = rcu_dereference_bh(sk->sk_filter);
+ old_fp = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
rcu_assign_pointer(sk->sk_filter, fp);
- rcu_read_unlock_bh();
if (old_fp)
sk_filter_delayed_uncharge(sk, old_fp);
@@ -654,14 +653,13 @@ int sk_detach_filter(struct sock *sk)
int ret = -ENOENT;
struct sk_filter *filter;
- rcu_read_lock_bh();
- filter = rcu_dereference_bh(sk->sk_filter);
+ filter = rcu_dereference_protected(sk->sk_filter,
+ sock_owned_by_user(sk));
if (filter) {
rcu_assign_pointer(sk->sk_filter, NULL);
sk_filter_delayed_uncharge(sk, filter);
ret = 0;
}
- rcu_read_unlock_bh();
return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index f67dcbfe54e..127c8a7ffd6 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@ struct flow_flush_info {
struct flow_cache {
u32 hash_shift;
- unsigned long order;
- struct flow_cache_percpu *percpu;
+ struct flow_cache_percpu __percpu *percpu;
struct notifier_block hotcpu_notifier;
int low_watermark;
int high_watermark;
@@ -64,7 +63,7 @@ struct flow_cache {
atomic_t flow_cache_genid = ATOMIC_INIT(0);
EXPORT_SYMBOL(flow_cache_genid);
static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep;
+static struct kmem_cache *flow_cachep __read_mostly;
static DEFINE_SPINLOCK(flow_cache_gc_lock);
static LIST_HEAD(flow_cache_gc_list);
@@ -177,15 +176,11 @@ static u32 flow_hash_code(struct flow_cache *fc,
{
u32 *k = (u32 *) key;
- return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
- & (flow_cache_hash_size(fc) - 1));
+ return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+ & (flow_cache_hash_size(fc) - 1);
}
-#if (BITS_PER_LONG == 64)
-typedef u64 flow_compare_t;
-#else
-typedef u32 flow_compare_t;
-#endif
+typedef unsigned long flow_compare_t;
/* I hear what you're saying, use memcmp. But memcmp cannot make
* important assumptions that we can here, such as alignment and
@@ -357,62 +352,73 @@ void flow_cache_flush(void)
put_online_cpus();
}
-static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
- struct flow_cache_percpu *fcp)
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
{
- fcp->hash_table = (struct hlist_head *)
- __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
- if (!fcp->hash_table)
- panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+ size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
- fcp->hash_rnd_recalc = 1;
- fcp->hash_count = 0;
- tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+ if (!fcp->hash_table) {
+ fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+ if (!fcp->hash_table) {
+ pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+ return -ENOMEM;
+ }
+ fcp->hash_rnd_recalc = 1;
+ fcp->hash_count = 0;
+ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+ }
+ return 0;
}
-static int flow_cache_cpu(struct notifier_block *nfb,
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
- int cpu = (unsigned long) hcpu;
+ int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ res = flow_cache_cpu_prepare(fc, cpu);
+ if (res)
+ return notifier_from_errno(res);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
__flow_cache_shrink(fc, fcp, 0);
+ break;
+ }
return NOTIFY_OK;
}
-static int flow_cache_init(struct flow_cache *fc)
+static int __init flow_cache_init(struct flow_cache *fc)
{
- unsigned long order;
int i;
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
fc->high_watermark = 4 * flow_cache_hash_size(fc);
- for (order = 0;
- (PAGE_SIZE << order) <
- (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
- order++)
- /* NOTHING */;
- fc->order = order;
fc->percpu = alloc_percpu(struct flow_cache_percpu);
+ if (!fc->percpu)
+ return -ENOMEM;
- setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
- (unsigned long) fc);
- fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
- add_timer(&fc->rnd_timer);
-
- for_each_possible_cpu(i)
- flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
-
+ for_each_online_cpu(i) {
+ if (flow_cache_cpu_prepare(fc, i))
+ return -ENOMEM;
+ }
fc->hotcpu_notifier = (struct notifier_block){
.notifier_call = flow_cache_cpu,
};
register_hotcpu_notifier(&fc->hotcpu_notifier);
+ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+ (unsigned long) fc);
+ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+ add_timer(&fc->rnd_timer);
+
return 0;
}
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6743146e4d6..7c2373321b7 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -274,9 +274,9 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
while ((e = gen_find_node(bstats, rate_est))) {
rb_erase(&e->node, &est_root);
- write_lock_bh(&est_lock);
+ write_lock(&est_lock);
e->bstats = NULL;
- write_unlock_bh(&est_lock);
+ write_unlock(&est_lock);
list_del_rcu(&e->list);
call_rcu(&e->e_rcu, __gen_kill_estimator);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index e6b133b77cc..72aceb1fe4f 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -42,7 +42,9 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
if (m->msg_namelen) {
if (mode == VERIFY_READ) {
- err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+ void __user *namep;
+ namep = (void __user __force *) m->msg_name;
+ err = move_addr_to_kernel(namep, m->msg_namelen,
address);
if (err < 0)
return err;
@@ -53,7 +55,7 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
}
size = m->msg_iovlen * sizeof(struct iovec);
- if (copy_from_user(iov, m->msg_iov, size))
+ if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
return -EFAULT;
m->msg_iov = iov;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2..8cc8f9a79db 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
unsigned long neigh_rand_reach_time(unsigned long base)
{
- return (base ? (net_random() % base) + (base >> 1) : 0);
+ return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
@@ -131,15 +131,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
{
int shrunk = 0;
int i;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
write_lock_bh(&tbl->lock);
- for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np;
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ for (i = 0; i <= nht->hash_mask; i++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np;
- np = &tbl->hash_buckets[i];
- while ((n = *np) != NULL) {
+ np = &nht->hash_buckets[i];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
/* Neighbour record may be discarded if:
* - nobody refers to it.
* - it is not permanent
@@ -147,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
@@ -199,16 +206,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
int i;
+ struct neigh_hash_table *nht;
- for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np = &tbl->hash_buckets[i];
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
- while ((n = *np) != NULL) {
+ for (i = 0; i <= nht->hash_mask; i++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np = &nht->hash_buckets[i];
+
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
}
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -279,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock);
+ seqlock_init(&n->ha_lock);
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
@@ -297,64 +313,86 @@ out_entries:
goto out;
}
-static struct neighbour **neigh_hash_alloc(unsigned int entries)
+static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
{
- unsigned long size = entries * sizeof(struct neighbour *);
- struct neighbour **ret;
+ size_t size = entries * sizeof(struct neighbour *);
+ struct neigh_hash_table *ret;
+ struct neighbour **buckets;
- if (size <= PAGE_SIZE) {
- ret = kzalloc(size, GFP_ATOMIC);
- } else {
- ret = (struct neighbour **)
- __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
+ ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
+ if (!ret)
+ return NULL;
+ if (size <= PAGE_SIZE)
+ buckets = kzalloc(size, GFP_ATOMIC);
+ else
+ buckets = (struct neighbour **)
+ __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
+ get_order(size));
+ if (!buckets) {
+ kfree(ret);
+ return NULL;
}
+ rcu_assign_pointer(ret->hash_buckets, buckets);
+ ret->hash_mask = entries - 1;
+ get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
}
-static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+static void neigh_hash_free_rcu(struct rcu_head *head)
{
- unsigned long size = entries * sizeof(struct neighbour *);
+ struct neigh_hash_table *nht = container_of(head,
+ struct neigh_hash_table,
+ rcu);
+ size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
+ struct neighbour **buckets = nht->hash_buckets;
if (size <= PAGE_SIZE)
- kfree(hash);
+ kfree(buckets);
else
- free_pages((unsigned long)hash, get_order(size));
+ free_pages((unsigned long)buckets, get_order(size));
+ kfree(nht);
}
-static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
+ unsigned long new_entries)
{
- struct neighbour **new_hash, **old_hash;
- unsigned int i, new_hash_mask, old_entries;
+ unsigned int i, hash;
+ struct neigh_hash_table *new_nht, *old_nht;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
BUG_ON(!is_power_of_2(new_entries));
- new_hash = neigh_hash_alloc(new_entries);
- if (!new_hash)
- return;
-
- old_entries = tbl->hash_mask + 1;
- new_hash_mask = new_entries - 1;
- old_hash = tbl->hash_buckets;
+ old_nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ new_nht = neigh_hash_alloc(new_entries);
+ if (!new_nht)
+ return old_nht;
- get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
- for (i = 0; i < old_entries; i++) {
+ for (i = 0; i <= old_nht->hash_mask; i++) {
struct neighbour *n, *next;
- for (n = old_hash[i]; n; n = next) {
- unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
-
- hash_val &= new_hash_mask;
- next = n->next;
-
- n->next = new_hash[hash_val];
- new_hash[hash_val] = n;
+ for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
+ lockdep_is_held(&tbl->lock));
+ n != NULL;
+ n = next) {
+ hash = tbl->hash(n->primary_key, n->dev,
+ new_nht->hash_rnd);
+
+ hash &= new_nht->hash_mask;
+ next = rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock));
+
+ rcu_assign_pointer(n->next,
+ rcu_dereference_protected(
+ new_nht->hash_buckets[hash],
+ lockdep_is_held(&tbl->lock)));
+ rcu_assign_pointer(new_nht->hash_buckets[hash], n);
}
}
- tbl->hash_buckets = new_hash;
- tbl->hash_mask = new_hash_mask;
- neigh_hash_free(old_hash, old_entries);
+ rcu_assign_pointer(tbl->nht, new_nht);
+ call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
+ return new_nht;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
@@ -363,19 +401,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hash_val = tbl->hash(pkey, dev);
- for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
+
+ for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
- neigh_hold(n);
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
- read_unlock_bh(&tbl->lock);
+
+ rcu_read_unlock_bh();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
@@ -386,20 +431,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hash_val = tbl->hash(pkey, NULL);
- for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
+
+ for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (!memcmp(n->primary_key, pkey, key_len) &&
net_eq(dev_net(n->dev), net)) {
- neigh_hold(n);
+ if (!atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
- read_unlock_bh(&tbl->lock);
+
+ rcu_read_unlock_bh();
return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
@@ -411,6 +463,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+ struct neigh_hash_table *nht;
if (!n) {
rc = ERR_PTR(-ENOBUFS);
@@ -437,18 +490,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
- neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+ if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
+ nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
- hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+ hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
if (n->parms->dead) {
rc = ERR_PTR(-EINVAL);
goto out_tbl_unlock;
}
- for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+ for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
+ lockdep_is_held(&tbl->lock));
+ n1 != NULL;
+ n1 = rcu_dereference_protected(n1->next,
+ lockdep_is_held(&tbl->lock))) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
rc = n1;
@@ -456,10 +515,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
}
}
- n->next = tbl->hash_buckets[hash_val];
- tbl->hash_buckets[hash_val] = n;
n->dead = 0;
neigh_hold(n);
+ rcu_assign_pointer(n->next,
+ rcu_dereference_protected(nht->hash_buckets[hash_val],
+ lockdep_is_held(&tbl->lock)));
+ rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
rc = n;
@@ -616,6 +677,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
neigh_parms_destroy(parms);
}
+static void neigh_destroy_rcu(struct rcu_head *head)
+{
+ struct neighbour *neigh = container_of(head, struct neighbour, rcu);
+
+ kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
/*
* neighbour must already be out of the table;
*
@@ -643,8 +710,7 @@ void neigh_destroy(struct neighbour *neigh)
write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
write_sequnlock_bh(&hh->hh_lock);
- if (atomic_dec_and_test(&hh->hh_refcnt))
- kfree(hh);
+ hh_cache_put(hh);
}
skb_queue_purge(&neigh->arp_queue);
@@ -655,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
atomic_dec(&neigh->tbl->entries);
- kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+ call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);
@@ -696,12 +762,16 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
- struct neighbour *n, **np;
+ struct neighbour *n;
+ struct neighbour __rcu **np;
unsigned int i;
+ struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
write_lock_bh(&tbl->lock);
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
/*
* periodically recompute ReachableTime from random function
@@ -715,10 +785,11 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
- for (i = 0 ; i <= tbl->hash_mask; i++) {
- np = &tbl->hash_buckets[i];
+ for (i = 0 ; i <= nht->hash_mask; i++) {
+ np = &nht->hash_buckets[i];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
unsigned int state;
write_lock(&n->lock);
@@ -766,9 +837,9 @@ next_elt:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE ?
+ return (n->nud_state & NUD_PROBE) ?
p->ucast_probes :
- p->ucast_probes + p->app_probes + p->mcast_probes);
+ p->ucast_probes + p->app_probes + p->mcast_probes;
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -945,7 +1016,7 @@ out_unlock_bh:
}
EXPORT_SYMBOL(__neigh_event_send);
-static void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(const struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1081,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
}
if (lladdr != neigh->ha) {
+ write_seqlock(&neigh->ha_lock);
memcpy(&neigh->ha, lladdr, dev->addr_len);
+ write_sequnlock(&neigh->ha_lock);
neigh_update_hhs(neigh);
if (!(new & NUD_CONNECTED))
neigh->confirmed = jiffies -
@@ -1139,44 +1212,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
}
EXPORT_SYMBOL(neigh_event_ns);
+static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
+ __be16 protocol)
+{
+ struct hh_cache *hh;
+
+ smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
+ for (hh = n->hh; hh; hh = hh->hh_next) {
+ if (hh->hh_type == protocol) {
+ atomic_inc(&hh->hh_refcnt);
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
__be16 protocol)
{
struct hh_cache *hh;
struct net_device *dev = dst->dev;
- for (hh = n->hh; hh; hh = hh->hh_next)
- if (hh->hh_type == protocol)
- break;
+ if (likely(neigh_hh_lookup(n, dst, protocol)))
+ return;
- if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
- seqlock_init(&hh->hh_lock);
- hh->hh_type = protocol;
- atomic_set(&hh->hh_refcnt, 0);
- hh->hh_next = NULL;
+ /* slow path */
+ hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
+ if (!hh)
+ return;
- if (dev->header_ops->cache(n, hh)) {
- kfree(hh);
- hh = NULL;
- } else {
- atomic_inc(&hh->hh_refcnt);
- hh->hh_next = n->hh;
- n->hh = hh;
- if (n->nud_state & NUD_CONNECTED)
- hh->hh_output = n->ops->hh_output;
- else
- hh->hh_output = n->ops->output;
- }
+ seqlock_init(&hh->hh_lock);
+ hh->hh_type = protocol;
+ atomic_set(&hh->hh_refcnt, 2);
+
+ if (dev->header_ops->cache(n, hh)) {
+ kfree(hh);
+ return;
}
- if (hh) {
- atomic_inc(&hh->hh_refcnt);
- dst->hh = hh;
+
+ write_lock_bh(&n->lock);
+
+ /* must check if another thread already did the insert */
+ if (neigh_hh_lookup(n, dst, protocol)) {
+ kfree(hh);
+ goto end;
}
+
+ if (n->nud_state & NUD_CONNECTED)
+ hh->hh_output = n->ops->hh_output;
+ else
+ hh->hh_output = n->ops->output;
+
+ hh->hh_next = n->hh;
+ smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
+ n->hh = hh;
+
+ if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
+ hh_cache_put(hh);
+end:
+ write_unlock_bh(&n->lock);
}
/* This function can be used in contexts, where only old dev_queue_xmit
- worked, f.e. if you want to override normal output path (eql, shaper),
- but resolution is not made yet.
+ * worked, f.e. if you want to override normal output path (eql, shaper),
+ * but resolution is not made yet.
*/
int neigh_compat_output(struct sk_buff *skb)
@@ -1210,19 +1312,19 @@ int neigh_resolve_output(struct sk_buff *skb)
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
- if (dev->header_ops->cache && !dst->hh) {
- write_lock_bh(&neigh->lock);
- if (!dst->hh)
- neigh_hh_init(neigh, dst, dst->ops->protocol);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- write_unlock_bh(&neigh->lock);
- } else {
- read_lock_bh(&neigh->lock);
+ unsigned int seq;
+
+ if (dev->header_ops->cache &&
+ !dst->hh &&
+ !(dst->flags & DST_NOCACHE))
+ neigh_hh_init(neigh, dst, dst->ops->protocol);
+
+ do {
+ seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
- }
+ } while (read_seqretry(&neigh->ha_lock, seq));
+
if (err >= 0)
rc = neigh->ops->queue_xmit(skb);
else
@@ -1248,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
+ unsigned int seq;
__skb_pull(skb, skb_network_offset(skb));
- read_lock_bh(&neigh->lock);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
+ do {
+ seq = read_seqbegin(&neigh->ha_lock);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
+ } while (read_seqretry(&neigh->ha_lock, seq));
+
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
else {
@@ -1436,17 +1541,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot create neighbour proc dir entry");
#endif
- tbl->hash_mask = 1;
- tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
+ tbl->nht = neigh_hash_alloc(8);
phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
- if (!tbl->hash_buckets || !tbl->phash_buckets)
+ if (!tbl->nht || !tbl->phash_buckets)
panic("cannot allocate neighbour cache hashes");
- get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
-
rwlock_init(&tbl->lock);
INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
@@ -1486,8 +1588,7 @@ int neigh_table_clear(struct neigh_table *tbl)
struct neigh_table **tp;
/* It is not clean... Fix it to unload IPv6 module safely */
- cancel_delayed_work(&tbl->gc_work);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
neigh_ifdown(tbl, NULL);
@@ -1502,8 +1603,8 @@ int neigh_table_clear(struct neigh_table *tbl)
}
write_unlock(&neigh_tbl_lock);
- neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
- tbl->hash_buckets = NULL;
+ call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+ tbl->nht = NULL;
kfree(tbl->phash_buckets);
tbl->phash_buckets = NULL;
@@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *dev = NULL;
int err = -EINVAL;
+ ASSERT_RTNL();
if (nlmsg_len(nlh) < sizeof(*ndm))
goto out;
@@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
@@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_unlock(&neigh_tbl_lock);
if (nla_len(dst_attr) < tbl->key_len)
- goto out_dev_put;
+ goto out;
if (ndm->ndm_flags & NTF_PROXY) {
err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
- goto out_dev_put;
+ goto out;
}
if (dev == NULL)
- goto out_dev_put;
+ goto out;
neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
if (neigh == NULL) {
err = -ENOENT;
- goto out_dev_put;
+ goto out;
}
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
NEIGH_UPDATE_F_ADMIN);
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
-out_dev_put:
- if (dev)
- dev_put(dev);
out:
return err;
}
@@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *dev = NULL;
int err;
+ ASSERT_RTNL();
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
if (err < 0)
goto out;
@@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
- dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
err = -ENODEV;
goto out;
}
if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
- goto out_dev_put;
+ goto out;
}
read_lock(&neigh_tbl_lock);
@@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
read_unlock(&neigh_tbl_lock);
if (nla_len(tb[NDA_DST]) < tbl->key_len)
- goto out_dev_put;
+ goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
@@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
pn->flags = ndm->ndm_flags;
err = 0;
}
- goto out_dev_put;
+ goto out;
}
if (dev == NULL)
- goto out_dev_put;
+ goto out;
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
- goto out_dev_put;
+ goto out;
}
neigh = __neigh_lookup_errno(tbl, dst, dev);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
- goto out_dev_put;
+ goto out;
}
} else {
if (nlh->nlmsg_flags & NLM_F_EXCL) {
err = -EEXIST;
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
} else
err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
neigh_release(neigh);
- goto out_dev_put;
+ goto out;
}
read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
-
-out_dev_put:
- if (dev)
- dev_put(dev);
out:
return err;
}
@@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
unsigned long now = jiffies;
unsigned int flush_delta = now - tbl->last_flush;
unsigned int rand_delta = now - tbl->last_rand;
-
+ struct neigh_hash_table *nht;
struct ndt_config ndc = {
.ndtc_key_len = tbl->key_len,
.ndtc_entry_size = tbl->entry_size,
.ndtc_entries = atomic_read(&tbl->entries),
.ndtc_last_flush = jiffies_to_msecs(flush_delta),
.ndtc_last_rand = jiffies_to_msecs(rand_delta),
- .ndtc_hash_rnd = tbl->hash_rnd,
- .ndtc_hash_mask = tbl->hash_mask,
.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
};
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+ ndc.ndtc_hash_rnd = nht->hash_rnd;
+ ndc.ndtc_hash_mask = nht->hash_mask;
+ rcu_read_unlock_bh();
+
NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
}
@@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state;
- if ((neigh->nud_state & NUD_VALID) &&
- nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
- read_unlock_bh(&neigh->lock);
- goto nla_put_failure;
+ if (neigh->nud_state & NUD_VALID) {
+ char haddr[MAX_ADDR_LEN];
+
+ neigh_ha_snapshot(haddr, neigh, neigh->dev);
+ if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
+ read_unlock_bh(&neigh->lock);
+ goto nla_put_failure;
+ }
}
ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
@@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh)
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net * net = sock_net(skb->sk);
+ struct net *net = sock_net(skb->sk);
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
+ struct neigh_hash_table *nht;
- read_lock_bh(&tbl->lock);
- for (h = 0; h <= tbl->hash_mask; h++) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+
+ for (h = 0; h <= nht->hash_mask; h++) {
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
+ for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+ n != NULL;
+ n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
if (idx < s_idx)
@@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) <= 0) {
- read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
}
- next:
+next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
rc = skb->len;
out:
+ rcu_read_unlock_bh();
cb->args[1] = h;
cb->args[2] = idx;
return rc;
@@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
int chain;
+ struct neigh_hash_table *nht;
- read_lock_bh(&tbl->lock);
- for (chain = 0; chain <= tbl->hash_mask; chain++) {
+ rcu_read_lock_bh();
+ nht = rcu_dereference_bh(tbl->nht);
+
+ read_lock(&tbl->lock); /* avoid resizes */
+ for (chain = 0; chain <= nht->hash_mask; chain++) {
struct neighbour *n;
- for (n = tbl->hash_buckets[chain]; n; n = n->next)
+ for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+ n != NULL;
+ n = rcu_dereference_bh(n->next))
cb(n, cookie);
}
- read_unlock_bh(&tbl->lock);
+ read_unlock(&tbl->lock);
+ rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,
int (*cb)(struct neighbour *))
{
int chain;
+ struct neigh_hash_table *nht;
- for (chain = 0; chain <= tbl->hash_mask; chain++) {
- struct neighbour *n, **np;
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+ for (chain = 0; chain <= nht->hash_mask; chain++) {
+ struct neighbour *n;
+ struct neighbour __rcu **np;
- np = &tbl->hash_buckets[chain];
- while ((n = *np) != NULL) {
+ np = &nht->hash_buckets[chain];
+ while ((n = rcu_dereference_protected(*np,
+ lockdep_is_held(&tbl->lock))) != NULL) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
n->dead = 1;
} else
np = &n->next;
@@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct neigh_table *tbl = state->tbl;
+ struct neigh_hash_table *nht = state->nht;
struct neighbour *n = NULL;
int bucket = state->bucket;
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
- for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
- n = tbl->hash_buckets[bucket];
+ for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
+ n = rcu_dereference_bh(nht->hash_buckets[bucket]);
while (n) {
if (!net_eq(dev_net(n->dev), net))
@@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
break;
if (n->nud_state & ~NUD_NOARP)
break;
- next:
- n = n->next;
+next:
+ n = rcu_dereference_bh(n->next);
}
if (n)
@@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
{
struct neigh_seq_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct neigh_table *tbl = state->tbl;
+ struct neigh_hash_table *nht = state->nht;
if (state->neigh_sub_iter) {
void *v = state->neigh_sub_iter(state, n, pos);
if (v)
return n;
}
- n = n->next;
+ n = rcu_dereference_bh(n->next);
while (1) {
while (n) {
@@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (n->nud_state & ~NUD_NOARP)
break;
- next:
- n = n->next;
+next:
+ n = rcu_dereference_bh(n->next);
}
if (n)
break;
- if (++state->bucket > tbl->hash_mask)
+ if (++state->bucket > nht->hash_mask)
break;
- n = tbl->hash_buckets[state->bucket];
+ n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
}
if (n && pos)
@@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
- __acquires(tbl->lock)
+ __acquires(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
@@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
state->bucket = 0;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- read_lock_bh(&tbl->lock);
+ rcu_read_lock_bh();
+ state->nht = rcu_dereference_bh(tbl->nht);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -2411,12 +2534,9 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
- __releases(tbl->lock)
+ __releases(rcu_bh)
{
- struct neigh_seq_state *state = seq->private;
- struct neigh_table *tbl = state->tbl;
-
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af4dfbadf2a..b143173e3eb 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -515,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
return attribute->store(queue, attribute, buf, count);
}
-static struct sysfs_ops rx_queue_sysfs_ops = {
+static const struct sysfs_ops rx_queue_sysfs_ops = {
.show = rx_queue_attr_show,
.store = rx_queue_attr_store,
};
@@ -726,6 +726,7 @@ static struct kobj_type rx_queue_ktype = {
static int rx_queue_add_kobject(struct net_device *net, int index)
{
struct netdev_rx_queue *queue = net->_rx + index;
+ struct netdev_rx_queue *first = queue->first;
struct kobject *kobj = &queue->kobj;
int error = 0;
@@ -738,38 +739,43 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
}
kobject_uevent(kobj, KOBJ_ADD);
+ atomic_inc(&first->count);
return error;
}
-static int rx_queue_register_kobjects(struct net_device *net)
+int
+net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
{
int i;
int error = 0;
- net->queues_kset = kset_create_and_add("queues",
- NULL, &net->dev.kobj);
- if (!net->queues_kset)
- return -ENOMEM;
- for (i = 0; i < net->num_rx_queues; i++) {
+ for (i = old_num; i < new_num; i++) {
error = rx_queue_add_kobject(net, i);
- if (error)
+ if (error) {
+ new_num = old_num;
break;
+ }
}
- if (error)
- while (--i >= 0)
- kobject_put(&net->_rx[i].kobj);
+ while (--i >= new_num)
+ kobject_put(&net->_rx[i].kobj);
return error;
}
-static void rx_queue_remove_kobjects(struct net_device *net)
+static int rx_queue_register_kobjects(struct net_device *net)
{
- int i;
+ net->queues_kset = kset_create_and_add("queues",
+ NULL, &net->dev.kobj);
+ if (!net->queues_kset)
+ return -ENOMEM;
+ return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
+}
- for (i = 0; i < net->num_rx_queues; i++)
- kobject_put(&net->_rx[i].kobj);
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+ net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
kset_unregister(net->queues_kset);
}
#endif /* CONFIG_RPS */
@@ -789,12 +795,13 @@ static const void *net_netlink_ns(struct sock *sk)
return sock_net(sk);
}
-static struct kobj_ns_type_operations net_ns_type_operations = {
+struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
.current_ns = net_current_ns,
.netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns,
};
+EXPORT_SYMBOL_GPL(net_ns_type_operations);
static void net_kobj_ns_exit(struct net *net)
{
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 805555e8b18..778e1571548 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,4 +4,8 @@
int netdev_kobject_init(void);
int netdev_register_kobject(struct net_device *);
void netdev_unregister_kobject(struct net_device *);
+#ifdef CONFIG_RPS
+int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
+#endif
+
#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 537e01afd81..4e98ffac3af 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -288,11 +288,11 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
+ struct net_device *dev)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
- struct net_device *dev = np->dev;
const struct net_device_ops *ops = dev->netdev_ops;
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo = np->dev->npinfo;
@@ -346,7 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
schedule_delayed_work(&npinfo->tx_work,0);
}
}
-EXPORT_SYMBOL(netpoll_send_skb);
+EXPORT_SYMBOL(netpoll_send_skb_on_dev);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10a1ea72010..2c0df0f95b3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -729,16 +729,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
*num = 0;
for (; i < maxlen; i++) {
+ int value;
char c;
*num <<= 4;
if (get_user(c, &user_buffer[i]))
return -EFAULT;
- if ((c >= '0') && (c <= '9'))
- *num |= c - '0';
- else if ((c >= 'a') && (c <= 'f'))
- *num |= c - 'a' + 10;
- else if ((c >= 'A') && (c <= 'F'))
- *num |= c - 'A' + 10;
+ value = hex_to_bin(c);
+ if (value >= 0)
+ *num |= value;
else
break;
}
@@ -3907,8 +3905,6 @@ static void __exit pg_cleanup(void)
{
struct pktgen_thread *t;
struct list_head *q, *n;
- wait_queue_head_t queue;
- init_waitqueue_head(&queue);
/* Stop all interfaces & threads */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f78d821bd93..8121268ddbd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,14 +299,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
unregister_netdevice_many(&list_kill);
}
-void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
-{
- rtnl_lock();
- __rtnl_kill_links(net, ops);
- rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(rtnl_kill_links);
-
/**
* __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
* @ops: struct rtnl_link_ops * to unregister
@@ -612,36 +604,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
{
- struct rtnl_link_stats64 a;
-
- a.rx_packets = b->rx_packets;
- a.tx_packets = b->tx_packets;
- a.rx_bytes = b->rx_bytes;
- a.tx_bytes = b->tx_bytes;
- a.rx_errors = b->rx_errors;
- a.tx_errors = b->tx_errors;
- a.rx_dropped = b->rx_dropped;
- a.tx_dropped = b->tx_dropped;
-
- a.multicast = b->multicast;
- a.collisions = b->collisions;
-
- a.rx_length_errors = b->rx_length_errors;
- a.rx_over_errors = b->rx_over_errors;
- a.rx_crc_errors = b->rx_crc_errors;
- a.rx_frame_errors = b->rx_frame_errors;
- a.rx_fifo_errors = b->rx_fifo_errors;
- a.rx_missed_errors = b->rx_missed_errors;
-
- a.tx_aborted_errors = b->tx_aborted_errors;
- a.tx_carrier_errors = b->tx_carrier_errors;
- a.tx_fifo_errors = b->tx_fifo_errors;
- a.tx_heartbeat_errors = b->tx_heartbeat_errors;
- a.tx_window_errors = b->tx_window_errors;
-
- a.rx_compressed = b->rx_compressed;
- a.tx_compressed = b->tx_compressed;
- memcpy(v, &a, sizeof(a));
+ memcpy(v, b, sizeof(*b));
}
/* All VF info */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56ba3c4e476..104f8444754 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
skb->data = data;
skb_reset_tail_pointer(skb);
skb->end = skb->tail + size;
- kmemcheck_annotate_bitfield(skb, flags1);
- kmemcheck_annotate_bitfield(skb, flags2);
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->mac_header = ~0U;
#endif
@@ -249,10 +247,9 @@ EXPORT_SYMBOL(__alloc_skb);
struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
unsigned int length, gfp_t gfp_mask)
{
- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
struct sk_buff *skb;
- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD);
skb->dev = dev;
@@ -261,16 +258,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
}
EXPORT_SYMBOL(__netdev_alloc_skb);
-struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
- int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
- struct page *page;
-
- page = alloc_pages_node(node, gfp_mask, 0);
- return page;
-}
-EXPORT_SYMBOL(__netdev_alloc_page);
-
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size)
{
@@ -340,7 +327,7 @@ static void skb_release_data(struct sk_buff *skb)
put_page(skb_shinfo(skb)->frags[i].page);
}
- if (skb_has_frags(skb))
+ if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
kfree(skb->head);
@@ -686,16 +673,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
- int headerlen = skb->data - skb->head;
- /*
- * Allocate the copy buffer
- */
- struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- n = alloc_skb(skb->end + skb->data_len, gfp_mask);
-#else
- n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
-#endif
+ int headerlen = skb_headroom(skb);
+ unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+ struct sk_buff *n = alloc_skb(size, gfp_mask);
+
if (!n)
return NULL;
@@ -727,20 +708,14 @@ EXPORT_SYMBOL(skb_copy);
struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
{
- /*
- * Allocate the copy buffer
- */
- struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- n = alloc_skb(skb->end, gfp_mask);
-#else
- n = alloc_skb(skb->end - skb->head, gfp_mask);
-#endif
+ unsigned int size = skb_end_pointer(skb) - skb->head;
+ struct sk_buff *n = alloc_skb(size, gfp_mask);
+
if (!n)
goto out;
/* Set the data pointer */
- skb_reserve(n, skb->data - skb->head);
+ skb_reserve(n, skb_headroom(skb));
/* Set the tail pointer and length */
skb_put(n, skb_headlen(skb));
/* Copy the bytes */
@@ -760,7 +735,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
skb_shinfo(n)->nr_frags = i;
}
- if (skb_has_frags(skb)) {
+ if (skb_has_frag_list(skb)) {
skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
skb_clone_fraglist(n);
}
@@ -792,12 +767,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
{
int i;
u8 *data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- int size = nhead + skb->end + ntail;
-#else
- int size = nhead + (skb->end - skb->head) + ntail;
-#endif
+ int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
long off;
+ bool fastpath;
BUG_ON(nhead < 0);
@@ -811,23 +783,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
goto nodata;
/* Copy only real data... and, alas, header. This should be
- * optimized for the cases when header is void. */
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
- memcpy(data + nhead, skb->head, skb->tail);
-#else
- memcpy(data + nhead, skb->head, skb->tail - skb->head);
-#endif
- memcpy(data + size, skb_end_pointer(skb),
+ * optimized for the cases when header is void.
+ */
+ memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
+
+ memcpy((struct skb_shared_info *)(data + size),
+ skb_shinfo(skb),
offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- get_page(skb_shinfo(skb)->frags[i].page);
+ /* Check if we can avoid taking references on fragments if we own
+ * the last reference on skb->head. (see skb_release_data())
+ */
+ if (!skb->cloned)
+ fastpath = true;
+ else {
+ int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
- if (skb_has_frags(skb))
- skb_clone_fraglist(skb);
+ fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+ }
- skb_release_data(skb);
+ if (fastpath) {
+ kfree(skb->head);
+ } else {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ get_page(skb_shinfo(skb)->frags[i].page);
+ if (skb_has_frag_list(skb))
+ skb_clone_fraglist(skb);
+
+ skb_release_data(skb);
+ }
off = (data + nhead) - skb->head;
skb->head = data;
@@ -1100,7 +1085,7 @@ drop_pages:
for (; i < nfrags; i++)
put_page(skb_shinfo(skb)->frags[i].page);
- if (skb_has_frags(skb))
+ if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
goto done;
}
@@ -1195,7 +1180,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
*/
- if (!skb_has_frags(skb))
+ if (!skb_has_frag_list(skb))
goto pull_pages;
/* Estimate size of pulled pages. */
@@ -2324,7 +2309,7 @@ next_skb:
st->frag_data = NULL;
}
- if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) {
+ if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
st->frag_idx = 0;
goto next_skb;
@@ -2894,7 +2879,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
return -ENOMEM;
/* Easy case. Most of packets will go this way. */
- if (!skb_has_frags(skb)) {
+ if (!skb_has_frag_list(skb)) {
/* A little of trouble, not enough of space for trailer.
* This should not happen, when stack is tuned to generate
* good frames. OK, on miss we reallocate and reserve even more
@@ -2929,7 +2914,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
if (skb1->next == NULL && tailbits) {
if (skb_shinfo(skb1)->nr_frags ||
- skb_has_frags(skb1) ||
+ skb_has_frag_list(skb1) ||
skb_tailroom(skb1) < tailbits)
ntail = tailbits + 128;
}
@@ -2938,7 +2923,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
skb_cloned(skb1) ||
ntail ||
skb_shinfo(skb1)->nr_frags ||
- skb_has_frags(skb1)) {
+ skb_has_frag_list(skb1)) {
struct sk_buff *skb2;
/* Fuck, we are miserable poor guys... */
@@ -3021,7 +3006,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
} else {
/*
* no hardware time stamps available,
- * so keep the skb_shared_tx and only
+ * so keep the shared tx_flags and only
* store software time stamp
*/
skb->tstamp = ktime_get_real();
diff --git a/net/core/sock.c b/net/core/sock.c
index 7d99e13148e..11db43632df 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1560,6 +1560,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
EXPORT_SYMBOL(sock_alloc_send_skb);
static void __lock_sock(struct sock *sk)
+ __releases(&sk->sk_lock.slock)
+ __acquires(&sk->sk_lock.slock)
{
DEFINE_WAIT(wait);
@@ -1576,6 +1578,8 @@ static void __lock_sock(struct sock *sk)
}
static void __release_sock(struct sock *sk)
+ __releases(&sk->sk_lock.slock)
+ __acquires(&sk->sk_lock.slock)
{
struct sk_buff *skb = sk->sk_backlog.head;
diff --git a/net/core/utils.c b/net/core/utils.c
index f4185447053..5fea0ab2190 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -75,7 +75,7 @@ __be32 in_aton(const char *str)
str++;
}
}
- return(htonl(l));
+ return htonl(l);
}
EXPORT_SYMBOL(in_aton);
@@ -92,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
static inline int xdigit2bin(char c, int delim)
{
+ int val;
+
if (c == delim || c == '\0')
return IN6PTON_DELIM;
if (c == ':')
return IN6PTON_COLON_MASK;
if (c == '.')
return IN6PTON_DOT;
- if (c >= '0' && c <= '9')
- return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0'));
- if (c >= 'a' && c <= 'f')
- return (IN6PTON_XDIGIT | (c - 'a' + 10));
- if (c >= 'A' && c <= 'F')
- return (IN6PTON_XDIGIT | (c - 'A' + 10));
+
+ val = hex_to_bin(c);
+ if (val >= 0)
+ return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
+
if (delim == -1)
return IN6PTON_DELIM;
return IN6PTON_UNKNOWN;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac963..117fb093dca 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,22 +62,18 @@ struct ccid_operations {
void (*ccid_hc_tx_exit)(struct sock *sk);
void (*ccid_hc_rx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
- int (*ccid_hc_rx_parse_options)(struct sock *sk,
- unsigned char option,
- unsigned char len, u16 idx,
- unsigned char* value);
+ int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
+ u8 opt, u8 *val, u8 len);
int (*ccid_hc_rx_insert_options)(struct sock *sk,
struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
- int (*ccid_hc_tx_parse_options)(struct sock *sk,
- unsigned char option,
- unsigned char len, u16 idx,
- unsigned char* value);
+ int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
+ u8 opt, u8 *val, u8 len);
int (*ccid_hc_tx_send_packet)(struct sock *sk,
struct sk_buff *skb);
void (*ccid_hc_tx_packet_sent)(struct sock *sk,
- int more, unsigned int len);
+ unsigned int len);
void (*ccid_hc_rx_get_info)(struct sock *sk,
struct tcp_info *info);
void (*ccid_hc_tx_get_info)(struct sock *sk,
@@ -148,10 +144,10 @@ static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
- int more, unsigned int len)
+ unsigned int len)
{
if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
- ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
+ ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
}
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
@@ -168,27 +164,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
}
+/**
+ * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
+ * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
+ * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
+ * @val: value of @opt
+ * @len: length of @val in bytes
+ */
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
- unsigned char option,
- unsigned char len, u16 idx,
- unsigned char* value)
+ u8 pkt, u8 opt, u8 *val, u8 len)
{
- int rc = 0;
- if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
- rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
- value);
- return rc;
+ if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
+ return 0;
+ return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
}
+/**
+ * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
+ * Arguments are analogous to ccid_hc_tx_parse_options()
+ */
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
- unsigned char option,
- unsigned char len, u16 idx,
- unsigned char* value)
+ u8 pkt, u8 opt, u8 *val, u8 len)
{
- int rc = 0;
- if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
- rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
- return rc;
+ if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
+ return 0;
+ return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
}
static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44..0581143cb80 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
If in doubt, say N.
-config IP_DCCP_CCID3_RTO
- int "Use higher bound for nofeedback timer"
- default 100
- depends on IP_DCCP_CCID3 && EXPERIMENTAL
- ---help---
- Use higher lower bound for nofeedback timer expiration.
-
- The TFRC nofeedback timer normally expires after the maximum of 4
- RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
- with a small RTT this can mean a high processing load and reduced
- performance, since then the nofeedback timer is triggered very
- frequently.
-
- This option enables to set a higher lower bound for the nofeedback
- value. Values in units of milliseconds can be set here.
-
- A value of 0 disables this feature by enforcing the value specified
- in RFC 3448. The following values have been suggested as bounds for
- experimental use:
- * 16-20ms to match the typical multimedia inter-frame interval
- * 100ms as a reasonable compromise [default]
- * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
-
- The default of 100ms is a compromise between a large value for
- efficient DCCP implementations, and a small value to avoid disrupting
- the network in times of congestion.
-
- The purpose of the nofeedback timer is to slow DCCP down when there
- is serious network congestion: experimenting with larger values should
- therefore not be performed on WANs.
-
config IP_DCCP_TFRC_LIB
def_bool y if IP_DCCP_CCID3
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be..d850e291f87 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
*/
#include <linux/slab.h>
#include "../feat.h"
-#include "../ccid.h"
-#include "../dccp.h"
#include "ccid2.h"
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static int ccid2_debug;
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-
-static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
-{
- int len = 0;
- int pipe = 0;
- struct ccid2_seq *seqp = hc->tx_seqh;
-
- /* there is data in the chain */
- if (seqp != hc->tx_seqt) {
- seqp = seqp->ccid2s_prev;
- len++;
- if (!seqp->ccid2s_acked)
- pipe++;
-
- while (seqp != hc->tx_seqt) {
- struct ccid2_seq *prev = seqp->ccid2s_prev;
-
- len++;
- if (!prev->ccid2s_acked)
- pipe++;
-
- /* packets are sent sequentially */
- BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
- prev->ccid2s_seq ) >= 0);
- BUG_ON(time_before(seqp->ccid2s_sent,
- prev->ccid2s_sent));
-
- seqp = prev;
- }
- }
-
- BUG_ON(pipe != hc->tx_pipe);
- ccid2_pr_debug("len of chain=%d\n", len);
-
- do {
- seqp = seqp->ccid2s_prev;
- len++;
- } while (seqp != hc->tx_seqh);
-
- ccid2_pr_debug("total len=%d\n", len);
- BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
-}
#else
#define ccid2_pr_debug(format, a...)
-#define ccid2_hc_tx_check_sanity(hc)
#endif
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
dp->dccps_l_ack_ratio = val;
}
-static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
-{
- ccid2_pr_debug("change SRTT to %ld\n", val);
- hc->tx_srtt = val;
-}
-
-static void ccid2_start_rto_timer(struct sock *sk);
-
static void ccid2_hc_tx_rto_expire(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- long s;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
ccid2_pr_debug("RTO_EXPIRE\n");
- ccid2_hc_tx_check_sanity(hc);
-
/* back-off timer */
hc->tx_rto <<= 1;
+ if (hc->tx_rto > DCCP_RTO_MAX)
+ hc->tx_rto = DCCP_RTO_MAX;
- s = hc->tx_rto / HZ;
- if (s > 60)
- hc->tx_rto = 60 * HZ;
-
- ccid2_start_rto_timer(sk);
+ sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
/* adjust pipe, cwnd etc */
hc->tx_ssthresh = hc->tx_cwnd / 2;
if (hc->tx_ssthresh < 2)
hc->tx_ssthresh = 2;
- hc->tx_cwnd = 1;
- hc->tx_pipe = 0;
+ hc->tx_cwnd = 1;
+ hc->tx_pipe = 0;
/* clear state about stuff we sent */
hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +146,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
hc->tx_rpseq = 0;
hc->tx_rpdupack = -1;
ccid2_change_l_ack_ratio(sk, 1);
- ccid2_hc_tx_check_sanity(hc);
out:
bh_unlock_sock(sk);
sock_put(sk);
}
-static void ccid2_start_rto_timer(struct sock *sk)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
-
- BUG_ON(timer_pending(&hc->tx_rtotimer));
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-}
-
-static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
+static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
hc->tx_seqh->ccid2s_acked = 0;
- hc->tx_seqh->ccid2s_sent = jiffies;
+ hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
next = hc->tx_seqh->ccid2s_next;
/* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
}
#endif
- /* setup RTO timer */
- if (!timer_pending(&hc->tx_rtotimer))
- ccid2_start_rto_timer(sk);
+ sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
do {
struct ccid2_seq *seqp = hc->tx_seqt;
while (seqp != hc->tx_seqh) {
- ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
+ ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
(unsigned long long)seqp->ccid2s_seq,
seqp->ccid2s_acked, seqp->ccid2s_sent);
seqp = seqp->ccid2s_next;
}
} while (0);
ccid2_pr_debug("=========\n");
- ccid2_hc_tx_check_sanity(hc);
#endif
}
@@ -378,17 +306,87 @@ out_invalid_option:
return -1;
}
-static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
+/**
+ * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
+ * This code is almost identical with TCP's tcp_rtt_estimator(), since
+ * - it has a higher sampling frequency (recommended by RFC 1323),
+ * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
+ * - it is simple (cf. more complex proposals such as Eifel timer or research
+ * which suggests that the gain should be set according to window size),
+ * - in tests it was found to work well with CCID2 [gerrit].
+ */
+static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ long m = mrtt ? : 1;
- sk_stop_timer(sk, &hc->tx_rtotimer);
- ccid2_pr_debug("deleted RTO timer\n");
+ if (hc->tx_srtt == 0) {
+ /* First measurement m */
+ hc->tx_srtt = m << 3;
+ hc->tx_mdev = m << 1;
+
+ hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
+ hc->tx_rttvar = hc->tx_mdev_max;
+
+ hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
+ } else {
+ /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
+ m -= (hc->tx_srtt >> 3);
+ hc->tx_srtt += m;
+
+ /* Similarly, update scaled mdev with regard to |m| */
+ if (m < 0) {
+ m = -m;
+ m -= (hc->tx_mdev >> 2);
+ /*
+ * This neutralises RTO increase when RTT < SRTT - mdev
+ * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
+ * in Linux TCP", USENIX 2002, pp. 49-62).
+ */
+ if (m > 0)
+ m >>= 3;
+ } else {
+ m -= (hc->tx_mdev >> 2);
+ }
+ hc->tx_mdev += m;
+
+ if (hc->tx_mdev > hc->tx_mdev_max) {
+ hc->tx_mdev_max = hc->tx_mdev;
+ if (hc->tx_mdev_max > hc->tx_rttvar)
+ hc->tx_rttvar = hc->tx_mdev_max;
+ }
+
+ /*
+ * Decay RTTVAR at most once per flight, exploiting that
+ * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
+ * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
+ * GAR is a useful bound for FlightSize = pipe.
+ * AWL is probably too low here, as it over-estimates pipe.
+ */
+ if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
+ if (hc->tx_mdev_max < hc->tx_rttvar)
+ hc->tx_rttvar -= (hc->tx_rttvar -
+ hc->tx_mdev_max) >> 2;
+ hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
+ hc->tx_mdev_max = tcp_rto_min(sk);
+ }
+ }
+
+ /*
+ * Set RTO from SRTT and RTTVAR
+ * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
+ * This agrees with RFC 4341, 5:
+ * "Because DCCP does not retransmit data, DCCP does not require
+ * TCP's recommended minimum timeout of one second".
+ */
+ hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
+
+ if (hc->tx_rto > DCCP_RTO_MAX)
+ hc->tx_rto = DCCP_RTO_MAX;
}
-static inline void ccid2_new_ack(struct sock *sk,
- struct ccid2_seq *seqp,
- unsigned int *maxincr)
+static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
+ unsigned int *maxincr)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
hc->tx_cwnd += 1;
hc->tx_packets_acked = 0;
}
-
- /* update RTO */
- if (hc->tx_srtt == -1 ||
- time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) {
- unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
- int s;
-
- /* first measurement */
- if (hc->tx_srtt == -1) {
- ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
- r, jiffies,
- (unsigned long long)seqp->ccid2s_seq);
- ccid2_change_srtt(hc, r);
- hc->tx_rttvar = r >> 1;
- } else {
- /* RTTVAR */
- long tmp = hc->tx_srtt - r;
- long srtt;
-
- if (tmp < 0)
- tmp *= -1;
-
- tmp >>= 2;
- hc->tx_rttvar *= 3;
- hc->tx_rttvar >>= 2;
- hc->tx_rttvar += tmp;
-
- /* SRTT */
- srtt = hc->tx_srtt;
- srtt *= 7;
- srtt >>= 3;
- tmp = r >> 3;
- srtt += tmp;
- ccid2_change_srtt(hc, srtt);
- }
- s = hc->tx_rttvar << 2;
- /* clock granularity is 1 when based on jiffies */
- if (!s)
- s = 1;
- hc->tx_rto = hc->tx_srtt + s;
-
- /* must be at least a second */
- s = hc->tx_rto / HZ;
- /* DCCP doesn't require this [but I like it cuz my code sux] */
-#if 1
- if (s < 1)
- hc->tx_rto = HZ;
-#endif
- /* max 60 seconds */
- if (s > 60)
- hc->tx_rto = HZ * 60;
-
- hc->tx_lastrtt = jiffies;
-
- ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
- hc->tx_srtt, hc->tx_rttvar,
- hc->tx_rto, HZ, r);
- }
-
- /* we got a new ack, so re-start RTO timer */
- ccid2_hc_tx_kill_rto_timer(sk);
- ccid2_start_rto_timer(sk);
-}
-
-static void ccid2_hc_tx_dec_pipe(struct sock *sk)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- if (hc->tx_pipe == 0)
- DCCP_BUG("pipe == 0");
- else
- hc->tx_pipe--;
-
- if (hc->tx_pipe == 0)
- ccid2_hc_tx_kill_rto_timer(sk);
+ /*
+ * FIXME: RTT is sampled several times per acknowledgment (for each
+ * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
+ * This causes the RTT to be over-estimated, since the older entries
+ * in the Ack Vector have earlier sending times.
+ * The cleanest solution is to not use the ccid2s_sent field at all
+ * and instead use DCCP timestamps: requires changes in other places.
+ */
+ ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
}
static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) {
+ if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
return;
}
- hc->tx_last_cong = jiffies;
+ hc->tx_last_cong = ccid2_time_stamp;
hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
int done = 0;
unsigned int maxincr = 0;
- ccid2_hc_tx_check_sanity(hc);
/* check reverse path congestion */
seqno = DCCP_SKB_CB(skb)->dccpd_seq;
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
seqp->ccid2s_acked = 1;
ccid2_pr_debug("Got ack for %llu\n",
(unsigned long long)seqp->ccid2s_seq);
- ccid2_hc_tx_dec_pipe(sk);
+ hc->tx_pipe--;
}
if (seqp == hc->tx_seqt) {
done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* one ack vector.
*/
ccid2_congestion_event(sk, seqp);
- ccid2_hc_tx_dec_pipe(sk);
+ hc->tx_pipe--;
}
if (seqp == hc->tx_seqt)
break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
hc->tx_seqt = hc->tx_seqt->ccid2s_next;
}
- ccid2_hc_tx_check_sanity(hc);
+ /* restart RTO timer if not all outstanding data has been acked */
+ if (hc->tx_pipe == 0)
+ sk_stop_timer(sk, &hc->tx_rtotimer);
+ else
+ sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
hc->tx_ssthresh = ~0U;
- /*
- * RFC 4341, 5: "The cwnd parameter is initialized to at most four
- * packets for new connections, following the rules from [RFC3390]".
- * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
- */
- hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
+ /* Use larger initial windows (RFC 4341, section 5). */
+ hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
/* Make sure that Ack Ratio is enabled and within bounds. */
max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
if (ccid2_hc_tx_alloc_seq(hc))
return -ENOMEM;
- hc->tx_rto = 3 * HZ;
- ccid2_change_srtt(hc, -1);
- hc->tx_rttvar = -1;
+ hc->tx_rto = DCCP_TIMEOUT_INIT;
hc->tx_rpdupack = -1;
- hc->tx_last_cong = jiffies;
+ hc->tx_last_cong = ccid2_time_stamp;
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
-
- ccid2_hc_tx_check_sanity(hc);
return 0;
}
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
int i;
- ccid2_hc_tx_kill_rto_timer(sk);
+ sk_stop_timer(sk, &hc->tx_rtotimer);
for (i = 0; i < hc->tx_seqbufc; i++)
kfree(hc->tx_seqbuf[i]);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103b..9731c2dc148 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
#ifndef _DCCP_CCID2_H_
#define _DCCP_CCID2_H_
-#include <linux/dccp.h>
#include <linux/timer.h>
#include <linux/types.h>
#include "../ccid.h"
+#include "../dccp.h"
+
+/*
+ * CCID-2 timestamping faces the same issues as TCP timestamping.
+ * Hence we reuse/share as much of the code as possible.
+ */
+#define ccid2_time_stamp tcp_time_stamp
+
/* NUMDUPACK parameter from RFC 4341, p. 6 */
#define NUMDUPACK 3
-struct sock;
-
struct ccid2_seq {
u64 ccid2s_seq;
- unsigned long ccid2s_sent;
+ u32 ccid2s_sent;
int ccid2s_acked;
struct ccid2_seq *ccid2s_prev;
struct ccid2_seq *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
* struct ccid2_hc_tx_sock - CCID2 TX half connection
* @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
* @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
- * @tx_lastrtt: time RTT was last measured
+ * @tx_srtt: smoothed RTT estimate, scaled by 2^3
+ * @tx_mdev: smoothed RTT variation, scaled by 2^2
+ * @tx_mdev_max: maximum of @mdev during one flight
+ * @tx_rttvar: moving average/maximum of @mdev_max
+ * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
+ * @tx_rtt_seq: to decay RTTVAR at most once per flight
* @tx_rpseq: last consecutive seqno
* @tx_rpdupack: dupacks since rpseq
*/
@@ -55,14 +65,19 @@ struct ccid2_hc_tx_sock {
int tx_seqbufc;
struct ccid2_seq *tx_seqh;
struct ccid2_seq *tx_seqt;
- long tx_rto;
- long tx_srtt;
- long tx_rttvar;
- unsigned long tx_lastrtt;
+
+ /* RTT measurement: variables/principles are the same as in TCP */
+ u32 tx_srtt,
+ tx_mdev,
+ tx_mdev_max,
+ tx_rttvar,
+ tx_rto;
+ u64 tx_rtt_seq:48;
struct timer_list tx_rtotimer;
+
u64 tx_rpseq;
int tx_rpdupack;
- unsigned long tx_last_cong;
+ u32 tx_last_cong;
u64 tx_high_ack;
};
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f75298649..3060a60ed5a 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
[TFRC_SSTATE_NO_SENT] = "NO_SENT",
[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
[TFRC_SSTATE_FBACK] = "FBACK",
- [TFRC_SSTATE_TERM] = "TERM",
};
return ccid3_state_names[state];
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
return scaled_div(w_init << 6, hc->tx_rtt);
}
-/*
- * Recalculate t_ipi and delta (should be called whenever X changes)
+/**
+ * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
+ * This respects the granularity of X_inst (64 * bytes/second).
*/
static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
{
- /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
- /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
- hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN);
-
- ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
- hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
+ ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
+ hc->tx_s, (unsigned)(hc->tx_x >> 6));
}
static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
ccid3_tx_state_name(hc->tx_state));
+ /* Ignore and do not restart after leaving the established state */
+ if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
+ goto out;
+
+ /* Reset feedback state to "no feedback received" */
if (hc->tx_state == TFRC_SSTATE_FBACK)
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
- else if (hc->tx_state != TFRC_SSTATE_NO_FBACK)
- goto out;
/*
* Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
+ * RTO is 0 if and only if no feedback has been received yet.
*/
- if (hc->tx_t_rto == 0 || /* no feedback received yet */
- hc->tx_p == 0) {
+ if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
/* halve send rate directly */
hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
* Set new timeout for the nofeedback timer.
* See comments in packet_recv() regarding the value of t_RTO.
*/
- if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */
+ if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
t_nfb = TFRC_INITIAL_TIMEOUT;
else
t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
if (unlikely(skb->len == 0))
return -EBADMSG;
- switch (hc->tx_state) {
- case TFRC_SSTATE_NO_SENT:
+ if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
hc->tx_last_win_count = 0;
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
ccid3_update_send_interval(hc);
ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
- break;
- case TFRC_SSTATE_NO_FBACK:
- case TFRC_SSTATE_FBACK:
+
+ } else {
delay = ktime_us_delta(hc->tx_t_nom, now);
ccid3_pr_debug("delay=%ld\n", (long)delay);
/*
- * Scheduling of packet transmissions [RFC 3448, 4.6]
+ * Scheduling of packet transmissions (RFC 5348, 8.3)
*
* if (t_now > t_nom - delta)
* // send the packet now
* else
* // send the packet in (t_nom - t_now) milliseconds.
*/
- if (delay - (s64)hc->tx_delta >= 1000)
- return (u32)delay / 1000L;
+ if (delay >= TFRC_T_DELTA)
+ return (u32)delay / USEC_PER_MSEC;
ccid3_hc_tx_update_win_count(hc, now);
- break;
- case TFRC_SSTATE_TERM:
- DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
- return -EINVAL;
}
/* prepare to send now (add options etc.) */
@@ -358,8 +351,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
- unsigned int len)
+static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
@@ -372,48 +364,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- struct ccid3_options_received *opt_recv;
+ struct tfrc_tx_hist_entry *acked;
ktime_t now;
unsigned long t_nfb;
- u32 pinv, r_sample;
+ u32 r_sample;
/* we are only interested in ACKs */
if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
return;
- /* ... and only in the established state */
- if (hc->tx_state != TFRC_SSTATE_FBACK &&
- hc->tx_state != TFRC_SSTATE_NO_FBACK)
- return;
-
- opt_recv = &hc->tx_options_received;
- now = ktime_get_real();
-
- /* Estimate RTT from history if ACK number is valid */
- r_sample = tfrc_tx_hist_rtt(hc->tx_hist,
- DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
- if (r_sample == 0) {
- DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
- dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
- (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
- return;
- }
-
- /* Update receive rate in units of 64 * bytes/second */
- hc->tx_x_recv = opt_recv->ccid3or_receive_rate;
- hc->tx_x_recv <<= 6;
-
- /* Update loss event rate (which is scaled by 1e6) */
- pinv = opt_recv->ccid3or_loss_event_rate;
- if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
- hc->tx_p = 0;
- else /* can not exceed 100% */
- hc->tx_p = scaled_div(1, pinv);
/*
- * Validate new RTT sample and update moving average
+ * Locate the acknowledged packet in the TX history.
+ *
+ * Returning "entry not found" here can for instance happen when
+ * - the host has not sent out anything (e.g. a passive server),
+ * - the Ack is outdated (packet with higher Ack number was received),
+ * - it is a bogus Ack (for a packet not sent on this connection).
*/
- r_sample = dccp_sample_rtt(sk, r_sample);
+ acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
+ if (acked == NULL)
+ return;
+ /* For the sake of RTT sampling, ignore/remove all older entries */
+ tfrc_tx_hist_purge(&acked->next);
+
+ /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
+ now = ktime_get_real();
+ r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
+
/*
* Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
*/
@@ -461,13 +439,12 @@ done_computing_x:
sk->sk_write_space(sk);
/*
- * Update timeout interval for the nofeedback timer.
- * We use a configuration option to increase the lower bound.
- * This can help avoid triggering the nofeedback timer too
- * often ('spinning') on LANs with small RTTs.
+ * Update timeout interval for the nofeedback timer. In order to control
+ * rate halving on networks with very low RTTs (<= 1 ms), use per-route
+ * tunable RTAX_RTO_MIN value as the lower bound.
*/
- hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO *
- (USEC_PER_SEC / 1000)));
+ hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
+ USEC_PER_SEC/HZ * tcp_rto_min(sk));
/*
* Schedule no feedback timer to expire in
* max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
@@ -482,66 +459,41 @@ done_computing_x:
jiffies + usecs_to_jiffies(t_nfb));
}
-static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
- unsigned char len, u16 idx,
- unsigned char *value)
+static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
+ u8 option, u8 *optval, u8 optlen)
{
- int rc = 0;
- const struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- struct ccid3_options_received *opt_recv;
__be32 opt_val;
- opt_recv = &hc->tx_options_received;
-
- if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
- opt_recv->ccid3or_seqno = dp->dccps_gsr;
- opt_recv->ccid3or_loss_event_rate = ~0;
- opt_recv->ccid3or_loss_intervals_idx = 0;
- opt_recv->ccid3or_loss_intervals_len = 0;
- opt_recv->ccid3or_receive_rate = 0;
- }
-
switch (option) {
+ case TFRC_OPT_RECEIVE_RATE:
case TFRC_OPT_LOSS_EVENT_RATE:
- if (unlikely(len != 4)) {
- DCCP_WARN("%s(%p), invalid len %d "
- "for TFRC_OPT_LOSS_EVENT_RATE\n",
- dccp_role(sk), sk, len);
- rc = -EINVAL;
- } else {
- opt_val = get_unaligned((__be32 *)value);
- opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
- ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
- dccp_role(sk), sk,
- opt_recv->ccid3or_loss_event_rate);
+ /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
+ if (packet_type == DCCP_PKT_DATA)
+ break;
+ if (unlikely(optlen != 4)) {
+ DCCP_WARN("%s(%p), invalid len %d for %u\n",
+ dccp_role(sk), sk, optlen, option);
+ return -EINVAL;
}
- break;
- case TFRC_OPT_LOSS_INTERVALS:
- opt_recv->ccid3or_loss_intervals_idx = idx;
- opt_recv->ccid3or_loss_intervals_len = len;
- ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n",
- dccp_role(sk), sk,
- opt_recv->ccid3or_loss_intervals_idx,
- opt_recv->ccid3or_loss_intervals_len);
- break;
- case TFRC_OPT_RECEIVE_RATE:
- if (unlikely(len != 4)) {
- DCCP_WARN("%s(%p), invalid len %d "
- "for TFRC_OPT_RECEIVE_RATE\n",
- dccp_role(sk), sk, len);
- rc = -EINVAL;
- } else {
- opt_val = get_unaligned((__be32 *)value);
- opt_recv->ccid3or_receive_rate = ntohl(opt_val);
+ opt_val = ntohl(get_unaligned((__be32 *)optval));
+
+ if (option == TFRC_OPT_RECEIVE_RATE) {
+ /* Receive Rate is kept in units of 64 bytes/second */
+ hc->tx_x_recv = opt_val;
+ hc->tx_x_recv <<= 6;
+
ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
- dccp_role(sk), sk,
- opt_recv->ccid3or_receive_rate);
+ dccp_role(sk), sk, opt_val);
+ } else {
+ /* Update the fixpoint Loss Event Rate fraction */
+ hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
+
+ ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
+ dccp_role(sk), sk, opt_val);
}
- break;
}
-
- return rc;
+ return 0;
}
static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -559,42 +511,36 @@ static void ccid3_hc_tx_exit(struct sock *sk)
{
struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
sk_stop_timer(sk, &hc->tx_no_feedback_timer);
-
tfrc_tx_hist_purge(&hc->tx_hist);
}
static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
{
- struct ccid3_hc_tx_sock *hc;
-
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return;
-
- hc = ccid3_hc_tx_sk(sk);
- info->tcpi_rto = hc->tx_t_rto;
- info->tcpi_rtt = hc->tx_rtt;
+ info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
+ info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
}
static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
u32 __user *optval, int __user *optlen)
{
- const struct ccid3_hc_tx_sock *hc;
+ const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
+ struct tfrc_tx_info tfrc;
const void *val;
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return -EINVAL;
-
- hc = ccid3_hc_tx_sk(sk);
switch (optname) {
case DCCP_SOCKOPT_CCID_TX_INFO:
- if (len < sizeof(hc->tx_tfrc))
+ if (len < sizeof(tfrc))
return -EINVAL;
- len = sizeof(hc->tx_tfrc);
- val = &hc->tx_tfrc;
+ tfrc.tfrctx_x = hc->tx_x;
+ tfrc.tfrctx_x_recv = hc->tx_x_recv;
+ tfrc.tfrctx_x_calc = hc->tx_x_calc;
+ tfrc.tfrctx_rtt = hc->tx_rtt;
+ tfrc.tfrctx_p = hc->tx_p;
+ tfrc.tfrctx_rto = hc->tx_t_rto;
+ tfrc.tfrctx_ipi = hc->tx_t_ipi;
+ len = sizeof(tfrc);
+ val = &tfrc;
break;
default:
return -ENOPROTOOPT;
@@ -624,7 +570,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
static const char *const ccid3_rx_state_names[] = {
[TFRC_RSTATE_NO_DATA] = "NO_DATA",
[TFRC_RSTATE_DATA] = "DATA",
- [TFRC_RSTATE_TERM] = "TERM",
};
return ccid3_rx_state_names[state];
@@ -650,14 +595,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
{
struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
- ktime_t now;
+ ktime_t now = ktime_get_real();
s64 delta = 0;
- if (unlikely(hc->rx_state == TFRC_RSTATE_TERM))
- return;
-
- now = ktime_get_real();
-
switch (fbtype) {
case CCID3_FBACK_INITIAL:
hc->rx_x_recv = 0;
@@ -701,14 +641,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
- const struct ccid3_hc_rx_sock *hc;
+ const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
__be32 x_recv, pinv;
if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
return 0;
- hc = ccid3_hc_rx_sk(sk);
-
if (dccp_packet_without_ack(skb))
return 0;
@@ -749,10 +687,11 @@ static u32 ccid3_first_li(struct sock *sk)
x_recv = scaled_div32(hc->rx_bytes_recv, delta);
if (x_recv == 0) { /* would also trigger divide-by-zero */
DCCP_WARN("X_recv==0\n");
- if ((x_recv = hc->rx_x_recv) == 0) {
+ if (hc->rx_x_recv == 0) {
DCCP_BUG("stored value of X_recv is zero");
return ~0U;
}
+ x_recv = hc->rx_x_recv;
}
fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -862,46 +801,31 @@ static void ccid3_hc_rx_exit(struct sock *sk)
{
struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
-
tfrc_rx_hist_purge(&hc->rx_hist);
tfrc_lh_cleanup(&hc->rx_li_hist);
}
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
{
- const struct ccid3_hc_rx_sock *hc;
-
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return;
-
- hc = ccid3_hc_rx_sk(sk);
- info->tcpi_ca_state = hc->rx_state;
+ info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- info->tcpi_rcv_rtt = hc->rx_rtt;
+ info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
}
static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
u32 __user *optval, int __user *optlen)
{
- const struct ccid3_hc_rx_sock *hc;
+ const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
struct tfrc_rx_info rx_info;
const void *val;
- /* Listen socks doesn't have a private CCID block */
- if (sk->sk_state == DCCP_LISTEN)
- return -EINVAL;
-
- hc = ccid3_hc_rx_sk(sk);
switch (optname) {
case DCCP_SOCKOPT_CCID_RX_INFO:
if (len < sizeof(rx_info))
return -EINVAL;
rx_info.tfrcrx_x_recv = hc->rx_x_recv;
rx_info.tfrcrx_rtt = hc->rx_rtt;
- rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U :
- scaled_div(1, hc->rx_pinv);
+ rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
len = sizeof(rx_info);
val = &rx_info;
break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 03263577665..1a9933c2967 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,35 +42,36 @@
#include "lib/tfrc.h"
#include "../ccid.h"
-/* Two seconds as per RFC 3448 4.2 */
+/* Two seconds as per RFC 5348, 4.2 */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
-/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
-#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
-
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
#define TFRC_T_MBI 64
+/*
+ * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
+ * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
+ * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
+ * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
+ * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
+ */
+#if (HZ >= 500)
+# define TFRC_T_DELTA USEC_PER_MSEC
+#else
+# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
+#endif
+
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
TFRC_OPT_LOSS_INTERVALS = 193,
TFRC_OPT_RECEIVE_RATE = 194,
};
-struct ccid3_options_received {
- u64 ccid3or_seqno:48,
- ccid3or_loss_intervals_idx:16;
- u16 ccid3or_loss_intervals_len;
- u32 ccid3or_loss_event_rate;
- u32 ccid3or_receive_rate;
-};
-
/* TFRC sender states */
enum ccid3_hc_tx_states {
TFRC_SSTATE_NO_SENT = 1,
TFRC_SSTATE_NO_FBACK,
TFRC_SSTATE_FBACK,
- TFRC_SSTATE_TERM,
};
/**
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states {
* @tx_no_feedback_timer: Handle to no feedback timer
* @tx_t_ld: Time last doubled during slow start
* @tx_t_nom: Nominal send time of next packet
- * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
* @tx_hist: Packet history
- * @tx_options_received: Parsed set of retrieved options
*/
struct ccid3_hc_tx_sock {
- struct tfrc_tx_info tx_tfrc;
-#define tx_x tx_tfrc.tfrctx_x
-#define tx_x_recv tx_tfrc.tfrctx_x_recv
-#define tx_x_calc tx_tfrc.tfrctx_x_calc
-#define tx_rtt tx_tfrc.tfrctx_rtt
-#define tx_p tx_tfrc.tfrctx_p
-#define tx_t_rto tx_tfrc.tfrctx_rto
-#define tx_t_ipi tx_tfrc.tfrctx_ipi
+ u64 tx_x;
+ u64 tx_x_recv;
+ u32 tx_x_calc;
+ u32 tx_rtt;
+ u32 tx_p;
+ u32 tx_t_rto;
+ u32 tx_t_ipi;
u16 tx_s;
enum ccid3_hc_tx_states tx_state:8;
u8 tx_last_win_count;
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock {
struct timer_list tx_no_feedback_timer;
ktime_t tx_t_ld;
ktime_t tx_t_nom;
- u32 tx_delta;
struct tfrc_tx_hist_entry *tx_hist;
- struct ccid3_options_received tx_options_received;
};
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
enum ccid3_hc_rx_states {
TFRC_RSTATE_NO_DATA = 1,
TFRC_RSTATE_DATA,
- TFRC_RSTATE_TERM = 127,
};
/**
* struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
- * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
- * @rx_rtt: Receiver estimate of rtt (non-standard)
- * @rx_p: Current loss event rate (RFC 3448 5.4)
* @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
* @rx_state: Receiver state, one of %ccid3_hc_rx_states
* @rx_bytes_recv: Total sum of DCCP payload bytes
* @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
* @rx_rtt: Receiver estimate of RTT
* @rx_tstamp_last_feedback: Time at which last feedback was sent
- * @rx_tstamp_last_ack: Time at which last feedback was sent
* @rx_hist: Packet history (loss detection + RTT sampling)
* @rx_li_hist: Loss Interval database
* @rx_s: Received packet size in bytes
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8fc3cbf7907..497723c4d4b 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
cur->li_length = len;
tfrc_lh_calc_i_mean(lh);
- return (lh->i_mean < old_i_mean);
+ return lh->i_mean < old_i_mean;
}
/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 3a4f414e94a..de8fe294bf0 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -38,18 +38,6 @@
#include "packet_history.h"
#include "../../dccp.h"
-/**
- * tfrc_tx_hist_entry - Simple singly-linked TX history list
- * @next: next oldest entry (LIFO order)
- * @seqno: sequence number of this entry
- * @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
- struct tfrc_tx_hist_entry *next;
- u64 seqno;
- ktime_t stamp;
-};
-
/*
* Transmitter History Routines
*/
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void)
}
}
-static struct tfrc_tx_hist_entry *
- tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
- while (head != NULL && head->seqno != seqno)
- head = head->next;
-
- return head;
-}
-
int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
{
struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
*headp = NULL;
}
-u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
- const ktime_t now)
-{
- u32 rtt = 0;
- struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
-
- if (packet != NULL) {
- rtt = ktime_us_delta(now, packet->stamp);
- /*
- * Garbage-collect older (irrelevant) entries:
- */
- tfrc_tx_hist_purge(&packet->next);
- }
-
- return rtt;
-}
-
-
/*
* Receiver History Routines
*/
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7df6c529999..7ee4a9d9d33 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
#include <linux/slab.h>
#include "tfrc.h"
-struct tfrc_tx_hist_entry;
+/**
+ * tfrc_tx_hist_entry - Simple singly-linked TX history list
+ * @next: next oldest entry (LIFO order)
+ * @seqno: sequence number of this entry
+ * @stamp: send time of packet with sequence number @seqno
+ */
+struct tfrc_tx_hist_entry {
+ struct tfrc_tx_hist_entry *next;
+ u64 seqno;
+ ktime_t stamp;
+};
+
+static inline struct tfrc_tx_hist_entry *
+ tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
+{
+ while (head != NULL && head->seqno != seqno)
+ head = head->next;
+ return head;
+}
extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
-extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
- const u64 seqno, const ktime_t now);
/* Subtraction a-b modulo-16, respects circular wrap-around */
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 01bb48e96c2..f8ee3f54977 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
extern int tfrc_tx_packet_history_init(void);
extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 22ca1cf0eb5..a052a4377e2 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
index = tfrc_binsearch(fvalue, 0);
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
+
+/**
+ * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
+ * When @loss_event_rate is large, there is a chance that p is truncated to 0.
+ * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
+ */
+u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
+{
+ if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
+ return 0;
+ if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
+ return 1000000;
+ return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
+}
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3ccef1b70fe..3eb264b6082 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,18 +153,27 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
}
/**
- * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1
- * @s1: start sequence number
- * @s2: end sequence number
+ * dccp_loss_count - Approximate the number of lost data packets in a burst loss
+ * @s1: last known sequence number before the loss ('hole')
+ * @s2: first sequence number seen after the 'hole'
* @ndp: NDP count on packet with sequence number @s2
- * Returns true if the sequence range s1...s2 has no data loss.
*/
-static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
+static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp)
{
s64 delta = dccp_delta_seqno(s1, s2);
WARN_ON(delta < 0);
- return (u64)delta <= ndp + 1;
+ delta -= ndp + 1;
+
+ return delta > 0 ? delta : 0;
+}
+
+/**
+ * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1
+ */
+static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
+{
+ return dccp_loss_count(s1, s2, ndp) == 0;
}
enum {
@@ -246,7 +255,6 @@ static inline void dccp_clear_xmit_timers(struct sock *sk)
extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
extern const char *dccp_packet_name(const int type);
-extern const char *dccp_state_name(const int state);
extern void dccp_set_state(struct sock *sk, const int state);
extern void dccp_done(struct sock *sk);
@@ -415,6 +423,23 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq)
dp->dccps_gsr = seq;
/* Sequence validity window depends on remote Sequence Window (7.5.1) */
dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
+ /*
+ * Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
+ * 7.5.1 we perform this check beyond the initial handshake: W/W' are
+ * always > 32, so for the first W/W' packets in the lifetime of a
+ * connection we always have to adjust SWL.
+ * A second reason why we are doing this is that the window depends on
+ * the feature-remote value of Sequence Window: nothing stops the peer
+ * from updating this value while we are busy adjusting SWL for the
+ * first W packets (we would have to count from scratch again then).
+ * Therefore it is safer to always make sure that the Sequence Window
+ * is not artificially extended by a peer who grows SWL downwards by
+ * continually updating the feature-remote Sequence-Window.
+ * If sequence numbers wrap it is bad luck. But that will take a while
+ * (48 bit), and this measure prevents Sequence-number attacks.
+ */
+ if (before48(dp->dccps_swl, dp->dccps_isr))
+ dp->dccps_swl = dp->dccps_isr;
dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
}
@@ -425,14 +450,16 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
dp->dccps_gss = seq;
/* Ack validity window depends on local Sequence Window value (7.5.1) */
dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
+ /* Adjust AWL so that it is not below ISS - see comment above for SWL */
+ if (before48(dp->dccps_awl, dp->dccps_iss))
+ dp->dccps_awl = dp->dccps_iss;
dp->dccps_awh = dp->dccps_gss;
}
static inline int dccp_ack_pending(const struct sock *sk)
{
const struct dccp_sock *dp = dccp_sk(sk);
- return dp->dccps_timestamp_echo != 0 ||
- (dp->dccps_hc_rx_ackvec != NULL &&
+ return (dp->dccps_hc_rx_ackvec != NULL &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
inet_csk_ack_scheduled(sk);
}
@@ -449,7 +476,6 @@ extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
extern u32 dccp_timestamp(void);
extern void dccp_timestamping_init(void);
-extern int dccp_insert_option_timestamp(struct sk_buff *skb);
extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
const void *value, unsigned char len);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index df7dd26cf07..568def95272 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -730,16 +730,6 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
0, list, len);
}
-/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
-int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
-{
- /* any changes must be registered before establishing the connection */
- if (sk->sk_state != DCCP_CLOSED)
- return -EISCONN;
- if (dccp_feat_type(feat) != FEAT_NN)
- return -EINVAL;
- return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
-}
/*
* Tracking features whose value depend on the choice of CCID
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index f96721619de..e56a4e5e634 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -111,7 +111,6 @@ extern int dccp_feat_init(struct sock *sk);
extern void dccp_feat_initialise_sysctls(void);
extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
u8 const *list, u8 len);
-extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 10c957a88f4..265985370fa 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -259,7 +259,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
sysctl_dccp_sync_ratelimit)))
return 0;
- DCCP_WARN("DCCP: Step 6 failed for %s packet, "
+ DCCP_WARN("Step 6 failed for %s packet, "
"(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
"(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
"sending SYNC...\n", dccp_packet_name(dh->dccph_type),
@@ -441,20 +441,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
- dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
- dccp_update_gsr(sk, dp->dccps_isr);
/*
- * SWL and AWL are initially adjusted so that they are not less than
- * the initial Sequence Numbers received and sent, respectively:
- * SWL := max(GSR + 1 - floor(W/4), ISR),
- * AWL := max(GSS - W' + 1, ISS).
- * These adjustments MUST be applied only at the beginning of the
- * connection.
- *
- * AWL was adjusted in dccp_v4_connect -acme
+ * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
+ * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
+ * is done as part of activating the feature values below, since
+ * these settings depend on the local/remote Sequence Window
+ * features, which were undefined or not confirmed until now.
*/
- dccp_set_seqno(&dp->dccps_swl,
- max48(dp->dccps_swl, dp->dccps_isr));
+ dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d4a166f0f39..3f69ea11482 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL)
- goto exit;
+ goto exit_nonewsk;
sk_setup_caps(newsk, dst);
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
dccp_sync_mss(newsk, dst_mtu(dst));
+ if (__inet_inherit_port(sk, newsk) < 0) {
+ sock_put(newsk);
+ goto exit;
+ }
__inet_hash_nolisten(newsk, NULL);
- __inet_inherit_port(sk, newsk);
return newsk;
exit_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+exit_nonewsk:
+ dst_release(dst);
exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
- dst_release(dst);
return NULL;
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e3f32575df..dca711df9b6 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL)
- goto out;
+ goto out_nonewsk;
/*
* No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
+ if (__inet_inherit_port(sk, newsk) < 0) {
+ sock_put(newsk);
+ goto out;
+ }
__inet6_hash(newsk, NULL);
- __inet_inherit_port(sk, newsk);
return newsk;
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+out_nonewsk:
+ dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
if (opt != NULL && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len);
- dst_release(dst);
return NULL;
}
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 128b089d3ae..d7041a0963a 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -121,30 +121,18 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
*
* Choose S.ISS (initial seqno) or set from Init Cookies
* Initialize S.GAR := S.ISS
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
- */
- newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
- dccp_update_gss(newsk, dreq->dreq_iss);
-
- newdp->dccps_isr = dreq->dreq_isr;
- dccp_update_gsr(newsk, dreq->dreq_isr);
-
- /*
- * SWL and AWL are initially adjusted so that they are not less than
- * the initial Sequence Numbers received and sent, respectively:
- * SWL := max(GSR + 1 - floor(W/4), ISR),
- * AWL := max(GSS - W' + 1, ISS).
- * These adjustments MUST be applied only at the beginning of the
- * connection.
+ * Set S.ISR, S.GSR from packet (or Init Cookies)
+ *
+ * Setting AWL/AWH and SWL/SWH happens as part of the feature
+ * activation below, as these windows all depend on the local
+ * and remote Sequence Window feature values (7.5.2).
*/
- dccp_set_seqno(&newdp->dccps_swl,
- max48(newdp->dccps_swl, newdp->dccps_isr));
- dccp_set_seqno(&newdp->dccps_awl,
- max48(newdp->dccps_awl, newdp->dccps_iss));
+ newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
+ newdp->dccps_gar = newdp->dccps_iss;
+ newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
/*
- * Activate features after initialising the sequence numbers,
- * since CCID initialisation may depend on GSS, ISR, ISS etc.
+ * Activate features: initialise CCIDs, sequence windows etc.
*/
if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
/* It is still raw copy of parent, so invalidate
diff --git a/net/dccp/options.c b/net/dccp/options.c
index bfda087bd90..cd306181300 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
}
/*
- * CCID-Specific Options (from RFC 4340, sec. 10.3):
- *
- * Option numbers 128 through 191 are for options sent from the
- * HC-Sender to the HC-Receiver; option numbers 192 through 255
- * are for options sent from the HC-Receiver to the HC-Sender.
- *
* CCID-specific options are ignored during connection setup, as
* negotiation may still be in progress (see RFC 4340, 10.3).
* The same applies to Ack Vectors, as these depend on the CCID.
- *
*/
- if (dreq != NULL && (opt >= 128 ||
+ if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
goto ignore_option;
@@ -170,6 +163,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
dccp_role(sk), ntohl(opt_val),
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
+ /* schedule an Ack in case this sender is quiescent */
+ inet_csk_schedule_ack(sk);
break;
case DCCPO_TIMESTAMP_ECHO:
if (len != 4 && len != 6 && len != 8)
@@ -226,23 +221,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
dccp_role(sk), elapsed_time);
break;
- case 128 ... 191: {
- const u16 idx = value - options;
-
+ case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
- opt, len, idx,
- value) != 0)
+ pkt_type, opt, value, len))
goto out_invalid_option;
- }
break;
- case 192 ... 255: {
- const u16 idx = value - options;
-
+ case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
- opt, len, idx,
- value) != 0)
+ pkt_type, opt, value, len))
goto out_invalid_option;
- }
break;
default:
DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
@@ -384,7 +371,7 @@ int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
-int dccp_insert_option_timestamp(struct sk_buff *skb)
+static int dccp_insert_option_timestamp(struct sk_buff *skb)
{
__be32 now = htonl(dccp_timestamp());
/* yes this will overflow but that is the point as we want a
@@ -393,8 +380,6 @@ int dccp_insert_option_timestamp(struct sk_buff *skb)
return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
}
-EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
-
static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
struct dccp_request_sock *dreq,
struct sk_buff *skb)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index aadbdb58758..a988fe9ffcb 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -304,7 +304,7 @@ void dccp_write_xmit(struct sock *sk, int block)
dcb->dccpd_type = DCCP_PKT_DATA;
err = dccp_transmit_skb(sk, skb);
- ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+ ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
if (err)
DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
err);
@@ -474,8 +474,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
/*
* Do all connect socket setups that can be done AF independent.
*/
-static inline void dccp_connect_init(struct sock *sk)
+int dccp_connect(struct sock *sk)
{
+ struct sk_buff *skb;
struct dccp_sock *dp = dccp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -485,22 +486,12 @@ static inline void dccp_connect_init(struct sock *sk)
dccp_sync_mss(sk, dst_mtu(dst));
- /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
- dp->dccps_gar = dp->dccps_iss;
-
- icsk->icsk_retransmits = 0;
-}
-
-int dccp_connect(struct sock *sk)
-{
- struct sk_buff *skb;
- struct inet_connection_sock *icsk = inet_csk(sk);
-
/* do not connect if feature negotiation setup fails */
if (dccp_feat_finalise_settings(dccp_sk(sk)))
return -EPROTO;
- dccp_connect_init(sk);
+ /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
+ dp->dccps_gar = dp->dccps_iss;
skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
if (unlikely(skb == NULL))
@@ -516,6 +507,7 @@ int dccp_connect(struct sock *sk)
DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
/* Timer for repeating the REQUEST until an answer. */
+ icsk->icsk_retransmits = 0;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
icsk->icsk_rto, DCCP_RTO_MAX);
return 0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 096250d1323..7e5fc04eb6d 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -50,6 +50,30 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
+#ifdef CONFIG_IP_DCCP_DEBUG
+static const char *dccp_state_name(const int state)
+{
+ static const char *const dccp_state_names[] = {
+ [DCCP_OPEN] = "OPEN",
+ [DCCP_REQUESTING] = "REQUESTING",
+ [DCCP_PARTOPEN] = "PARTOPEN",
+ [DCCP_LISTEN] = "LISTEN",
+ [DCCP_RESPOND] = "RESPOND",
+ [DCCP_CLOSING] = "CLOSING",
+ [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
+ [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
+ [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
+ [DCCP_TIME_WAIT] = "TIME_WAIT",
+ [DCCP_CLOSED] = "CLOSED",
+ };
+
+ if (state >= DCCP_MAX_STATES)
+ return "INVALID STATE!";
+ else
+ return dccp_state_names[state];
+}
+#endif
+
void dccp_set_state(struct sock *sk, const int state)
{
const int oldstate = sk->sk_state;
@@ -146,30 +170,6 @@ const char *dccp_packet_name(const int type)
EXPORT_SYMBOL_GPL(dccp_packet_name);
-const char *dccp_state_name(const int state)
-{
- static const char *const dccp_state_names[] = {
- [DCCP_OPEN] = "OPEN",
- [DCCP_REQUESTING] = "REQUESTING",
- [DCCP_PARTOPEN] = "PARTOPEN",
- [DCCP_LISTEN] = "LISTEN",
- [DCCP_RESPOND] = "RESPOND",
- [DCCP_CLOSING] = "CLOSING",
- [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
- [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
- [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
- [DCCP_TIME_WAIT] = "TIME_WAIT",
- [DCCP_CLOSED] = "CLOSED",
- };
-
- if (state >= DCCP_MAX_STATES)
- return "INVALID STATE!";
- else
- return dccp_state_names[state];
-}
-
-EXPORT_SYMBOL_GPL(dccp_state_name);
-
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -944,7 +944,7 @@ void dccp_close(struct sock *sk, long timeout)
if (data_was_unread) {
/* Unread data was tossed, send an appropriate Reset Code */
- DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
+ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_set_state(sk, DCCP_CLOSED);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 0363bb95cc7..a085dbcf5c7 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -48,7 +48,6 @@
#include <net/dn_neigh.h>
#include <net/dn_route.h>
-static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev);
static int dn_neigh_construct(struct neighbour *);
static void dn_long_error_report(struct neighbour *, struct sk_buff *);
static void dn_short_error_report(struct neighbour *, struct sk_buff *);
@@ -93,6 +92,13 @@ static const struct neigh_ops dn_phase3_ops = {
.queue_xmit = dev_queue_xmit
};
+static u32 dn_neigh_hash(const void *pkey,
+ const struct net_device *dev,
+ __u32 hash_rnd)
+{
+ return jhash_2words(*(__u16 *)pkey, 0, hash_rnd);
+}
+
struct neigh_table dn_neigh_table = {
.family = PF_DECnet,
.entry_size = sizeof(struct dn_neigh),
@@ -122,11 +128,6 @@ struct neigh_table dn_neigh_table = {
.gc_thresh3 = 1024,
};
-static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev)
-{
- return jhash_2words(*(__u16 *)pkey, 0, dn_neigh_table.hash_rnd);
-}
-
static int dn_neigh_construct(struct neighbour *neigh)
{
struct net_device *dev = neigh->dev;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011..2ef115277be 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
aux = scp->accessdata.acc_userl;
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
+ memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
aux = scp->accessdata.acc_passl;
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
+ memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
aux = scp->accessdata.acc_accl;
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
+ memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
*skb_put(skb, 1) = aux;
if (aux > 0)
- memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux);
+ memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
scp->persist = dn_nsp_persist(sk);
scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6585ea6d118..df0f3e54ff8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -132,7 +132,6 @@ static struct dst_ops dn_dst_ops = {
.negative_advice = dn_dst_negative_advice,
.link_failure = dn_dst_link_failure,
.update_pmtu = dn_dst_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
@@ -1758,6 +1757,7 @@ void __init dn_route_init(void)
dn_dst_ops.kmem_cachep =
kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ dst_entries_init(&dn_dst_ops);
setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
add_timer(&dn_route_timer);
@@ -1816,5 +1816,6 @@ void __exit dn_route_cleanup(void)
dn_run_flush(0);
proc_net_remove(&init_net, "decnet_cache");
+ dst_entries_destroy(&dn_dst_ops);
}
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dc54bd0d083..f8c1ae4b41f 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -392,7 +392,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
dev_queue_xmit(skb);
dev_put(dev);
mutex_unlock(&econet_mutex);
- return(len);
+ return len;
out_free:
kfree_skb(skb);
@@ -637,7 +637,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol,
eo->num = protocol;
econet_insert_socket(&econet_sklist, sk);
- return(0);
+ return 0;
out:
return err;
}
@@ -1009,7 +1009,6 @@ static int __init aun_udp_initialise(void)
struct sockaddr_in sin;
skb_queue_head_init(&aun_queue);
- spin_lock_init(&aun_queue_lock);
setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
ab_cleanup_timer.expires = jiffies + (HZ*2);
add_timer(&ab_cleanup_timer);
@@ -1167,7 +1166,6 @@ static int __init econet_proto_init(void)
goto out;
sock_register(&econet_family_ops);
#ifdef CONFIG_ECONET_AUNUDP
- spin_lock_init(&aun_queue_lock);
aun_udp_initialise();
#endif
#ifdef CONFIG_ECONET_NATIVE
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c83986a9..f00ef2f1d81 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -367,7 +367,7 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
EXPORT_SYMBOL(alloc_etherdev_mq);
static size_t _format_mac_addr(char *buf, int buflen,
- const unsigned char *addr, int len)
+ const unsigned char *addr, int len)
{
int i;
char *cp = buf;
@@ -376,7 +376,7 @@ static size_t _format_mac_addr(char *buf, int buflen,
cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
if (i == len - 1)
break;
- cp += strlcpy(cp, ":", buflen - (cp - buf));
+ cp += scnprintf(cp, buflen - (cp - buf), ":");
}
return cp - buf;
}
@@ -386,7 +386,7 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
size_t l;
l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
- l += strlcpy(buf + l, "\n", PAGE_SIZE - l);
- return ((ssize_t) l);
+ l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
+ return (ssize_t)l;
}
EXPORT_SYMBOL(sysfs_format_mac);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7cd7760144f..e848e6c062c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -215,9 +215,15 @@ config NET_IPIP
be inserted in and removed from the running kernel whenever you
want). Most people won't need this and can say N.
+config NET_IPGRE_DEMUX
+ tristate "IP: GRE demultiplexer"
+ help
+ This is a helper module to demultiplex GRE packets based on the GRE version field.
+ Required by the ip_gre and pptp modules.
+
config NET_IPGRE
tristate "IP: GRE tunnels over IP"
- depends on IPV6 || IPV6=n
+ depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43a..4978d22f9a7 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_NET_IPIP) += ipip.o
+obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9..f581f77d109 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
/*
* inet_ehash_secret must be set exactly once
- * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
*/
void build_ehash_secret(void)
{
u32 rnd;
+
do {
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- spin_lock_bh(&inetsw_lock);
- if (!inet_ehash_secret)
- inet_ehash_secret = rnd;
- spin_unlock_bh(&inetsw_lock);
+
+ cmpxchg(&inet_ehash_secret, 0, rnd);
}
EXPORT_SYMBOL(build_ehash_secret);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96c1955b3e2..d8e540c5b07 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -55,7 +55,7 @@
* Stuart Cheshire : Metricom and grat arp fixes
* *** FOR 2.1 clean this up ***
* Lawrence V. Stefani: (08/12/96) Added FDDI support.
- * Alan Cox : Took the AP1000 nasty FDDI hack and
+ * Alan Cox : Took the AP1000 nasty FDDI hack and
* folded into the mainstream FDDI code.
* Ack spit, Linus how did you allow that
* one in...
@@ -120,14 +120,14 @@ EXPORT_SYMBOL(clip_tbl_hook);
#endif
#include <asm/system.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/netfilter_arp.h>
/*
* Interface to generic neighbour cache.
*/
-static u32 arp_hash(const void *pkey, const struct net_device *dev);
+static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd);
static int arp_constructor(struct neighbour *neigh);
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -161,7 +161,7 @@ static const struct neigh_ops arp_direct_ops = {
.queue_xmit = dev_queue_xmit,
};
-const struct neigh_ops arp_broken_ops = {
+static const struct neigh_ops arp_broken_ops = {
.family = AF_INET,
.solicit = arp_solicit,
.error_report = arp_error_report,
@@ -170,35 +170,34 @@ const struct neigh_ops arp_broken_ops = {
.hh_output = dev_queue_xmit,
.queue_xmit = dev_queue_xmit,
};
-EXPORT_SYMBOL(arp_broken_ops);
struct neigh_table arp_tbl = {
- .family = AF_INET,
- .entry_size = sizeof(struct neighbour) + 4,
- .key_len = 4,
- .hash = arp_hash,
- .constructor = arp_constructor,
- .proxy_redo = parp_redo,
- .id = "arp_cache",
- .parms = {
- .tbl = &arp_tbl,
- .base_reachable_time = 30 * HZ,
- .retrans_time = 1 * HZ,
- .gc_staletime = 60 * HZ,
- .reachable_time = 30 * HZ,
- .delay_probe_time = 5 * HZ,
- .queue_len = 3,
- .ucast_probes = 3,
- .mcast_probes = 3,
- .anycast_delay = 1 * HZ,
- .proxy_delay = (8 * HZ) / 10,
- .proxy_qlen = 64,
- .locktime = 1 * HZ,
+ .family = AF_INET,
+ .entry_size = sizeof(struct neighbour) + 4,
+ .key_len = 4,
+ .hash = arp_hash,
+ .constructor = arp_constructor,
+ .proxy_redo = parp_redo,
+ .id = "arp_cache",
+ .parms = {
+ .tbl = &arp_tbl,
+ .base_reachable_time = 30 * HZ,
+ .retrans_time = 1 * HZ,
+ .gc_staletime = 60 * HZ,
+ .reachable_time = 30 * HZ,
+ .delay_probe_time = 5 * HZ,
+ .queue_len = 3,
+ .ucast_probes = 3,
+ .mcast_probes = 3,
+ .anycast_delay = 1 * HZ,
+ .proxy_delay = (8 * HZ) / 10,
+ .proxy_qlen = 64,
+ .locktime = 1 * HZ,
},
- .gc_interval = 30 * HZ,
- .gc_thresh1 = 128,
- .gc_thresh2 = 512,
- .gc_thresh3 = 1024,
+ .gc_interval = 30 * HZ,
+ .gc_thresh1 = 128,
+ .gc_thresh2 = 512,
+ .gc_thresh3 = 1024,
};
EXPORT_SYMBOL(arp_tbl);
@@ -226,14 +225,16 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
}
-static u32 arp_hash(const void *pkey, const struct net_device *dev)
+static u32 arp_hash(const void *pkey,
+ const struct net_device *dev,
+ __u32 hash_rnd)
{
- return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd);
+ return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd);
}
static int arp_constructor(struct neighbour *neigh)
{
- __be32 addr = *(__be32*)neigh->primary_key;
+ __be32 addr = *(__be32 *)neigh->primary_key;
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
@@ -296,16 +297,19 @@ static int arp_constructor(struct neighbour *neigh)
neigh->ops = &arp_broken_ops;
neigh->output = neigh->ops->output;
return 0;
+#else
+ break;
#endif
- ;}
+ }
#endif
if (neigh->type == RTN_MULTICAST) {
neigh->nud_state = NUD_NOARP;
arp_mc_map(addr, neigh->ha, dev, 1);
- } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
+ } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
- } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {
+ } else if (neigh->type == RTN_BROADCAST ||
+ (dev->flags & IFF_POINTOPOINT)) {
neigh->nud_state = NUD_NOARP;
memcpy(neigh->ha, dev->broadcast, dev->addr_len);
}
@@ -315,7 +319,7 @@ static int arp_constructor(struct neighbour *neigh)
else
neigh->ops = &arp_generic_ops;
- if (neigh->nud_state&NUD_VALID)
+ if (neigh->nud_state & NUD_VALID)
neigh->output = neigh->ops->connected_output;
else
neigh->output = neigh->ops->output;
@@ -334,7 +338,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
__be32 saddr = 0;
u8 *dst_ha = NULL;
struct net_device *dev = neigh->dev;
- __be32 target = *(__be32*)neigh->primary_key;
+ __be32 target = *(__be32 *)neigh->primary_key;
int probes = atomic_read(&neigh->probes);
struct in_device *in_dev;
@@ -347,7 +351,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
default:
case 0: /* By default announce any local IP */
- if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL)
+ if (skb && inet_addr_type(dev_net(dev),
+ ip_hdr(skb)->saddr) == RTN_LOCAL)
saddr = ip_hdr(skb)->saddr;
break;
case 1: /* Restrict announcements of saddr in same subnet */
@@ -369,16 +374,21 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
if (!saddr)
saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
- if ((probes -= neigh->parms->ucast_probes) < 0) {
- if (!(neigh->nud_state&NUD_VALID))
- printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
+ probes -= neigh->parms->ucast_probes;
+ if (probes < 0) {
+ if (!(neigh->nud_state & NUD_VALID))
+ printk(KERN_DEBUG
+ "trying to ucast probe in NUD_INVALID\n");
dst_ha = neigh->ha;
read_lock_bh(&neigh->lock);
- } else if ((probes -= neigh->parms->app_probes) < 0) {
+ } else {
+ probes -= neigh->parms->app_probes;
+ if (probes < 0) {
#ifdef CONFIG_ARPD
- neigh_app_ns(neigh);
+ neigh_app_ns(neigh);
#endif
- return;
+ return;
+ }
}
arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
@@ -451,7 +461,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
* is allowed to use this function, it is scheduled to be removed. --ANK
*/
-static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev)
+static int arp_set_predefined(int addr_hint, unsigned char *haddr,
+ __be32 paddr, struct net_device *dev)
{
switch (addr_hint) {
case RTN_LOCAL:
@@ -483,17 +494,16 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
paddr = skb_rtable(skb)->rt_gateway;
- if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev))
+ if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
+ paddr, dev))
return 0;
n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
if (n) {
n->used = jiffies;
- if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
- read_lock_bh(&n->lock);
- memcpy(haddr, n->ha, dev->addr_len);
- read_unlock_bh(&n->lock);
+ if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
+ neigh_ha_snapshot(haddr, n, dev);
neigh_release(n);
return 0;
}
@@ -515,13 +525,14 @@ int arp_bind_neighbour(struct dst_entry *dst)
return -EINVAL;
if (n == NULL) {
__be32 nexthop = ((struct rtable *)dst)->rt_gateway;
- if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
nexthop = 0;
n = __neigh_lookup_errno(
#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
- dev->type == ARPHRD_ATM ? clip_tbl_hook :
+ dev->type == ARPHRD_ATM ?
+ clip_tbl_hook :
#endif
- &arp_tbl, &nexthop, dev);
+ &arp_tbl, &nexthop, dev);
if (IS_ERR(n))
return PTR_ERR(n);
dst->neighbour = n;
@@ -543,8 +554,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
if (!IN_DEV_PROXY_ARP(in_dev))
return 0;
-
- if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0)
+ imi = IN_DEV_MEDIUM_ID(in_dev);
+ if (imi == 0)
return 1;
if (imi == -1)
return 0;
@@ -555,7 +566,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
if (out_dev)
omi = IN_DEV_MEDIUM_ID(out_dev);
- return (omi != imi && omi != -1);
+ return omi != imi && omi != -1;
}
/*
@@ -685,7 +696,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
arp->ar_pln = 4;
arp->ar_op = htons(type);
- arp_ptr=(unsigned char *)(arp+1);
+ arp_ptr = (unsigned char *)(arp + 1);
memcpy(arp_ptr, src_hw, dev->addr_len);
arp_ptr += dev->addr_len;
@@ -735,9 +746,8 @@ void arp_send(int type, int ptype, __be32 dest_ip,
skb = arp_create(type, ptype, dest_ip, dev, src_ip,
dest_hw, src_hw, target_hw);
- if (skb == NULL) {
+ if (skb == NULL)
return;
- }
arp_xmit(skb);
}
@@ -815,7 +825,7 @@ static int arp_process(struct sk_buff *skb)
/*
* Extract fields
*/
- arp_ptr= (unsigned char *)(arp+1);
+ arp_ptr = (unsigned char *)(arp + 1);
sha = arp_ptr;
arp_ptr += dev->addr_len;
memcpy(&sip, arp_ptr, 4);
@@ -869,16 +879,17 @@ static int arp_process(struct sk_buff *skb)
addr_type = rt->rt_type;
if (addr_type == RTN_LOCAL) {
- int dont_send = 0;
+ int dont_send;
- if (!dont_send)
- dont_send |= arp_ignore(in_dev,sip,tip);
+ dont_send = arp_ignore(in_dev, sip, tip);
if (!dont_send && IN_DEV_ARPFILTER(in_dev))
- dont_send |= arp_filter(sip,tip,dev);
+ dont_send |= arp_filter(sip, tip, dev);
if (!dont_send) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n) {
- arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+ arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
+ dev, tip, sha, dev->dev_addr,
+ sha);
neigh_release(n);
}
}
@@ -887,8 +898,7 @@ static int arp_process(struct sk_buff *skb)
if (addr_type == RTN_UNICAST &&
(arp_fwd_proxy(in_dev, dev, rt) ||
arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
- pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))
- {
+ pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
@@ -896,9 +906,12 @@ static int arp_process(struct sk_buff *skb)
if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
skb->pkt_type == PACKET_HOST ||
in_dev->arp_parms->proxy_delay == 0) {
- arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
+ arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
+ dev, tip, sha, dev->dev_addr,
+ sha);
} else {
- pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
+ pneigh_enqueue(&arp_tbl,
+ in_dev->arp_parms, skb);
return 0;
}
goto out;
@@ -939,7 +952,8 @@ static int arp_process(struct sk_buff *skb)
if (arp->ar_op != htons(ARPOP_REPLY) ||
skb->pkt_type != PACKET_HOST)
state = NUD_STALE;
- neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0);
+ neigh_update(n, sha, state,
+ override ? NEIGH_UPDATE_F_OVERRIDE : 0);
neigh_release(n);
}
@@ -975,7 +989,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
arp->ar_pln != 4)
goto freeskb;
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (skb == NULL)
goto out_of_mem;
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -1019,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
return -EINVAL;
if (!dev && (r->arp_flags & ATF_COM)) {
dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
- r->arp_ha.sa_data);
+ r->arp_ha.sa_data);
if (!dev)
return -ENODEV;
}
@@ -1033,7 +1048,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
}
static int arp_req_set(struct net *net, struct arpreq *r,
- struct net_device * dev)
+ struct net_device *dev)
{
__be32 ip;
struct neighbour *neigh;
@@ -1046,10 +1061,11 @@ static int arp_req_set(struct net *net, struct arpreq *r,
if (r->arp_flags & ATF_PERM)
r->arp_flags |= ATF_COM;
if (dev == NULL) {
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
- .tos = RTO_ONLINK } } };
- struct rtable * rt;
- if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
+ struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
+ .tos = RTO_ONLINK } };
+ struct rtable *rt;
+ err = ip_route_output_key(net, &rt, &fl);
+ if (err != 0)
return err;
dev = rt->dst.dev;
ip_rt_put(rt);
@@ -1083,9 +1099,9 @@ static int arp_req_set(struct net *net, struct arpreq *r,
unsigned state = NUD_STALE;
if (r->arp_flags & ATF_PERM)
state = NUD_PERMANENT;
- err = neigh_update(neigh, (r->arp_flags&ATF_COM) ?
+ err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
r->arp_ha.sa_data : NULL, state,
- NEIGH_UPDATE_F_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE |
NEIGH_UPDATE_F_ADMIN);
neigh_release(neigh);
}
@@ -1094,12 +1110,12 @@ static int arp_req_set(struct net *net, struct arpreq *r,
static unsigned arp_state_to_flags(struct neighbour *neigh)
{
- unsigned flags = 0;
if (neigh->nud_state&NUD_PERMANENT)
- flags = ATF_PERM|ATF_COM;
+ return ATF_PERM | ATF_COM;
else if (neigh->nud_state&NUD_VALID)
- flags = ATF_COM;
- return flags;
+ return ATF_COM;
+ else
+ return 0;
}
/*
@@ -1142,7 +1158,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
}
static int arp_req_delete(struct net *net, struct arpreq *r,
- struct net_device * dev)
+ struct net_device *dev)
{
int err;
__be32 ip;
@@ -1153,10 +1169,11 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
if (dev == NULL) {
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
- .tos = RTO_ONLINK } } };
- struct rtable * rt;
- if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
+ struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
+ .tos = RTO_ONLINK } };
+ struct rtable *rt;
+ err = ip_route_output_key(net, &rt, &fl);
+ if (err != 0)
return err;
dev = rt->dst.dev;
ip_rt_put(rt);
@@ -1166,7 +1183,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
err = -ENXIO;
neigh = neigh_lookup(&arp_tbl, &ip, dev);
if (neigh) {
- if (neigh->nud_state&~NUD_NOARP)
+ if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_ADMIN);
@@ -1186,24 +1203,24 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
struct net_device *dev = NULL;
switch (cmd) {
- case SIOCDARP:
- case SIOCSARP:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- case SIOCGARP:
- err = copy_from_user(&r, arg, sizeof(struct arpreq));
- if (err)
- return -EFAULT;
- break;
- default:
- return -EINVAL;
+ case SIOCDARP:
+ case SIOCSARP:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ case SIOCGARP:
+ err = copy_from_user(&r, arg, sizeof(struct arpreq));
+ if (err)
+ return -EFAULT;
+ break;
+ default:
+ return -EINVAL;
}
if (r.arp_pa.sa_family != AF_INET)
return -EPFNOSUPPORT;
if (!(r.arp_flags & ATF_PUBL) &&
- (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB)))
+ (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
return -EINVAL;
if (!(r.arp_flags & ATF_NETMASK))
((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
@@ -1211,7 +1228,8 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
if (r.arp_dev[0]) {
err = -ENODEV;
- if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL)
+ dev = __dev_get_by_name(net, r.arp_dev);
+ if (dev == NULL)
goto out;
/* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1243,7 +1261,8 @@ out:
return err;
}
-static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int arp_netdev_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
{
struct net_device *dev = ptr;
@@ -1311,12 +1330,13 @@ static char *ax2asc2(ax25_address *a, char *buf)
for (n = 0, s = buf; n < 6; n++) {
c = (a->ax25_call[n] >> 1) & 0x7F;
- if (c != ' ') *s++ = c;
+ if (c != ' ')
+ *s++ = c;
}
*s++ = '-';
-
- if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) {
+ n = (a->ax25_call[6] >> 1) & 0x0F;
+ if (n > 9) {
*s++ = '1';
n -= 10;
}
@@ -1325,10 +1345,9 @@ static char *ax2asc2(ax25_address *a, char *buf)
*s++ = '\0';
if (*buf == '\0' || *buf == '-')
- return "*";
+ return "*";
return buf;
-
}
#endif /* CONFIG_AX25 */
@@ -1408,10 +1427,10 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
/* ------------------------------------------------------------------------ */
static const struct seq_operations arp_seq_ops = {
- .start = arp_seq_start,
- .next = neigh_seq_next,
- .stop = neigh_seq_stop,
- .show = arp_seq_show,
+ .start = arp_seq_start,
+ .next = neigh_seq_next,
+ .stop = neigh_seq_stop,
+ .show = arp_seq_show,
};
static int arp_seq_open(struct inode *inode, struct file *file)
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 721a8a37b45..174be6caa5c 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,6 +73,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst);
- return(0);
+ return 0;
}
EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da14c49284f..dc94b0316b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -209,7 +209,7 @@ static void inetdev_destroy(struct in_device *in_dev)
inet_free_ifa(ifa);
}
- dev->ip_ptr = NULL;
+ rcu_assign_pointer(dev->ip_ptr, NULL);
devinet_sysctl_unregister(in_dev);
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -403,6 +403,9 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
return inet_insert_ifa(ifa);
}
+/* Caller must hold RCU or RTNL:
+ * we don't take a reference on the found in_device.
+ */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
struct net_device *dev;
@@ -411,7 +414,7 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex)
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
if (dev)
- in_dev = in_dev_get(dev);
+ in_dev = rcu_dereference_rtnl(dev->ip_ptr);
rcu_read_unlock();
return in_dev;
}
@@ -453,8 +456,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
goto errout;
}
- __in_dev_put(in_dev);
-
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
if (tb[IFA_LOCAL] &&
@@ -1059,7 +1060,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_REGISTER:
printk(KERN_DEBUG "inetdev_event: bug\n");
- dev->ip_ptr = NULL;
+ rcu_assign_pointer(dev->ip_ptr, NULL);
break;
case NETDEV_UP:
if (!inetdev_valid_mtu(dev->mtu))
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7d02a9f999f..36e27c2107d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -147,35 +147,43 @@ static void fib_flush(struct net *net)
rt_cache_flush(net, -1);
}
-/*
- * Find the first device with a given source address.
+/**
+ * __ip_dev_find - find the first device with a given source address.
+ * @net: the net namespace
+ * @addr: the source address
+ * @devref: if true, take a reference on the found device
+ *
+ * If a caller uses devref=false, it should be protected by RCU, or RTNL
*/
-
-struct net_device * ip_dev_find(struct net *net, __be32 addr)
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
- struct fib_result res;
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = addr
+ }
+ },
+ .flags = FLOWI_FLAG_MATCH_ANY_IIF
+ };
+ struct fib_result res = { 0 };
struct net_device *dev = NULL;
- struct fib_table *local_table;
-#ifdef CONFIG_IP_MULTIPLE_TABLES
- res.r = NULL;
-#endif
-
- local_table = fib_get_table(net, RT_TABLE_LOCAL);
- if (!local_table || fib_table_lookup(local_table, &fl, &res))
+ rcu_read_lock();
+ if (fib_lookup(net, &fl, &res)) {
+ rcu_read_unlock();
return NULL;
+ }
if (res.type != RTN_LOCAL)
goto out;
dev = FIB_RES_DEV(res);
- if (dev)
+ if (dev && devref)
dev_hold(dev);
out:
- fib_res_put(&res);
+ rcu_read_unlock();
return dev;
}
-EXPORT_SYMBOL(ip_dev_find);
+EXPORT_SYMBOL(__ip_dev_find);
/*
* Find address type as if only "dev" was present in the system. If
@@ -202,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
local_table = fib_get_table(net, RT_TABLE_LOCAL);
if (local_table) {
ret = RTN_UNICAST;
- if (!fib_table_lookup(local_table, &fl, &res)) {
+ rcu_read_lock();
+ if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
if (!dev || dev == res.fi->fib_dev)
ret = res.type;
- fib_res_put(&res);
}
+ rcu_read_unlock();
}
return ret;
}
@@ -220,30 +229,34 @@ EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
- return __inet_dev_addr_type(net, dev, addr);
+ return __inet_dev_addr_type(net, dev, addr);
}
EXPORT_SYMBOL(inet_dev_addr_type);
/* Given (packet source, input interface) and optional (dst, oif, tos):
- - (main) check, that source is valid i.e. not broadcast or our local
- address.
- - figure out what "logical" interface this packet arrived
- and calculate "specific destination" address.
- - check, that packet arrived from expected physical interface.
+ * - (main) check, that source is valid i.e. not broadcast or our local
+ * address.
+ * - figure out what "logical" interface this packet arrived
+ * and calculate "specific destination" address.
+ * - check, that packet arrived from expected physical interface.
+ * called with rcu_read_lock()
*/
-
int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
struct net_device *dev, __be32 *spec_dst,
u32 *itag, u32 mark)
{
struct in_device *in_dev;
- struct flowi fl = { .nl_u = { .ip4_u =
- { .daddr = src,
- .saddr = dst,
- .tos = tos } },
- .mark = mark,
- .iif = oif };
-
+ struct flowi fl = {
+ .nl_u = {
+ .ip4_u = {
+ .daddr = src,
+ .saddr = dst,
+ .tos = tos
+ }
+ },
+ .mark = mark,
+ .iif = oif
+ };
struct fib_result res;
int no_addr, rpf, accept_local;
bool dev_match;
@@ -251,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
struct net *net;
no_addr = rpf = accept_local = 0;
- rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
@@ -260,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
if (mark && !IN_DEV_SRC_VMARK(in_dev))
fl.mark = 0;
}
- rcu_read_unlock();
if (in_dev == NULL)
goto e_inval;
@@ -270,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
goto last_resort;
if (res.type != RTN_UNICAST) {
if (res.type != RTN_LOCAL || !accept_local)
- goto e_inval_res;
+ goto e_inval;
}
*spec_dst = FIB_RES_PREFSRC(res);
fib_combine_itag(itag, &res);
@@ -291,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
#endif
if (dev_match) {
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
- fib_res_put(&res);
return ret;
}
- fib_res_put(&res);
if (no_addr)
goto last_resort;
if (rpf == 1)
@@ -307,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
*spec_dst = FIB_RES_PREFSRC(res);
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
}
- fib_res_put(&res);
}
return ret;
@@ -318,8 +326,6 @@ last_resort:
*itag = 0;
return 0;
-e_inval_res:
- fib_res_put(&res);
e_inval:
return -EINVAL;
e_rpf:
@@ -472,9 +478,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
}
/*
- * Handle IP routing ioctl calls. These are used to manipulate the routing tables
+ * Handle IP routing ioctl calls.
+ * These are used to manipulate the routing tables
*/
-
int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct fib_config cfg;
@@ -518,7 +524,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
return -EINVAL;
}
-const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
+const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_DST] = { .type = NLA_U32 },
[RTA_SRC] = { .type = NLA_U32 },
[RTA_IIF] = { .type = NLA_U32 },
@@ -532,7 +538,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct fib_config *cfg)
+ struct nlmsghdr *nlh, struct fib_config *cfg)
{
struct nlattr *attr;
int err, remaining;
@@ -687,12 +693,11 @@ out:
}
/* Prepare and feed intra-kernel routing request.
- Really, it should be netlink message, but :-( netlink
- can be not configured, so that we feed it directly
- to fib engine. It is legal, because all events occur
- only when netlink is already locked.
+ * Really, it should be netlink message, but :-( netlink
+ * can be not configured, so that we feed it directly
+ * to fib engine. It is legal, because all events occur
+ * only when netlink is already locked.
*/
-
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
@@ -738,9 +743,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
struct in_ifaddr *prim = ifa;
__be32 mask = ifa->ifa_mask;
__be32 addr = ifa->ifa_local;
- __be32 prefix = ifa->ifa_address&mask;
+ __be32 prefix = ifa->ifa_address & mask;
- if (ifa->ifa_flags&IFA_F_SECONDARY) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, prefix, mask);
if (prim == NULL) {
printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
@@ -750,22 +755,24 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
- if (!(dev->flags&IFF_UP))
+ if (!(dev->flags & IFF_UP))
return;
/* Add broadcast address, if it is explicitly assigned. */
if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
- if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
+ if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
- fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
- RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, prim);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
+ 32, prim);
}
}
}
@@ -776,17 +783,18 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
struct net_device *dev = in_dev->dev;
struct in_ifaddr *ifa1;
struct in_ifaddr *prim = ifa;
- __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
- __be32 any = ifa->ifa_address&ifa->ifa_mask;
+ __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
+ __be32 any = ifa->ifa_address & ifa->ifa_mask;
#define LOCAL_OK 1
#define BRD_OK 2
#define BRD0_OK 4
#define BRD1_OK 8
unsigned ok = 0;
- if (!(ifa->ifa_flags&IFA_F_SECONDARY))
- fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
- RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
+ if (!(ifa->ifa_flags & IFA_F_SECONDARY))
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ any, ifa->ifa_prefixlen, prim);
else {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
if (prim == NULL) {
@@ -796,9 +804,9 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
}
/* Deletion is more complicated than add.
- We should take care of not to delete too much :-)
-
- Scan address list to be sure that addresses are really gone.
+ * We should take care of not to delete too much :-)
+ *
+ * Scan address list to be sure that addresses are really gone.
*/
for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
@@ -812,23 +820,23 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
ok |= BRD0_OK;
}
- if (!(ok&BRD_OK))
+ if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
- if (!(ok&BRD1_OK))
+ if (!(ok & BRD1_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
- if (!(ok&BRD0_OK))
+ if (!(ok & BRD0_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
- if (!(ok&LOCAL_OK)) {
+ if (!(ok & LOCAL_OK)) {
fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
/* Check, that this local address finally disappeared. */
if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
/* And the last, but not the least thing.
- We must flush stray FIB entries.
-
- First of all, we scan fib_info list searching
- for stray nexthop entries, then ignite fib_flush.
- */
+ * We must flush stray FIB entries.
+ *
+ * First of all, we scan fib_info list searching
+ * for stray nexthop entries, then ignite fib_flush.
+ */
if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
fib_flush(dev_net(dev));
}
@@ -839,14 +847,20 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
#undef BRD1_OK
}
-static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
+static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
{
struct fib_result res;
- struct flowi fl = { .mark = frn->fl_mark,
- .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
- .tos = frn->fl_tos,
- .scope = frn->fl_scope } } };
+ struct flowi fl = {
+ .mark = frn->fl_mark,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = frn->fl_addr,
+ .tos = frn->fl_tos,
+ .scope = frn->fl_scope
+ }
+ }
+ };
#ifdef CONFIG_IP_MULTIPLE_TABLES
res.r = NULL;
@@ -857,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
local_bh_disable();
frn->tb_id = tb->tb_id;
- frn->err = fib_table_lookup(tb, &fl, &res);
+ rcu_read_lock();
+ frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF);
if (!frn->err) {
frn->prefixlen = res.prefixlen;
frn->nh_sel = res.nh_sel;
frn->type = res.type;
frn->scope = res.scope;
- fib_res_put(&res);
}
+ rcu_read_unlock();
local_bh_enable();
}
}
@@ -894,8 +909,8 @@ static void nl_fib_input(struct sk_buff *skb)
nl_fib_lookup(frn, tb);
- pid = NETLINK_CB(skb).pid; /* pid of sending process */
- NETLINK_CB(skb).pid = 0; /* from kernel */
+ pid = NETLINK_CB(skb).pid; /* pid of sending process */
+ NETLINK_CB(skb).pid = 0; /* from kernel */
NETLINK_CB(skb).dst_group = 0; /* unicast */
netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
}
@@ -942,7 +957,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
fib_del_ifaddr(ifa);
if (ifa->ifa_dev->ifa_list == NULL) {
/* Last address was deleted from this interface.
- Disable IP.
+ * Disable IP.
*/
fib_disable_ip(dev, 1, 0);
} else {
@@ -1001,16 +1016,15 @@ static struct notifier_block fib_netdev_notifier = {
static int __net_init ip_fib_net_init(struct net *net)
{
int err;
- unsigned int i;
+ size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
- net->ipv4.fib_table_hash = kzalloc(
- sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
+ /* Avoid false sharing : Use at least a full cache line */
+ size = max_t(size_t, size, L1_CACHE_BYTES);
+
+ net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
if (net->ipv4.fib_table_hash == NULL)
return -ENOMEM;
- for (i = 0; i < FIB_TABLE_HASHSZ; i++)
- INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
-
err = fib4_rules_init(net);
if (err < 0)
goto fail;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 4ed7e0dea1b..43e1c594ce8 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -54,36 +54,37 @@ struct fib_node {
struct fib_alias fn_embedded_alias;
};
-struct fn_zone {
- struct fn_zone *fz_next; /* Next not empty zone */
- struct hlist_head *fz_hash; /* Hash table pointer */
- int fz_nent; /* Number of entries */
+#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
- int fz_divisor; /* Hash divisor */
+struct fn_zone {
+ struct fn_zone __rcu *fz_next; /* Next not empty zone */
+ struct hlist_head __rcu *fz_hash; /* Hash table pointer */
+ seqlock_t fz_lock;
u32 fz_hashmask; /* (fz_divisor - 1) */
-#define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
- int fz_order; /* Zone order */
- __be32 fz_mask;
+ u8 fz_order; /* Zone order (0..32) */
+ u8 fz_revorder; /* 32 - fz_order */
+ __be32 fz_mask; /* inet_make_mask(order) */
#define FZ_MASK(fz) ((fz)->fz_mask)
-};
-/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
- * can be cheaper than memory lookup, so that FZ_* macros are used.
- */
+ struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
+
+ int fz_nent; /* Number of entries */
+ int fz_divisor; /* Hash size (mask+1) */
+};
struct fn_hash {
- struct fn_zone *fn_zones[33];
- struct fn_zone *fn_zone_list;
+ struct fn_zone *fn_zones[33];
+ struct fn_zone __rcu *fn_zone_list;
};
static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
{
- u32 h = ntohl(key)>>(32 - fz->fz_order);
+ u32 h = ntohl(key) >> fz->fz_revorder;
h ^= (h>>20);
h ^= (h>>10);
h ^= (h>>5);
- h &= FZ_HASHMASK(fz);
+ h &= fz->fz_hashmask;
return h;
}
@@ -92,7 +93,6 @@ static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
return dst & FZ_MASK(fz);
}
-static DEFINE_RWLOCK(fib_hash_lock);
static unsigned int fib_hash_genid;
#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
@@ -101,12 +101,11 @@ static struct hlist_head *fz_hash_alloc(int divisor)
{
unsigned long size = divisor * sizeof(struct hlist_head);
- if (size <= PAGE_SIZE) {
+ if (size <= PAGE_SIZE)
return kzalloc(size, GFP_KERNEL);
- } else {
- return (struct hlist_head *)
- __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
- }
+
+ return (struct hlist_head *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
}
/* The fib hash lock must be held when this is called. */
@@ -121,12 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
struct fib_node *f;
hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
- struct hlist_head *new_head;
+ struct hlist_head __rcu *new_head;
- hlist_del(&f->fn_hash);
+ hlist_del_rcu(&f->fn_hash);
new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
- hlist_add_head(&f->fn_hash, new_head);
+ hlist_add_head_rcu(&f->fn_hash, new_head);
}
}
}
@@ -147,14 +146,14 @@ static void fn_rehash_zone(struct fn_zone *fz)
int old_divisor, new_divisor;
u32 new_hashmask;
- old_divisor = fz->fz_divisor;
+ new_divisor = old_divisor = fz->fz_divisor;
switch (old_divisor) {
- case 16:
- new_divisor = 256;
+ case EMBEDDED_HASH_SIZE:
+ new_divisor *= EMBEDDED_HASH_SIZE;
break;
- case 256:
- new_divisor = 1024;
+ case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
+ new_divisor *= (EMBEDDED_HASH_SIZE/2);
break;
default:
if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
@@ -175,31 +174,55 @@ static void fn_rehash_zone(struct fn_zone *fz)
ht = fz_hash_alloc(new_divisor);
if (ht) {
- write_lock_bh(&fib_hash_lock);
+ struct fn_zone nfz;
+
+ memcpy(&nfz, fz, sizeof(nfz));
+
+ write_seqlock_bh(&fz->fz_lock);
old_ht = fz->fz_hash;
- fz->fz_hash = ht;
+ nfz.fz_hash = ht;
+ nfz.fz_hashmask = new_hashmask;
+ nfz.fz_divisor = new_divisor;
+ fn_rebuild_zone(&nfz, old_ht, old_divisor);
+ fib_hash_genid++;
+ rcu_assign_pointer(fz->fz_hash, ht);
fz->fz_hashmask = new_hashmask;
fz->fz_divisor = new_divisor;
- fn_rebuild_zone(fz, old_ht, old_divisor);
- fib_hash_genid++;
- write_unlock_bh(&fib_hash_lock);
+ write_sequnlock_bh(&fz->fz_lock);
- fz_hash_free(old_ht, old_divisor);
+ if (old_ht != fz->fz_embedded_hash) {
+ synchronize_rcu();
+ fz_hash_free(old_ht, old_divisor);
+ }
}
}
-static inline void fn_free_node(struct fib_node * f)
+static void fn_free_node_rcu(struct rcu_head *head)
{
+ struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
+
kmem_cache_free(fn_hash_kmem, f);
}
+static inline void fn_free_node(struct fib_node *f)
+{
+ call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
+}
+
+static void fn_free_alias_rcu(struct rcu_head *head)
+{
+ struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
+
+ kmem_cache_free(fn_alias_kmem, fa);
+}
+
static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
{
fib_release_info(fa->fa_info);
if (fa == &f->fn_embedded_alias)
fa->fa_info = NULL;
else
- kmem_cache_free(fn_alias_kmem, fa);
+ call_rcu(&fa->rcu, fn_free_alias_rcu);
}
static struct fn_zone *
@@ -210,68 +233,71 @@ fn_new_zone(struct fn_hash *table, int z)
if (!fz)
return NULL;
- if (z) {
- fz->fz_divisor = 16;
- } else {
- fz->fz_divisor = 1;
- }
- fz->fz_hashmask = (fz->fz_divisor - 1);
- fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
- if (!fz->fz_hash) {
- kfree(fz);
- return NULL;
- }
+ seqlock_init(&fz->fz_lock);
+ fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
+ fz->fz_hashmask = fz->fz_divisor - 1;
+ fz->fz_hash = fz->fz_embedded_hash;
fz->fz_order = z;
+ fz->fz_revorder = 32 - z;
fz->fz_mask = inet_make_mask(z);
/* Find the first not empty zone with more specific mask */
- for (i=z+1; i<=32; i++)
+ for (i = z + 1; i <= 32; i++)
if (table->fn_zones[i])
break;
- write_lock_bh(&fib_hash_lock);
- if (i>32) {
+ if (i > 32) {
/* No more specific masks, we are the first. */
- fz->fz_next = table->fn_zone_list;
- table->fn_zone_list = fz;
+ rcu_assign_pointer(fz->fz_next,
+ rtnl_dereference(table->fn_zone_list));
+ rcu_assign_pointer(table->fn_zone_list, fz);
} else {
- fz->fz_next = table->fn_zones[i]->fz_next;
- table->fn_zones[i]->fz_next = fz;
+ rcu_assign_pointer(fz->fz_next,
+ rtnl_dereference(table->fn_zones[i]->fz_next));
+ rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
}
table->fn_zones[z] = fz;
fib_hash_genid++;
- write_unlock_bh(&fib_hash_lock);
return fz;
}
int fib_table_lookup(struct fib_table *tb,
- const struct flowi *flp, struct fib_result *res)
+ const struct flowi *flp, struct fib_result *res,
+ int fib_flags)
{
int err;
struct fn_zone *fz;
struct fn_hash *t = (struct fn_hash *)tb->tb_data;
- read_lock(&fib_hash_lock);
- for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
- struct hlist_head *head;
+ rcu_read_lock();
+ for (fz = rcu_dereference(t->fn_zone_list);
+ fz != NULL;
+ fz = rcu_dereference(fz->fz_next)) {
+ struct hlist_head __rcu *head;
struct hlist_node *node;
struct fib_node *f;
- __be32 k = fz_key(flp->fl4_dst, fz);
+ __be32 k;
+ unsigned int seq;
- head = &fz->fz_hash[fn_hash(k, fz)];
- hlist_for_each_entry(f, node, head, fn_hash) {
- if (f->fn_key != k)
- continue;
+ do {
+ seq = read_seqbegin(&fz->fz_lock);
+ k = fz_key(flp->fl4_dst, fz);
+
+ head = &fz->fz_hash[fn_hash(k, fz)];
+ hlist_for_each_entry_rcu(f, node, head, fn_hash) {
+ if (f->fn_key != k)
+ continue;
- err = fib_semantic_match(&f->fn_alias,
+ err = fib_semantic_match(&f->fn_alias,
flp, res,
- fz->fz_order);
- if (err <= 0)
- goto out;
- }
+ fz->fz_order, fib_flags);
+ if (err <= 0)
+ goto out;
+ }
+ } while (read_seqretry(&fz->fz_lock, seq));
}
err = 1;
out:
- read_unlock(&fib_hash_lock);
+ rcu_read_unlock();
return err;
}
@@ -293,11 +319,11 @@ void fib_table_select_default(struct fib_table *tb,
last_resort = NULL;
order = -1;
- read_lock(&fib_hash_lock);
- hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
struct fib_alias *fa;
- list_for_each_entry(fa, &f->fn_alias, fa_list) {
+ list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
struct fib_info *next_fi = fa->fa_info;
if (fa->fa_scope != res->scope ||
@@ -309,7 +335,8 @@ void fib_table_select_default(struct fib_table *tb,
if (!next_fi->fib_nh[0].nh_gw ||
next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
continue;
- fa->fa_state |= FA_S_ACCESSED;
+
+ fib_alias_accessed(fa);
if (fi == NULL) {
if (next_fi != res->fi)
@@ -341,7 +368,7 @@ void fib_table_select_default(struct fib_table *tb,
fib_result_assign(res, last_resort);
tb->tb_default = last_idx;
out:
- read_unlock(&fib_hash_lock);
+ rcu_read_unlock();
}
/* Insert node F to FZ. */
@@ -349,7 +376,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
{
struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
- hlist_add_head(&f->fn_hash, head);
+ hlist_add_head_rcu(&f->fn_hash, head);
}
/* Return the node in FZ matching KEY. */
@@ -359,7 +386,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
struct hlist_node *node;
struct fib_node *f;
- hlist_for_each_entry(f, node, head, fn_hash) {
+ hlist_for_each_entry_rcu(f, node, head, fn_hash) {
if (f->fn_key == key)
return f;
}
@@ -367,6 +394,17 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
return NULL;
}
+
+static struct fib_alias *fib_fast_alloc(struct fib_node *f)
+{
+ struct fib_alias *fa = &f->fn_embedded_alias;
+
+ if (fa->fa_info != NULL)
+ fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
+ return fa;
+}
+
+/* Caller must hold RTNL. */
int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
{
struct fn_hash *table = (struct fn_hash *) tb->tb_data;
@@ -451,7 +489,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
}
if (cfg->fc_nlflags & NLM_F_REPLACE) {
- struct fib_info *fi_drop;
u8 state;
fa = fa_first;
@@ -460,21 +497,25 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
err = 0;
goto out;
}
- write_lock_bh(&fib_hash_lock);
- fi_drop = fa->fa_info;
- fa->fa_info = fi;
- fa->fa_type = cfg->fc_type;
- fa->fa_scope = cfg->fc_scope;
+ err = -ENOBUFS;
+ new_fa = fib_fast_alloc(f);
+ if (new_fa == NULL)
+ goto out;
+
+ new_fa->fa_tos = fa->fa_tos;
+ new_fa->fa_info = fi;
+ new_fa->fa_type = cfg->fc_type;
+ new_fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
- fa->fa_state &= ~FA_S_ACCESSED;
+ new_fa->fa_state = state & ~FA_S_ACCESSED;
fib_hash_genid++;
- write_unlock_bh(&fib_hash_lock);
+ list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
- fib_release_info(fi_drop);
+ fn_free_alias(fa, f);
if (state & FA_S_ACCESSED)
rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
- rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
- &cfg->fc_nlinfo, NLM_F_REPLACE);
+ rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
+ tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
return 0;
}
@@ -506,12 +547,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
f = new_f;
}
- new_fa = &f->fn_embedded_alias;
- if (new_fa->fa_info != NULL) {
- new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
- if (new_fa == NULL)
- goto out;
- }
+ new_fa = fib_fast_alloc(f);
+ if (new_fa == NULL)
+ goto out;
+
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
new_fa->fa_type = cfg->fc_type;
@@ -522,13 +561,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
* Insert new entry to the list.
*/
- write_lock_bh(&fib_hash_lock);
if (new_f)
fib_insert_node(fz, new_f);
- list_add_tail(&new_fa->fa_list,
+ list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : &f->fn_alias));
fib_hash_genid++;
- write_unlock_bh(&fib_hash_lock);
if (new_f)
fz->fz_nent++;
@@ -603,14 +640,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
tb->tb_id, &cfg->fc_nlinfo, 0);
kill_fn = 0;
- write_lock_bh(&fib_hash_lock);
- list_del(&fa->fa_list);
+ list_del_rcu(&fa->fa_list);
if (list_empty(&f->fn_alias)) {
- hlist_del(&f->fn_hash);
+ hlist_del_rcu(&f->fn_hash);
kill_fn = 1;
}
fib_hash_genid++;
- write_unlock_bh(&fib_hash_lock);
if (fa->fa_state & FA_S_ACCESSED)
rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
@@ -641,14 +676,12 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
struct fib_info *fi = fa->fa_info;
if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
- write_lock_bh(&fib_hash_lock);
- list_del(&fa->fa_list);
+ list_del_rcu(&fa->fa_list);
if (list_empty(&f->fn_alias)) {
- hlist_del(&f->fn_hash);
+ hlist_del_rcu(&f->fn_hash);
kill_f = 1;
}
fib_hash_genid++;
- write_unlock_bh(&fib_hash_lock);
fn_free_alias(fa, f);
found++;
@@ -662,13 +695,16 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
return found;
}
+/* caller must hold RTNL. */
int fib_table_flush(struct fib_table *tb)
{
struct fn_hash *table = (struct fn_hash *) tb->tb_data;
struct fn_zone *fz;
int found = 0;
- for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
+ for (fz = rtnl_dereference(table->fn_zone_list);
+ fz != NULL;
+ fz = rtnl_dereference(fz->fz_next)) {
int i;
for (i = fz->fz_divisor - 1; i >= 0; i--)
@@ -690,10 +726,10 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
s_i = cb->args[4];
i = 0;
- hlist_for_each_entry(f, node, head, fn_hash) {
+ hlist_for_each_entry_rcu(f, node, head, fn_hash) {
struct fib_alias *fa;
- list_for_each_entry(fa, &f->fn_alias, fa_list) {
+ list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
if (i < s_i)
goto next;
@@ -711,7 +747,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
cb->args[4] = i;
return -1;
}
- next:
+next:
i++;
}
}
@@ -746,23 +782,26 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
{
- int m, s_m;
+ int m = 0, s_m;
struct fn_zone *fz;
struct fn_hash *table = (struct fn_hash *)tb->tb_data;
s_m = cb->args[2];
- read_lock(&fib_hash_lock);
- for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
- if (m < s_m) continue;
+ rcu_read_lock();
+ for (fz = rcu_dereference(table->fn_zone_list);
+ fz != NULL;
+ fz = rcu_dereference(fz->fz_next), m++) {
+ if (m < s_m)
+ continue;
if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
cb->args[2] = m;
- read_unlock(&fib_hash_lock);
+ rcu_read_unlock();
return -1;
}
memset(&cb->args[3], 0,
sizeof(cb->args) - 3*sizeof(cb->args[0]));
}
- read_unlock(&fib_hash_lock);
+ rcu_read_unlock();
cb->args[2] = m;
return skb->len;
}
@@ -825,8 +864,9 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
iter->genid = fib_hash_genid;
iter->valid = 1;
- for (iter->zone = table->fn_zone_list; iter->zone;
- iter->zone = iter->zone->fz_next) {
+ for (iter->zone = rcu_dereference(table->fn_zone_list);
+ iter->zone != NULL;
+ iter->zone = rcu_dereference(iter->zone->fz_next)) {
int maxslot;
if (!iter->zone->fz_nent)
@@ -911,7 +951,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
}
}
- iter->zone = iter->zone->fz_next;
+ iter->zone = rcu_dereference(iter->zone->fz_next);
if (!iter->zone)
goto out;
@@ -950,11 +990,11 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
}
static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(fib_hash_lock)
+ __acquires(RCU)
{
void *v = NULL;
- read_lock(&fib_hash_lock);
+ rcu_read_lock();
if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
return v;
@@ -967,15 +1007,16 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void fib_seq_stop(struct seq_file *seq, void *v)
- __releases(fib_hash_lock)
+ __releases(RCU)
{
- read_unlock(&fib_hash_lock);
+ rcu_read_unlock();
}
static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
{
static const unsigned type2flags[RTN_MAX + 1] = {
- [7] = RTF_REJECT, [8] = RTF_REJECT,
+ [7] = RTF_REJECT,
+ [8] = RTF_REJECT,
};
unsigned flags = type2flags[type];
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 637b133973b..a29edf2219c 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,17 +12,22 @@ struct fib_alias {
u8 fa_type;
u8 fa_scope;
u8 fa_state;
-#ifdef CONFIG_IP_FIB_TRIE
struct rcu_head rcu;
-#endif
};
#define FA_S_ACCESSED 0x01
+/* Dont write on fa_state unless needed, to keep it shared on all cpus */
+static inline void fib_alias_accessed(struct fib_alias *fa)
+{
+ if (!(fa->fa_state & FA_S_ACCESSED))
+ fa->fa_state |= FA_S_ACCESSED;
+}
+
/* Exported by fib_semantics.c */
extern int fib_semantic_match(struct list_head *head,
const struct flowi *flp,
- struct fib_result *res, int prefixlen);
+ struct fib_result *res, int prefixlen, int fib_flags);
extern void fib_release_info(struct fib_info *);
extern struct fib_info *fib_create_info(struct fib_config *cfg);
extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 76daeb5ff56..7981a24f5c7 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -6,7 +6,7 @@
* IPv4 Forwarding Information Base: policy rules.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- * Thomas Graf <tgraf@suug.ch>
+ * Thomas Graf <tgraf@suug.ch>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -14,7 +14,7 @@
* 2 of the License, or (at your option) any later version.
*
* Fixes:
- * Rani Assaf : local_rule cannot be deleted
+ * Rani Assaf : local_rule cannot be deleted
* Marc Boucher : routing by fwmark
*/
@@ -32,8 +32,7 @@
#include <net/ip_fib.h>
#include <net/fib_rules.h>
-struct fib4_rule
-{
+struct fib4_rule {
struct fib_rule common;
u8 dst_len;
u8 src_len;
@@ -58,6 +57,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
{
struct fib_lookup_arg arg = {
.result = res,
+ .flags = FIB_LOOKUP_NOREF,
};
int err;
@@ -91,10 +91,11 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
goto errout;
}
- if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL)
+ tbl = fib_get_table(rule->fr_net, rule->table);
+ if (!tbl)
goto errout;
- err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result);
+ err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags);
if (err > 0)
err = -EAGAIN;
errout:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 20f09c5b31e..3e0da3ef611 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -60,21 +60,30 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
static DEFINE_SPINLOCK(fib_multipath_lock);
-#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
-for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \
-for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)
+#define for_nexthops(fi) { \
+ int nhsel; const struct fib_nh *nh; \
+ for (nhsel = 0, nh = (fi)->fib_nh; \
+ nhsel < (fi)->fib_nhs; \
+ nh++, nhsel++)
+
+#define change_nexthops(fi) { \
+ int nhsel; struct fib_nh *nexthop_nh; \
+ for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
+ nhsel < (fi)->fib_nhs; \
+ nexthop_nh++, nhsel++)
#else /* CONFIG_IP_ROUTE_MULTIPATH */
/* Hope, that gcc will optimize it to get rid of dummy loop */
-#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \
-for (nhsel=0; nhsel < 1; nhsel++)
+#define for_nexthops(fi) { \
+ int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \
+ for (nhsel = 0; nhsel < 1; nhsel++)
-#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
-for (nhsel=0; nhsel < 1; nhsel++)
+#define change_nexthops(fi) { \
+ int nhsel; \
+ struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
+ for (nhsel = 0; nhsel < 1; nhsel++)
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -86,63 +95,70 @@ static const struct
int error;
u8 scope;
} fib_props[RTN_MAX + 1] = {
- {
+ [RTN_UNSPEC] = {
.error = 0,
.scope = RT_SCOPE_NOWHERE,
- }, /* RTN_UNSPEC */
- {
+ },
+ [RTN_UNICAST] = {
.error = 0,
.scope = RT_SCOPE_UNIVERSE,
- }, /* RTN_UNICAST */
- {
+ },
+ [RTN_LOCAL] = {
.error = 0,
.scope = RT_SCOPE_HOST,
- }, /* RTN_LOCAL */
- {
+ },
+ [RTN_BROADCAST] = {
.error = 0,
.scope = RT_SCOPE_LINK,
- }, /* RTN_BROADCAST */
- {
+ },
+ [RTN_ANYCAST] = {
.error = 0,
.scope = RT_SCOPE_LINK,
- }, /* RTN_ANYCAST */
- {
+ },
+ [RTN_MULTICAST] = {
.error = 0,
.scope = RT_SCOPE_UNIVERSE,
- }, /* RTN_MULTICAST */
- {
+ },
+ [RTN_BLACKHOLE] = {
.error = -EINVAL,
.scope = RT_SCOPE_UNIVERSE,
- }, /* RTN_BLACKHOLE */
- {
+ },
+ [RTN_UNREACHABLE] = {
.error = -EHOSTUNREACH,
.scope = RT_SCOPE_UNIVERSE,
- }, /* RTN_UNREACHABLE */
- {
+ },
+ [RTN_PROHIBIT] = {
.error = -EACCES,
.scope = RT_SCOPE_UNIVERSE,
- }, /* RTN_PROHIBIT */
- {
+ },
+ [RTN_THROW] = {
.error = -EAGAIN,
.scope = RT_SCOPE_UNIVERSE,
- }, /* RTN_THROW */
- {
+ },
+ [RTN_NAT] = {
.error = -EINVAL,
.scope = RT_SCOPE_NOWHERE,
- }, /* RTN_NAT */
- {
+ },
+ [RTN_XRESOLVE] = {
.error = -EINVAL,
.scope = RT_SCOPE_NOWHERE,
- }, /* RTN_XRESOLVE */
+ },
};
/* Release a nexthop info record */
+static void free_fib_info_rcu(struct rcu_head *head)
+{
+ struct fib_info *fi = container_of(head, struct fib_info, rcu);
+
+ kfree(fi);
+}
+
void free_fib_info(struct fib_info *fi)
{
if (fi->fib_dead == 0) {
- printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
+ pr_warning("Freeing alive fib_info %p\n", fi);
return;
}
change_nexthops(fi) {
@@ -152,7 +168,7 @@ void free_fib_info(struct fib_info *fi)
} endfor_nexthops(fi);
fib_info_cnt--;
release_net(fi->fib_net);
- kfree(fi);
+ call_rcu(&fi->rcu, free_fib_info_rcu);
}
void fib_release_info(struct fib_info *fi)
@@ -173,7 +189,7 @@ void fib_release_info(struct fib_info *fi)
spin_unlock_bh(&fib_info_lock);
}
-static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
+static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
const struct fib_nh *onh = ofi->fib_nh;
@@ -187,7 +203,7 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
#ifdef CONFIG_NET_CLS_ROUTE
nh->nh_tclassid != onh->nh_tclassid ||
#endif
- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+ ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
return -1;
onh++;
} endfor_nexthops(fi);
@@ -238,7 +254,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
nfi->fib_priority == fi->fib_priority &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(fi->fib_metrics)) == 0 &&
- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+ ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
}
@@ -247,9 +263,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
}
/* Check, that the gateway is already configured.
- Used only by redirect accept routine.
+ * Used only by redirect accept routine.
*/
-
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
@@ -264,7 +279,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
hlist_for_each_entry(nh, node, head, nh_hash) {
if (nh->nh_dev == dev &&
nh->nh_gw == gw &&
- !(nh->nh_flags&RTNH_F_DEAD)) {
+ !(nh->nh_flags & RTNH_F_DEAD)) {
spin_unlock(&fib_info_lock);
return 0;
}
@@ -362,10 +377,10 @@ int fib_detect_death(struct fib_info *fi, int order,
}
if (state == NUD_REACHABLE)
return 0;
- if ((state&NUD_VALID) && order != dflt)
+ if ((state & NUD_VALID) && order != dflt)
return 0;
- if ((state&NUD_VALID) ||
- (*last_idx<0 && order > dflt)) {
+ if ((state & NUD_VALID) ||
+ (*last_idx < 0 && order > dflt)) {
*last_resort = fi;
*last_idx = order;
}
@@ -476,75 +491,76 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
/*
- Picture
- -------
-
- Semantics of nexthop is very messy by historical reasons.
- We have to take into account, that:
- a) gateway can be actually local interface address,
- so that gatewayed route is direct.
- b) gateway must be on-link address, possibly
- described not by an ifaddr, but also by a direct route.
- c) If both gateway and interface are specified, they should not
- contradict.
- d) If we use tunnel routes, gateway could be not on-link.
-
- Attempt to reconcile all of these (alas, self-contradictory) conditions
- results in pretty ugly and hairy code with obscure logic.
-
- I chose to generalized it instead, so that the size
- of code does not increase practically, but it becomes
- much more general.
- Every prefix is assigned a "scope" value: "host" is local address,
- "link" is direct route,
- [ ... "site" ... "interior" ... ]
- and "universe" is true gateway route with global meaning.
-
- Every prefix refers to a set of "nexthop"s (gw, oif),
- where gw must have narrower scope. This recursion stops
- when gw has LOCAL scope or if "nexthop" is declared ONLINK,
- which means that gw is forced to be on link.
-
- Code is still hairy, but now it is apparently logically
- consistent and very flexible. F.e. as by-product it allows
- to co-exists in peace independent exterior and interior
- routing processes.
-
- Normally it looks as following.
-
- {universe prefix} -> (gw, oif) [scope link]
- |
- |-> {link prefix} -> (gw, oif) [scope local]
- |
- |-> {local prefix} (terminal node)
+ * Picture
+ * -------
+ *
+ * Semantics of nexthop is very messy by historical reasons.
+ * We have to take into account, that:
+ * a) gateway can be actually local interface address,
+ * so that gatewayed route is direct.
+ * b) gateway must be on-link address, possibly
+ * described not by an ifaddr, but also by a direct route.
+ * c) If both gateway and interface are specified, they should not
+ * contradict.
+ * d) If we use tunnel routes, gateway could be not on-link.
+ *
+ * Attempt to reconcile all of these (alas, self-contradictory) conditions
+ * results in pretty ugly and hairy code with obscure logic.
+ *
+ * I chose to generalized it instead, so that the size
+ * of code does not increase practically, but it becomes
+ * much more general.
+ * Every prefix is assigned a "scope" value: "host" is local address,
+ * "link" is direct route,
+ * [ ... "site" ... "interior" ... ]
+ * and "universe" is true gateway route with global meaning.
+ *
+ * Every prefix refers to a set of "nexthop"s (gw, oif),
+ * where gw must have narrower scope. This recursion stops
+ * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
+ * which means that gw is forced to be on link.
+ *
+ * Code is still hairy, but now it is apparently logically
+ * consistent and very flexible. F.e. as by-product it allows
+ * to co-exists in peace independent exterior and interior
+ * routing processes.
+ *
+ * Normally it looks as following.
+ *
+ * {universe prefix} -> (gw, oif) [scope link]
+ * |
+ * |-> {link prefix} -> (gw, oif) [scope local]
+ * |
+ * |-> {local prefix} (terminal node)
*/
-
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
struct fib_nh *nh)
{
int err;
struct net *net;
+ struct net_device *dev;
net = cfg->fc_nlinfo.nl_net;
if (nh->nh_gw) {
struct fib_result res;
- if (nh->nh_flags&RTNH_F_ONLINK) {
- struct net_device *dev;
+ if (nh->nh_flags & RTNH_F_ONLINK) {
if (cfg->fc_scope >= RT_SCOPE_LINK)
return -EINVAL;
if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
return -EINVAL;
- if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
+ dev = __dev_get_by_index(net, nh->nh_oif);
+ if (!dev)
return -ENODEV;
- if (!(dev->flags&IFF_UP))
+ if (!(dev->flags & IFF_UP))
return -ENETDOWN;
nh->nh_dev = dev;
dev_hold(dev);
nh->nh_scope = RT_SCOPE_LINK;
return 0;
}
+ rcu_read_lock();
{
struct flowi fl = {
.nl_u = {
@@ -559,50 +575,53 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
/* It is not necessary, but requires a bit of thinking */
if (fl.fl4_scope < RT_SCOPE_LINK)
fl.fl4_scope = RT_SCOPE_LINK;
- if ((err = fib_lookup(net, &fl, &res)) != 0)
+ err = fib_lookup(net, &fl, &res);
+ if (err) {
+ rcu_read_unlock();
return err;
+ }
}
err = -EINVAL;
if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
goto out;
nh->nh_scope = res.scope;
nh->nh_oif = FIB_RES_OIF(res);
- if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
+ nh->nh_dev = dev = FIB_RES_DEV(res);
+ if (!dev)
goto out;
- dev_hold(nh->nh_dev);
- err = -ENETDOWN;
- if (!(nh->nh_dev->flags & IFF_UP))
- goto out;
- err = 0;
-out:
- fib_res_put(&res);
- return err;
+ dev_hold(dev);
+ err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
} else {
struct in_device *in_dev;
- if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+ if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
return -EINVAL;
+ rcu_read_lock();
+ err = -ENODEV;
in_dev = inetdev_by_index(net, nh->nh_oif);
if (in_dev == NULL)
- return -ENODEV;
- if (!(in_dev->dev->flags&IFF_UP)) {
- in_dev_put(in_dev);
- return -ENETDOWN;
- }
+ goto out;
+ err = -ENETDOWN;
+ if (!(in_dev->dev->flags & IFF_UP))
+ goto out;
nh->nh_dev = in_dev->dev;
dev_hold(nh->nh_dev);
nh->nh_scope = RT_SCOPE_HOST;
- in_dev_put(in_dev);
+ err = 0;
}
- return 0;
+out:
+ rcu_read_unlock();
+ return err;
}
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
unsigned int mask = (fib_hash_size - 1);
- return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
+ return ((__force u32)val ^
+ ((__force u32)val >> 7) ^
+ ((__force u32)val >> 14)) & mask;
}
static struct hlist_head *fib_hash_alloc(int bytes)
@@ -611,7 +630,8 @@ static struct hlist_head *fib_hash_alloc(int bytes)
return kzalloc(bytes, GFP_KERNEL);
else
return (struct hlist_head *)
- __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(bytes));
}
static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -806,7 +826,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto failure;
} else {
change_nexthops(fi) {
- if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
+ err = fib_check_nh(cfg, fi, nexthop_nh);
+ if (err != 0)
goto failure;
} endfor_nexthops(fi)
}
@@ -819,7 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
}
link_it:
- if ((ofi = fib_find_info(fi)) != NULL) {
+ ofi = fib_find_info(fi);
+ if (ofi) {
fi->fib_dead = 1;
free_fib_info(fi);
ofi->fib_treeref++;
@@ -864,7 +886,7 @@ failure:
/* Note! fib_semantic_match intentionally uses RCU list functions. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
- struct fib_result *res, int prefixlen)
+ struct fib_result *res, int prefixlen, int fib_flags)
{
struct fib_alias *fa;
int nh_sel = 0;
@@ -879,7 +901,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
if (fa->fa_scope < flp->fl4_scope)
continue;
- fa->fa_state |= FA_S_ACCESSED;
+ fib_alias_accessed(fa);
err = fib_props[fa->fa_type].error;
if (err == 0) {
@@ -895,7 +917,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
case RTN_ANYCAST:
case RTN_MULTICAST:
for_nexthops(fi) {
- if (nh->nh_flags&RTNH_F_DEAD)
+ if (nh->nh_flags & RTNH_F_DEAD)
continue;
if (!flp->oif || flp->oif == nh->nh_oif)
break;
@@ -906,16 +928,15 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
goto out_fill_res;
}
#else
- if (nhsel < 1) {
+ if (nhsel < 1)
goto out_fill_res;
- }
#endif
endfor_nexthops(fi);
continue;
default:
- printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
- fa->fa_type);
+ pr_warning("fib_semantic_match bad type %#x\n",
+ fa->fa_type);
return -EINVAL;
}
}
@@ -929,7 +950,8 @@ out_fill_res:
res->type = fa->fa_type;
res->scope = fa->fa_scope;
res->fi = fa->fa_info;
- atomic_inc(&res->fi->fib_clntref);
+ if (!(fib_flags & FIB_LOOKUP_NOREF))
+ atomic_inc(&res->fi->fib_clntref);
return 0;
}
@@ -1028,10 +1050,10 @@ nla_put_failure:
}
/*
- Update FIB if:
- - local address disappeared -> we must delete all the entries
- referring to it.
- - device went down -> we must shutdown all nexthops going via it.
+ * Update FIB if:
+ * - local address disappeared -> we must delete all the entries
+ * referring to it.
+ * - device went down -> we must shutdown all nexthops going via it.
*/
int fib_sync_down_addr(struct net *net, __be32 local)
{
@@ -1078,7 +1100,7 @@ int fib_sync_down_dev(struct net_device *dev, int force)
prev_fi = fi;
dead = 0;
change_nexthops(fi) {
- if (nexthop_nh->nh_flags&RTNH_F_DEAD)
+ if (nexthop_nh->nh_flags & RTNH_F_DEAD)
dead++;
else if (nexthop_nh->nh_dev == dev &&
nexthop_nh->nh_scope != scope) {
@@ -1110,10 +1132,9 @@ int fib_sync_down_dev(struct net_device *dev, int force)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
- Dead device goes up. We wake up dead nexthops.
- It takes sense only on multipath routes.
+ * Dead device goes up. We wake up dead nexthops.
+ * It takes sense only on multipath routes.
*/
-
int fib_sync_up(struct net_device *dev)
{
struct fib_info *prev_fi;
@@ -1123,7 +1144,7 @@ int fib_sync_up(struct net_device *dev)
struct fib_nh *nh;
int ret;
- if (!(dev->flags&IFF_UP))
+ if (!(dev->flags & IFF_UP))
return 0;
prev_fi = NULL;
@@ -1142,12 +1163,12 @@ int fib_sync_up(struct net_device *dev)
prev_fi = fi;
alive = 0;
change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
+ if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
alive++;
continue;
}
if (nexthop_nh->nh_dev == NULL ||
- !(nexthop_nh->nh_dev->flags&IFF_UP))
+ !(nexthop_nh->nh_dev->flags & IFF_UP))
continue;
if (nexthop_nh->nh_dev != dev ||
!__in_dev_get_rtnl(dev))
@@ -1169,10 +1190,9 @@ int fib_sync_up(struct net_device *dev)
}
/*
- The algorithm is suboptimal, but it provides really
- fair weighted route distribution.
+ * The algorithm is suboptimal, but it provides really
+ * fair weighted route distribution.
*/
-
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
struct fib_info *fi = res->fi;
@@ -1182,7 +1202,7 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
if (fi->fib_power <= 0) {
int power = 0;
change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
+ if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
power += nexthop_nh->nh_weight;
nexthop_nh->nh_power = nexthop_nh->nh_weight;
}
@@ -1198,15 +1218,16 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
/* w should be random number [0..fi->fib_power-1],
- it is pretty bad approximation.
+ * it is pretty bad approximation.
*/
w = jiffies % fi->fib_power;
change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
+ if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
nexthop_nh->nh_power) {
- if ((w -= nexthop_nh->nh_power) <= 0) {
+ w -= nexthop_nh->nh_power;
+ if (w <= 0) {
nexthop_nh->nh_power--;
fi->fib_power--;
res->nh_sel = nhsel;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a8e370862b..cd5e13aee7d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -186,9 +186,7 @@ static inline struct tnode *node_parent_rcu(struct node *node)
{
struct tnode *ret = node_parent(node);
- return rcu_dereference_check(ret,
- rcu_read_lock_held() ||
- lockdep_rtnl_is_held());
+ return rcu_dereference_rtnl(ret);
}
/* Same as rcu_assign_pointer
@@ -211,9 +209,7 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
{
struct node *ret = tnode_get_child(tn, i);
- return rcu_dereference_check(ret,
- rcu_read_lock_held() ||
- lockdep_rtnl_is_held());
+ return rcu_dereference_rtnl(ret);
}
static inline int tnode_child_length(const struct tnode *tn)
@@ -459,8 +455,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
tn->empty_children = 1<<bits;
}
- pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
- (unsigned long) (sizeof(struct node) << bits));
+ pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
+ sizeof(struct node) << bits);
return tn;
}
@@ -609,11 +605,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* Keep root node larger */
- if (!node_parent((struct node*) tn)) {
+ if (!node_parent((struct node *)tn)) {
inflate_threshold_use = inflate_threshold_root;
halve_threshold_use = halve_threshold_root;
- }
- else {
+ } else {
inflate_threshold_use = inflate_threshold;
halve_threshold_use = halve_threshold;
}
@@ -639,7 +634,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
check_tnode(tn);
/* Return if at least one inflate is run */
- if( max_work != MAX_WORK)
+ if (max_work != MAX_WORK)
return (struct node *) tn;
/*
@@ -966,9 +961,7 @@ fib_find_node(struct trie *t, u32 key)
struct node *n;
pos = 0;
- n = rcu_dereference_check(t->trie,
- rcu_read_lock_held() ||
- lockdep_rtnl_is_held());
+ n = rcu_dereference_rtnl(t->trie);
while (n != NULL && NODE_TYPE(n) == T_TNODE) {
tn = (struct tnode *) n;
@@ -1349,7 +1342,7 @@ err:
/* should be called with rcu_read_lock */
static int check_leaf(struct trie *t, struct leaf *l,
t_key key, const struct flowi *flp,
- struct fib_result *res)
+ struct fib_result *res, int fib_flags)
{
struct leaf_info *li;
struct hlist_head *hhead = &l->list;
@@ -1363,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
if (l->key != (key & ntohl(mask)))
continue;
- err = fib_semantic_match(&li->falh, flp, res, plen);
+ err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags);
#ifdef CONFIG_IP_FIB_TRIE_STATS
if (err <= 0)
@@ -1379,7 +1372,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
}
int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
- struct fib_result *res)
+ struct fib_result *res, int fib_flags)
{
struct trie *t = (struct trie *) tb->tb_data;
int ret;
@@ -1391,8 +1384,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
t_key cindex = 0;
int current_prefix_length = KEYLENGTH;
struct tnode *cn;
- t_key node_prefix, key_prefix, pref_mismatch;
- int mp;
+ t_key pref_mismatch;
rcu_read_lock();
@@ -1406,7 +1398,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
/* Just a leaf? */
if (IS_LEAF(n)) {
- ret = check_leaf(t, (struct leaf *)n, key, flp, res);
+ ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
goto found;
}
@@ -1431,7 +1423,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
}
if (IS_LEAF(n)) {
- ret = check_leaf(t, (struct leaf *)n, key, flp, res);
+ ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
if (ret > 0)
goto backtrace;
goto found;
@@ -1507,10 +1499,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
* matching prefix.
*/
- node_prefix = mask_pfx(cn->key, cn->pos);
- key_prefix = mask_pfx(key, cn->pos);
- pref_mismatch = key_prefix^node_prefix;
- mp = 0;
+ pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
/*
* In short: If skipped bits in this node do not match
@@ -1518,13 +1507,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
* state.directly.
*/
if (pref_mismatch) {
- while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
- mp++;
- pref_mismatch = pref_mismatch << 1;
- }
- key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
+ int mp = KEYLENGTH - fls(pref_mismatch);
- if (key_prefix != 0)
+ if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
goto backtrace;
if (current_prefix_length >= cn->pos)
@@ -1748,16 +1733,14 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
/* Node empty, walk back up to parent */
c = (struct node *) p;
- } while ( (p = node_parent_rcu(c)) != NULL);
+ } while ((p = node_parent_rcu(c)) != NULL);
return NULL; /* Root of trie */
}
static struct leaf *trie_firstleaf(struct trie *t)
{
- struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie,
- rcu_read_lock_held() ||
- lockdep_rtnl_is_held());
+ struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
if (!n)
return NULL;
@@ -1855,7 +1838,8 @@ void fib_table_select_default(struct fib_table *tb,
if (!next_fi->fib_nh[0].nh_gw ||
next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
continue;
- fa->fa_state |= FA_S_ACCESSED;
+
+ fib_alias_accessed(fa);
if (fi == NULL) {
if (next_fi != res->fi)
@@ -2043,14 +2027,14 @@ struct fib_trie_iter {
struct seq_net_private p;
struct fib_table *tb;
struct tnode *tnode;
- unsigned index;
- unsigned depth;
+ unsigned int index;
+ unsigned int depth;
};
static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
{
struct tnode *tn = iter->tnode;
- unsigned cindex = iter->index;
+ unsigned int cindex = iter->index;
struct tnode *p;
/* A single entry routing table */
@@ -2159,7 +2143,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
*/
static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
{
- unsigned i, max, pointers, bytes, avdepth;
+ unsigned int i, max, pointers, bytes, avdepth;
if (stat->leaves)
avdepth = stat->totdepth*100 / stat->leaves;
@@ -2356,7 +2340,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v)
static void seq_indent(struct seq_file *seq, int n)
{
- while (n-- > 0) seq_puts(seq, " ");
+ while (n-- > 0)
+ seq_puts(seq, " ");
}
static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
@@ -2388,7 +2373,7 @@ static const char *const rtn_type_names[__RTN_MAX] = {
[RTN_XRESOLVE] = "XRESOLVE",
};
-static inline const char *rtn_type(char *buf, size_t len, unsigned t)
+static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
{
if (t < __RTN_MAX && rtn_type_names[t])
return rtn_type_names[t];
@@ -2544,13 +2529,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock();
}
-static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
{
- static unsigned type2flags[RTN_MAX + 1] = {
- [7] = RTF_REJECT, [8] = RTF_REJECT,
- };
- unsigned flags = type2flags[type];
+ unsigned int flags = 0;
+ if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
+ flags = RTF_REJECT;
if (fi && fi->fib_nh->nh_gw)
flags |= RTF_GATEWAY;
if (mask == htonl(0xFFFFFFFF))
@@ -2562,7 +2546,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
/*
* This outputs /proc/net/route.
* The format of the file is not supposed to be changed
- * and needs to be same as fib_hash output to avoid breaking
+ * and needs to be same as fib_hash output to avoid breaking
* legacy utilities
*/
static int fib_route_seq_show(struct seq_file *seq, void *v)
@@ -2587,7 +2571,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
list_for_each_entry_rcu(fa, &li->falh, fa_list) {
const struct fib_info *fi = fa->fa_info;
- unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
+ unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
int len;
if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 00000000000..caea6885fdb
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,151 @@
+/*
+ * GRE over IPv4 demultiplexer driver
+ *
+ * Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kmod.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <linux/spinlock.h>
+#include <net/protocol.h>
+#include <net/gre.h>
+
+
+/* Registered GRE handlers, one slot per version index below GREPROTO_MAX. */
+static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
+/* Taken around every update of gre_proto[] by the add/del helpers. */
+static DEFINE_SPINLOCK(gre_proto_lock);
+
+/*
+ * Install @proto as the handler for GRE version @version.
+ *
+ * Returns 0 on success. Returns -1 when @version does not fit in the
+ * gre_proto[] table or when that slot already holds a handler.
+ */
+int gre_add_protocol(const struct gre_protocol *proto, u8 version)
+{
+	int ret = -1;
+
+	if (version >= GREPROTO_MAX)
+		return ret;
+
+	/* Writers are serialised by the lock; the slot is published via RCU. */
+	spin_lock(&gre_proto_lock);
+	if (!gre_proto[version]) {
+		rcu_assign_pointer(gre_proto[version], proto);
+		ret = 0;
+	}
+	spin_unlock(&gre_proto_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gre_add_protocol);
+
+/*
+ * Remove @proto from the slot for GRE version @version.
+ *
+ * Returns 0 once the slot has been cleared and an RCU grace period has
+ * elapsed. Returns -1 when @version is out of range or the slot does not
+ * currently hold @proto (nothing is changed in that case).
+ */
+int gre_del_protocol(const struct gre_protocol *proto, u8 version)
+{
+	int found;
+
+	if (version >= GREPROTO_MAX)
+		return -1;
+
+	spin_lock(&gre_proto_lock);
+	found = (gre_proto[version] == proto);
+	if (found)
+		rcu_assign_pointer(gre_proto[version], NULL);
+	spin_unlock(&gre_proto_lock);
+
+	if (!found)
+		return -1;
+
+	/* Wait outside the spinlock for readers still using the old pointer. */
+	synchronize_rcu();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gre_del_protocol);
+
+/*
+ * Receive hook: hand @skb to the handler registered for its GRE version.
+ *
+ * Returns whatever the selected handler returns. When the first 12 bytes
+ * cannot be pulled, the version index is out of range, or no handler is
+ * registered, the skb is freed and NET_RX_DROP is returned.
+ */
+static int gre_rcv(struct sk_buff *skb)
+{
+	const struct gre_protocol *proto;
+	u8 ver;
+	int ret;
+
+	if (!pskb_may_pull(skb, 12))
+		goto drop;
+
+	/* The low seven bits of the second byte select the gre_proto[] slot. */
+	ver = skb->data[1] & 0x7f;
+	if (ver >= GREPROTO_MAX)
+		goto drop;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_proto[ver]);
+	if (proto && proto->handler) {
+		ret = proto->handler(skb);
+		rcu_read_unlock();
+		return ret;
+	}
+	rcu_read_unlock();
+
+drop:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+/*
+ * ICMP error hook: forward the error to the err_handler registered for
+ * the GRE version found in @skb.
+ *
+ * @skb:  buffer supplied by the ICMP layer; it is only inspected here
+ * @info: error information passed through unchanged to the handler
+ *
+ * The skb must not be released on any path: an err_handler does not own
+ * the buffer, the ICMP receive code frees it after the handler returns.
+ * Freeing it here as well would be a double free.
+ */
+static void gre_err(struct sk_buff *skb, u32 info)
+{
+	const struct gre_protocol *proto;
+	u8 ver;
+
+	if (!pskb_may_pull(skb, 12))
+		return;
+
+	/*
+	 * NOTE(review): this reads the version from skb->data[1], i.e. it
+	 * assumes skb->data points at the GRE header. In the ICMP error path
+	 * the data pointer may instead sit on the embedded IP header; confirm
+	 * and, if so, skip that header before reading the version byte.
+	 */
+	ver = skb->data[1] & 0x7f;
+	if (ver >= GREPROTO_MAX)
+		return;
+
+	rcu_read_lock();
+	proto = rcu_dereference(gre_proto[ver]);
+	if (proto && proto->err_handler)
+		proto->err_handler(skb, info);
+	rcu_read_unlock();
+}
+
+/* Protocol hooks that gre_init() registers for IPPROTO_GRE. */
+static const struct net_protocol net_gre_protocol = {
+	.handler	= gre_rcv,
+	.err_handler	= gre_err,
+	.netns_ok	= 1,
+};
+
+/*
+ * Module init: announce the driver and register net_gre_protocol as the
+ * IPPROTO_GRE handler.
+ *
+ * Returns 0 on success, -EAGAIN when the protocol cannot be registered.
+ */
+static int __init gre_init(void)
+{
+	/* The message needs its own newline so the log record is terminated. */
+	pr_info("GRE over IPv4 demultiplexor driver\n");
+
+	if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
+		pr_err("gre: can't add protocol\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/* Module exit: unregister the IPPROTO_GRE hooks installed by gre_init(). */
+static void __exit gre_exit(void)
+{
+	inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
+}
+
+module_init(gre_init);
+module_exit(gre_exit);
+
+MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_LICENSE("GPL");
+
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba..96bc7f9475a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
inet->tos = ip_hdr(skb)->tos;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
if (icmp_param->replyopts.optlen) {
ipc.opt = &icmp_param->replyopts;
if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
inet_sk(sk)->tos = tos;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
{
struct flowi fl = {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2a4bb76f213..c8877c6c721 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1269,14 +1269,14 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
- igmp_mod_timer(im, IGMP_Initial_Report_Delay);
- return;
- }
- /* else, v3 */
- im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv :
- IGMP_Unsolicited_Report_Count;
- igmp_ifc_event(in_dev);
+ /* a failover is happening and switches
+ * must be notified immediately */
+ if (IGMP_V1_SEEN(in_dev))
+ igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
+ else if (IGMP_V2_SEEN(in_dev))
+ igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
+ else
+ igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
#endif
}
EXPORT_SYMBOL(ip_mc_rejoin_group);
@@ -1418,6 +1418,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
write_unlock_bh(&in_dev->mc_list_lock);
}
+/* RTNL is locked */
static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
{
struct flowi fl = { .nl_u = { .ip4_u =
@@ -1428,15 +1429,12 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
if (imr->imr_ifindex) {
idev = inetdev_by_index(net, imr->imr_ifindex);
- if (idev)
- __in_dev_put(idev);
return idev;
}
if (imr->imr_address.s_addr) {
- dev = ip_dev_find(net, imr->imr_address.s_addr);
+ dev = __ip_dev_find(net, imr->imr_address.s_addr, false);
if (!dev)
return NULL;
- dev_put(dev);
}
if (!dev && !ip_route_output_key(net, &rt, &fl)) {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5fa2ddce32..ba804266584 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -425,7 +425,7 @@ static int inet_diag_bc_run(const void *bc, int len,
bc += op->no;
}
}
- return (len == 0);
+ return len == 0;
}
static int valid_cc(const void *bc, int len, int cc)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb7ad5a21ff..1b344f30b46 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
}
EXPORT_SYMBOL(inet_put_port);
-void __inet_inherit_port(struct sock *sk, struct sock *child)
+int __inet_inherit_port(struct sock *sk, struct sock *child)
{
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num,
+ unsigned short port = inet_sk(child)->inet_num;
+ const int bhash = inet_bhashfn(sock_net(sk), port,
table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
+ if (tb->port != port) {
+ /* NOTE: using tproxy and redirecting skbs to a proxy
+ * on a different listener port breaks the assumption
+ * that the listener socket's icsk_bind_hash is the same
+ * as that of the child socket. We have to look up or
+ * create a new bind bucket for the child here. */
+ struct hlist_node *node;
+ inet_bind_bucket_for_each(tb, node, &head->chain) {
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->port == port)
+ break;
+ }
+ if (!node) {
+ tb = inet_bind_bucket_create(table->bind_bucket_cachep,
+ sock_net(sk), head, port);
+ if (!tb) {
+ spin_unlock(&head->lock);
+ return -ENOMEM;
+ }
+ }
+ }
sk_add_bind_node(child, &tb->owners);
inet_csk(child)->icsk_bind_hash = tb;
spin_unlock(&head->lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde..168440834ad 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -116,11 +116,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
struct ip4_create_arg *arg = a;
qp = container_of(q, struct ipq, q);
- return (qp->id == arg->iph->id &&
+ return qp->id == arg->iph->id &&
qp->saddr == arg->iph->saddr &&
qp->daddr == arg->iph->daddr &&
qp->protocol == arg->iph->protocol &&
- qp->user == arg->user);
+ qp->user == arg->user;
}
/* Memory Tracking Functions. */
@@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
- if (skb_has_frags(head)) {
+ if (skb_has_frag_list(head)) {
struct sk_buff *clone;
int i, plen = 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 35c93e8b6a4..d0ffcbe369b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
+#include <net/gre.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
@@ -63,13 +64,13 @@
We cannot track such dead loops during route installation,
it is infeasible task. The most general solutions would be
to keep skb->encapsulation counter (sort of local ttl),
- and silently drop packet when it expires. It is the best
+ and silently drop packet when it expires. It is a good
solution, but it supposes maintaing new variable in ALL
skb, even if no tunneling is used.
- Current solution: HARD_TX_LOCK lock breaks dead loops.
-
-
+ Current solution: xmit_recursion breaks dead loops. This is a percpu
+ counter, since when we enter the first ndo_xmit(), cpu migration is
+ forbidden. We force an exit if this counter reaches RECURSION_LIMIT
2. Networking dead loops would not kill routers, but would really
kill network. IP hop limit plays role of "t->recursion" in this case,
@@ -128,7 +129,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev);
static int ipgre_net_id __read_mostly;
struct ipgre_net {
- struct ip_tunnel *tunnels[4][HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
struct net_device *fb_tunnel_dev;
};
@@ -158,13 +159,40 @@ struct ipgre_net {
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
/*
- * Locking : hash tables are protected by RCU and a spinlock
+ * Locking : hash tables are protected by RCU and RTNL
*/
-static DEFINE_SPINLOCK(ipgre_lock);
#define for_each_ip_tunnel_rcu(start) \
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+/*
+ * Frequently updated counters are kept per cpu; the remaining ones are
+ * shared and live in netdev->stats.
+ */
+struct pcpu_tstats {
+	unsigned long	rx_packets;
+	unsigned long	rx_bytes;
+	unsigned long	tx_packets;
+	unsigned long	tx_bytes;
+};
+
+/*
+ * Sum the per-cpu packet and byte counters of @dev over every possible
+ * cpu, store the totals in dev->stats and return a pointer to it.
+ */
+static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
+{
+	struct net_device_stats *stats = &dev->stats;
+	unsigned long rx_packets = 0, rx_bytes = 0;
+	unsigned long tx_packets = 0, tx_bytes = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct pcpu_tstats *ts = per_cpu_ptr(dev->tstats, cpu);
+
+		rx_packets += ts->rx_packets;
+		rx_bytes   += ts->rx_bytes;
+		tx_packets += ts->tx_packets;
+		tx_bytes   += ts->tx_bytes;
+	}
+
+	stats->rx_packets = rx_packets;
+	stats->rx_bytes   = rx_bytes;
+	stats->tx_packets = tx_packets;
+	stats->tx_bytes   = tx_bytes;
+	return stats;
+}
+
/* Given src, dst and key, find appropriate for input tunnel. */
static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
@@ -173,8 +201,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
{
struct net *net = dev_net(dev);
int link = dev->ifindex;
- unsigned h0 = HASH(remote);
- unsigned h1 = HASH(key);
+ unsigned int h0 = HASH(remote);
+ unsigned int h1 = HASH(key);
struct ip_tunnel *t, *cand = NULL;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
@@ -289,13 +317,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
return NULL;
}
-static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
+static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
struct ip_tunnel_parm *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
__be32 key = parms->i_key;
- unsigned h = HASH(key);
+ unsigned int h = HASH(key);
int prio = 0;
if (local)
@@ -308,7 +336,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
return &ign->tunnels[prio][h];
}
-static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
+static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
struct ip_tunnel *t)
{
return __ipgre_bucket(ign, &t->parms);
@@ -316,23 +344,22 @@ static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
- struct ip_tunnel **tp = ipgre_bucket(ign, t);
+ struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
- spin_lock_bh(&ipgre_lock);
- t->next = *tp;
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
- spin_unlock_bh(&ipgre_lock);
}
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
- struct ip_tunnel **tp;
-
- for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
- if (t == *tp) {
- spin_lock_bh(&ipgre_lock);
- *tp = t->next;
- spin_unlock_bh(&ipgre_lock);
+ struct ip_tunnel __rcu **tp;
+ struct ip_tunnel *iter;
+
+ for (tp = ipgre_bucket(ign, t);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
break;
}
}
@@ -346,10 +373,13 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
__be32 local = parms->iph.saddr;
__be32 key = parms->i_key;
int link = parms->link;
- struct ip_tunnel *t, **tp;
+ struct ip_tunnel *t;
+ struct ip_tunnel __rcu **tp;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
- for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
+ for (tp = __ipgre_bucket(ign, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next)
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
key == t->parms.i_key &&
@@ -360,7 +390,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
return t;
}
-static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
+static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
struct ip_tunnel_parm *parms, int create)
{
struct ip_tunnel *t, *nt;
@@ -582,7 +612,7 @@ static int ipgre_rcv(struct sk_buff *skb)
if ((tunnel = ipgre_tunnel_lookup(skb->dev,
iph->saddr, iph->daddr, key,
gre_proto))) {
- struct net_device_stats *stats = &tunnel->dev->stats;
+ struct pcpu_tstats *tstats;
secpath_reset(skb);
@@ -606,22 +636,22 @@ static int ipgre_rcv(struct sk_buff *skb)
/* Looped back packet, drop it! */
if (skb_rtable(skb)->fl.iif == 0)
goto drop;
- stats->multicast++;
+ tunnel->dev->stats.multicast++;
skb->pkt_type = PACKET_BROADCAST;
}
#endif
if (((flags&GRE_CSUM) && csum) ||
(!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
- stats->rx_crc_errors++;
- stats->rx_errors++;
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
goto drop;
}
if (tunnel->parms.i_flags&GRE_SEQ) {
if (!(flags&GRE_SEQ) ||
(tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
- stats->rx_fifo_errors++;
- stats->rx_errors++;
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
goto drop;
}
tunnel->i_seqno = seqno + 1;
@@ -630,8 +660,8 @@ static int ipgre_rcv(struct sk_buff *skb)
/* Warning: All skb pointers will be invalidated! */
if (tunnel->dev->type == ARPHRD_ETHER) {
if (!pskb_may_pull(skb, ETH_HLEN)) {
- stats->rx_length_errors++;
- stats->rx_errors++;
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
goto drop;
}
@@ -640,14 +670,19 @@ static int ipgre_rcv(struct sk_buff *skb)
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
}
- skb_tunnel_rx(skb, tunnel->dev);
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+
+ __skb_tunnel_rx(skb, tunnel->dev);
skb_reset_network_header(skb);
ipgre_ecn_decapsulate(iph, skb);
netif_rx(skb);
+
rcu_read_unlock();
- return(0);
+ return 0;
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -655,20 +690,19 @@ drop:
rcu_read_unlock();
drop_nolock:
kfree_skb(skb);
- return(0);
+ return 0;
}
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &dev->stats;
- struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+ struct pcpu_tstats *tstats;
struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
u8 tos;
__be16 df;
struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
+ struct net_device *tdev; /* Device to other host */
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int gre_hlen;
@@ -690,7 +724,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
/* NBMA tunnel */
if (skb_dst(skb) == NULL) {
- stats->tx_fifo_errors++;
+ dev->stats.tx_fifo_errors++;
goto tx_error;
}
@@ -736,14 +770,20 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
}
{
- struct flowi fl = { .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = dst,
- .saddr = tiph->saddr,
- .tos = RT_TOS(tos) } },
- .proto = IPPROTO_GRE };
+ struct flowi fl = {
+ .oif = tunnel->parms.link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = dst,
+ .saddr = tiph->saddr,
+ .tos = RT_TOS(tos)
+ }
+ },
+ .proto = IPPROTO_GRE
+ }
+;
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- stats->tx_carrier_errors++;
+ dev->stats.tx_carrier_errors++;
goto tx_error;
}
}
@@ -751,7 +791,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (tdev == dev) {
ip_rt_put(rt);
- stats->collisions++;
+ dev->stats.collisions++;
goto tx_error;
}
@@ -814,7 +854,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
dev->needed_headroom = max_headroom;
if (!new_skb) {
ip_rt_put(rt);
- txq->tx_dropped++;
+ dev->stats.tx_dropped++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -881,15 +921,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
}
nf_reset(skb);
-
- IPTUNNEL_XMIT();
+ tstats = this_cpu_ptr(dev->tstats);
+ __IPTUNNEL_XMIT(tstats, &dev->stats);
return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
- stats->tx_errors++;
+ dev->stats.tx_errors++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -909,13 +949,19 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
/* Guess output device to choose reasonable mtu and needed_headroom */
if (iph->daddr) {
- struct flowi fl = { .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos) } },
- .proto = IPPROTO_GRE };
+ struct flowi fl = {
+ .oif = tunnel->parms.link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .tos = RT_TOS(iph->tos)
+ }
+ },
+ .proto = IPPROTO_GRE
+ };
struct rtable *rt;
+
if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
tdev = rt->dst.dev;
ip_rt_put(rt);
@@ -1012,7 +1058,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
} else {
- unsigned nflags = 0;
+ unsigned int nflags = 0;
t = netdev_priv(dev);
@@ -1125,7 +1171,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
- const void *daddr, const void *saddr, unsigned len)
+ const void *daddr, const void *saddr, unsigned int len)
{
struct ip_tunnel *t = netdev_priv(dev);
struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
@@ -1167,13 +1213,19 @@ static int ipgre_open(struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
if (ipv4_is_multicast(t->parms.iph.daddr)) {
- struct flowi fl = { .oif = t->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = t->parms.iph.daddr,
- .saddr = t->parms.iph.saddr,
- .tos = RT_TOS(t->parms.iph.tos) } },
- .proto = IPPROTO_GRE };
+ struct flowi fl = {
+ .oif = t->parms.link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = t->parms.iph.daddr,
+ .saddr = t->parms.iph.saddr,
+ .tos = RT_TOS(t->parms.iph.tos)
+ }
+ },
+ .proto = IPPROTO_GRE
+ };
struct rtable *rt;
+
if (ip_route_output_key(dev_net(dev), &rt, &fl))
return -EADDRNOTAVAIL;
dev = rt->dst.dev;
@@ -1193,10 +1245,8 @@ static int ipgre_close(struct net_device *dev)
if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev;
in_dev = inetdev_by_index(dev_net(dev), t->mlink);
- if (in_dev) {
+ if (in_dev)
ip_mc_dec_group(in_dev, t->parms.iph.daddr);
- in_dev_put(in_dev);
- }
}
return 0;
}
@@ -1213,12 +1263,19 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_start_xmit = ipgre_tunnel_xmit,
.ndo_do_ioctl = ipgre_tunnel_ioctl,
.ndo_change_mtu = ipgre_tunnel_change_mtu,
+ .ndo_get_stats = ipgre_get_stats,
};
+static void ipgre_dev_free(struct net_device *dev) /* installed as dev->destructor */
+{
+ free_percpu(dev->tstats); /* per-cpu stats come from alloc_percpu() in the init functions */
+ free_netdev(dev);
+}
+
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = ipgre_dev_free;
dev->type = ARPHRD_IPGRE;
dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
@@ -1256,6 +1313,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
} else
dev->header_ops = &ipgre_header_ops;
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
return 0;
}
@@ -1274,14 +1335,13 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
tunnel->hlen = sizeof(struct iphdr) + 4;
dev_hold(dev);
- ign->tunnels_wc[0] = tunnel;
+ rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
}
-static const struct net_protocol ipgre_protocol = {
- .handler = ipgre_rcv,
- .err_handler = ipgre_err,
- .netns_ok = 1,
+static const struct gre_protocol ipgre_protocol = {
+ .handler = ipgre_rcv,
+ .err_handler = ipgre_err,
};
static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1291,11 +1351,13 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
for (prio = 0; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t = ign->tunnels[prio][h];
+ struct ip_tunnel *t;
+
+ t = rtnl_dereference(ign->tunnels[prio][h]);
while (t != NULL) {
unregister_netdevice_queue(t->dev, head);
- t = t->next;
+ t = rtnl_dereference(t->next);
}
}
}
@@ -1441,6 +1503,10 @@ static int ipgre_tap_init(struct net_device *dev)
ipgre_tunnel_bind_dev(dev);
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
return 0;
}
@@ -1451,6 +1517,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ipgre_tunnel_change_mtu,
+ .ndo_get_stats = ipgre_get_stats,
};
static void ipgre_tap_setup(struct net_device *dev)
@@ -1459,7 +1526,7 @@ static void ipgre_tap_setup(struct net_device *dev)
ether_setup(dev);
dev->netdev_ops = &ipgre_tap_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = ipgre_dev_free;
dev->iflink = 0;
dev->features |= NETIF_F_NETNS_LOCAL;
@@ -1487,6 +1554,10 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
if (!tb[IFLA_MTU])
dev->mtu = mtu;
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
err = register_netdevice(dev);
if (err)
goto out;
@@ -1522,7 +1593,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
t = nt;
if (dev->type != ARPHRD_ETHER) {
- unsigned nflags = 0;
+ unsigned int nflags = 0;
if (ipv4_is_multicast(p.iph.daddr))
nflags = IFF_BROADCAST;
@@ -1663,7 +1734,7 @@ static int __init ipgre_init(void)
if (err < 0)
return err;
- err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
+ err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
printk(KERN_INFO "ipgre init: can't add protocol\n");
goto add_proto_failed;
@@ -1683,7 +1754,7 @@ out:
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
unregister_pernet_device(&ipgre_net_ops);
goto out;
@@ -1693,7 +1764,7 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
+ if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
printk(KERN_INFO "ipgre close: can't remove protocol\n");
unregister_pernet_device(&ipgre_net_ops);
}
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ba9836c488e..1906fa35860 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -466,7 +466,7 @@ error:
}
return -EINVAL;
}
-
+EXPORT_SYMBOL(ip_options_compile);
/*
* Undo all the changes done by ip_options_compile().
@@ -646,3 +646,4 @@ int ip_options_rcv_srr(struct sk_buff *skb)
}
return 0;
}
+EXPORT_SYMBOL(ip_options_rcv_srr);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7649d775007..439d2a34ee4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
* LATER: this step can be merged to real generation of fragments,
* we can switch to copy when see the first bad fragment.
*/
- if (skb_has_frags(skb)) {
+ if (skb_has_frag_list(skb)) {
struct sk_buff *frag, *frag2;
int first_len = skb_pagelen(skb);
@@ -844,10 +844,9 @@ int ip_append_data(struct sock *sk,
inet->cork.length = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
- if ((exthdrlen = rt->dst.header_len) != 0) {
- length += exthdrlen;
- transhdrlen += exthdrlen;
- }
+ exthdrlen = rt->dst.header_len;
+ length += exthdrlen;
+ transhdrlen += exthdrlen;
} else {
rt = (struct rtable *)inet->cork.dst;
if (inet->cork.flags & IPCORK_OPT)
@@ -934,16 +933,19 @@ alloc_new_skb:
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
else
- alloclen = datalen + fragheaderlen;
+ alloclen = fraglen;
/* The last fragment gets additional space at tail.
* Note, with MSG_MORE we overallocate on fragments,
* because we have no idea what fragment will be
* the last.
*/
- if (datalen == length + fraggap)
+ if (datalen == length + fraggap) {
alloclen += rt->dst.trailer_len;
-
+ /* make sure mtu is not reached */
+ if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
+ datalen -= ALIGN(rt->dst.trailer_len, 8);
+ }
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len + 15,
@@ -960,7 +962,7 @@ alloc_new_skb:
else
/* only the initial fragment is
time stamped */
- ipc->shtx.flags = 0;
+ ipc->tx_flags = 0;
}
if (skb == NULL)
goto error;
@@ -971,7 +973,7 @@ alloc_new_skb:
skb->ip_summed = csummode;
skb->csum = 0;
skb_reserve(skb, hh_len);
- *skb_tx(skb) = ipc->shtx;
+ skb_shinfo(skb)->tx_flags = ipc->tx_flags;
/*
* Find where to start putting bytes.
@@ -1391,7 +1393,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
- ipc.shtx.flags = 0;
+ ipc.tx_flags = 0;
if (replyopts.opt.optlen) {
ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec036731a70..e9b816e6cd7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -122,31 +122,59 @@
static int ipip_net_id __read_mostly;
struct ipip_net {
- struct ip_tunnel *tunnels_r_l[HASH_SIZE];
- struct ip_tunnel *tunnels_r[HASH_SIZE];
- struct ip_tunnel *tunnels_l[HASH_SIZE];
- struct ip_tunnel *tunnels_wc[1];
- struct ip_tunnel **tunnels[4];
+ struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_wc[1];
+ struct ip_tunnel __rcu **tunnels[4];
struct net_device *fb_tunnel_dev;
};
-static void ipip_tunnel_init(struct net_device *dev);
+static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
+static void ipip_dev_free(struct net_device *dev);
/*
- * Locking : hash tables are protected by RCU and a spinlock
+ * Locking : hash tables are protected by RCU and RTNL
*/
-static DEFINE_SPINLOCK(ipip_lock);
#define for_each_ip_tunnel_rcu(start) \
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+/* Per-cpu counters for the stats that are modified often (rx/tx packets
+ * and bytes). The other counters are shared and stay in netdev->stats.
+ */
+struct pcpu_tstats {
+ unsigned long rx_packets;
+ unsigned long rx_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+};
+/* ndo_get_stats handler: sums the per-cpu rx/tx packet and byte counters
+ * over every possible CPU, stores the totals in dev->stats and returns a
+ * pointer to dev->stats. The other dev->stats fields are not written here.
+ */
+static struct net_device_stats *ipip_get_stats(struct net_device *dev)
+{
+ struct pcpu_tstats sum = { 0 };
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+
+ sum.rx_packets += tstats->rx_packets;
+ sum.rx_bytes += tstats->rx_bytes;
+ sum.tx_packets += tstats->tx_packets;
+ sum.tx_bytes += tstats->tx_bytes;
+ }
+ dev->stats.rx_packets = sum.rx_packets;
+ dev->stats.rx_bytes = sum.rx_bytes;
+ dev->stats.tx_packets = sum.tx_packets;
+ dev->stats.tx_bytes = sum.tx_bytes;
+ return &dev->stats;
+}
+
static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
__be32 remote, __be32 local)
{
- unsigned h0 = HASH(remote);
- unsigned h1 = HASH(local);
+ unsigned int h0 = HASH(remote);
+ unsigned int h1 = HASH(local);
struct ip_tunnel *t;
struct ipip_net *ipn = net_generic(net, ipip_net_id);
@@ -169,12 +197,12 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
return NULL;
}
-static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
+static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
struct ip_tunnel_parm *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
- unsigned h = 0;
+ unsigned int h = 0;
int prio = 0;
if (remote) {
@@ -188,7 +216,7 @@ static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
return &ipn->tunnels[prio][h];
}
-static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
+static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
struct ip_tunnel *t)
{
return __ipip_bucket(ipn, &t->parms);
@@ -196,13 +224,14 @@ static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
- struct ip_tunnel **tp;
-
- for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
- if (t == *tp) {
- spin_lock_bh(&ipip_lock);
- *tp = t->next;
- spin_unlock_bh(&ipip_lock);
+ struct ip_tunnel __rcu **tp;
+ struct ip_tunnel *iter;
+
+ for (tp = ipip_bucket(ipn, t);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
break;
}
}
@@ -210,12 +239,10 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
- struct ip_tunnel **tp = ipip_bucket(ipn, t);
+ struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
- spin_lock_bh(&ipip_lock);
- t->next = *tp;
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
- spin_unlock_bh(&ipip_lock);
}
static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -223,12 +250,15 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
- struct ip_tunnel *t, **tp, *nt;
+ struct ip_tunnel *t, *nt;
+ struct ip_tunnel __rcu **tp;
struct net_device *dev;
char name[IFNAMSIZ];
struct ipip_net *ipn = net_generic(net, ipip_net_id);
- for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) {
+ for (tp = __ipip_bucket(ipn, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next) {
if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
return t;
}
@@ -238,7 +268,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
if (parms->name[0])
strlcpy(name, parms->name, IFNAMSIZ);
else
- sprintf(name, "tunl%%d");
+ strcpy(name, "tunl%d");
dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
if (dev == NULL)
@@ -254,7 +284,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
nt = netdev_priv(dev);
nt->parms = *parms;
- ipip_tunnel_init(dev);
+ if (ipip_tunnel_init(dev) < 0)
+ goto failed_free;
if (register_netdevice(dev) < 0)
goto failed_free;
@@ -264,20 +295,19 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
return nt;
failed_free:
- free_netdev(dev);
+ ipip_dev_free(dev);
return NULL;
}
+/* called with RTNL */
static void ipip_tunnel_uninit(struct net_device *dev)
{
struct net *net = dev_net(dev);
struct ipip_net *ipn = net_generic(net, ipip_net_id);
- if (dev == ipn->fb_tunnel_dev) {
- spin_lock_bh(&ipip_lock);
- ipn->tunnels_wc[0] = NULL;
- spin_unlock_bh(&ipip_lock);
- } else
+ if (dev == ipn->fb_tunnel_dev)
+ rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
+ else
ipip_tunnel_unlink(ipn, netdev_priv(dev));
dev_put(dev);
}
@@ -359,8 +389,10 @@ static int ipip_rcv(struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
rcu_read_lock();
- if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
- iph->saddr, iph->daddr)) != NULL) {
+ tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
+ if (tunnel != NULL) {
+ struct pcpu_tstats *tstats;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
rcu_read_unlock();
kfree_skb(skb);
@@ -374,10 +406,16 @@ static int ipip_rcv(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
- skb_tunnel_rx(skb, tunnel->dev);
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+
+ __skb_tunnel_rx(skb, tunnel->dev);
ipip_ecn_decapsulate(iph, skb);
+
netif_rx(skb);
+
rcu_read_unlock();
return 0;
}
@@ -394,13 +432,12 @@ static int ipip_rcv(struct sk_buff *skb)
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &dev->stats;
- struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+ struct pcpu_tstats *tstats;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
+ struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
@@ -410,13 +447,13 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (skb->protocol != htons(ETH_P_IP))
goto tx_error;
- if (tos&1)
+ if (tos & 1)
tos = old_iph->tos;
if (!dst) {
/* NBMA tunnel */
if ((rt = skb_rtable(skb)) == NULL) {
- stats->tx_fifo_errors++;
+ dev->stats.tx_fifo_errors++;
goto tx_error;
}
if ((dst = rt->rt_gateway) == 0)
@@ -424,14 +461,20 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
{
- struct flowi fl = { .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = dst,
- .saddr = tiph->saddr,
- .tos = RT_TOS(tos) } },
- .proto = IPPROTO_IPIP };
+ struct flowi fl = {
+ .oif = tunnel->parms.link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = dst,
+ .saddr = tiph->saddr,
+ .tos = RT_TOS(tos)
+ }
+ },
+ .proto = IPPROTO_IPIP
+ };
+
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- stats->tx_carrier_errors++;
+ dev->stats.tx_carrier_errors++;
goto tx_error_icmp;
}
}
@@ -439,7 +482,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (tdev == dev) {
ip_rt_put(rt);
- stats->collisions++;
+ dev->stats.collisions++;
goto tx_error;
}
@@ -449,7 +492,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) {
- stats->collisions++;
+ dev->stats.collisions++;
ip_rt_put(rt);
goto tx_error;
}
@@ -485,7 +528,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb) {
ip_rt_put(rt);
- txq->tx_dropped++;
+ dev->stats.tx_dropped++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -522,14 +565,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
iph->ttl = old_iph->ttl;
nf_reset(skb);
-
- IPTUNNEL_XMIT();
+ tstats = this_cpu_ptr(dev->tstats);
+ __IPTUNNEL_XMIT(tstats, &dev->stats);
return NETDEV_TX_OK;
tx_error_icmp:
dst_link_failure(skb);
tx_error:
- stats->tx_errors++;
+ dev->stats.tx_errors++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -544,13 +587,19 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
iph = &tunnel->parms.iph;
if (iph->daddr) {
- struct flowi fl = { .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr,
- .tos = RT_TOS(iph->tos) } },
- .proto = IPPROTO_IPIP };
+ struct flowi fl = {
+ .oif = tunnel->parms.link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .tos = RT_TOS(iph->tos)
+ }
+ },
+ .proto = IPPROTO_IPIP
+ };
struct rtable *rt;
+
if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
tdev = rt->dst.dev;
ip_rt_put(rt);
@@ -696,13 +745,19 @@ static const struct net_device_ops ipip_netdev_ops = {
.ndo_start_xmit = ipip_tunnel_xmit,
.ndo_do_ioctl = ipip_tunnel_ioctl,
.ndo_change_mtu = ipip_tunnel_change_mtu,
-
+ .ndo_get_stats = ipip_get_stats,
};
+static void ipip_dev_free(struct net_device *dev) /* dev->destructor; also called directly on setup failure */
+{
+ free_percpu(dev->tstats); /* per-cpu stats come from alloc_percpu() in the init functions */
+ free_netdev(dev);
+}
+
static void ipip_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip_netdev_ops;
- dev->destructor = free_netdev;
+ dev->destructor = ipip_dev_free;
dev->type = ARPHRD_TUNNEL;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -711,10 +766,11 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
-static void ipip_tunnel_init(struct net_device *dev)
+static int ipip_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -725,9 +781,15 @@ static void ipip_tunnel_init(struct net_device *dev)
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
ipip_tunnel_bind_dev(dev);
+
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
}
-static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
+static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
@@ -740,11 +802,16 @@ static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
dev_hold(dev);
- ipn->tunnels_wc[0] = tunnel;
+ rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
+ return 0;
}
-static struct xfrm_tunnel ipip_handler = {
+static struct xfrm_tunnel ipip_handler __read_mostly = {
.handler = ipip_rcv,
.err_handler = ipip_err,
.priority = 1,
@@ -760,11 +827,12 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
for (prio = 1; prio < 4; prio++) {
int h;
for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t = ipn->tunnels[prio][h];
+ struct ip_tunnel *t;
+ t = rtnl_dereference(ipn->tunnels[prio][h]);
while (t != NULL) {
unregister_netdevice_queue(t->dev, head);
- t = t->next;
+ t = rtnl_dereference(t->next);
}
}
}
@@ -789,7 +857,9 @@ static int __net_init ipip_init_net(struct net *net)
}
dev_net_set(ipn->fb_tunnel_dev, net);
- ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
+ err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
+ if (err)
+ goto err_reg_dev;
if ((err = register_netdev(ipn->fb_tunnel_dev)))
goto err_reg_dev;
@@ -797,7 +867,7 @@ static int __net_init ipip_init_net(struct net *net)
return 0;
err_reg_dev:
- free_netdev(ipn->fb_tunnel_dev);
+ ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
/* nothing */
return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 179fcab866f..86dd5691af4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -75,7 +75,7 @@ struct mr_table {
struct net *net;
#endif
u32 id;
- struct sock *mroute_sk;
+ struct sock __rcu *mroute_sk;
struct timer_list ipmr_expire_timer;
struct list_head mfc_unres_queue;
struct list_head mfc_cache_array[MFC_LINES];
@@ -98,7 +98,7 @@ struct ipmr_result {
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
- Note that the changes are semaphored via rtnl_lock.
+ * Note that the changes are semaphored via rtnl_lock.
*/
static DEFINE_RWLOCK(mrt_lock);
@@ -113,11 +113,11 @@ static DEFINE_RWLOCK(mrt_lock);
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to original Alan's scheme. Hash table of resolved
- entries is changed only in process context and protected
- with weak lock mrt_lock. Queue of unresolved entries is protected
- with strong spinlock mfc_unres_lock.
-
- In this case data path is free of exclusive locks at all.
+ * entries is changed only in process context and protected
+ * with weak lock mrt_lock. Queue of unresolved entries is protected
+ * with strong spinlock mfc_unres_lock.
+ *
+ * In this case data path is free of exclusive locks at all.
*/
static struct kmem_cache *mrt_cachep __read_mostly;
@@ -396,9 +396,9 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
set_fs(KERNEL_DS);
err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
set_fs(oldfs);
- } else
+ } else {
err = -EOPNOTSUPP;
-
+ }
dev = NULL;
if (err == 0 &&
@@ -495,7 +495,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
dev->iflink = 0;
rcu_read_lock();
- if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
+ in_dev = __in_dev_get_rcu(dev);
+ if (!in_dev) {
rcu_read_unlock();
goto failure;
}
@@ -552,9 +553,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
mrt->mroute_reg_vif_num = -1;
#endif
- if (vifi+1 == mrt->maxvif) {
+ if (vifi + 1 == mrt->maxvif) {
int tmp;
- for (tmp=vifi-1; tmp>=0; tmp--) {
+
+ for (tmp = vifi - 1; tmp >= 0; tmp--) {
if (VIF_EXISTS(mrt, tmp))
break;
}
@@ -565,25 +567,33 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
dev_set_allmulti(dev, -1);
- if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
+ in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev) {
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
ip_rt_multicast_event(in_dev);
}
- if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
+ if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
unregister_netdevice_queue(dev, head);
dev_put(dev);
return 0;
}
-static inline void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free_rcu(struct rcu_head *head)
{
+ struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+
kmem_cache_free(mrt_cachep, c);
}
+static inline void ipmr_cache_free(struct mfc_cache *c)
+{
+ call_rcu(&c->rcu, ipmr_cache_free_rcu); /* ipmr_cache_free_rcu() does the actual kmem_cache_free() */
+}
+
/* Destroy an unresolved cache entry, killing queued skbs
- and reporting error to netlink readers.
+ * and reporting error to netlink readers.
*/
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
@@ -605,8 +615,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
memset(&e->msg, 0, sizeof(e->msg));
rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
- } else
+ } else {
kfree_skb(skb);
+ }
}
ipmr_cache_free(c);
@@ -724,13 +735,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
case 0:
if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
- if (dev && dev->ip_ptr == NULL) {
+ if (dev && __in_dev_get_rtnl(dev) == NULL) {
dev_put(dev);
return -EADDRNOTAVAIL;
}
- } else
+ } else {
dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
-
+ }
if (!dev)
return -EADDRNOTAVAIL;
err = dev_set_allmulti(dev, 1);
@@ -743,16 +754,16 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return -EINVAL;
}
- if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
+ in_dev = __in_dev_get_rtnl(dev);
+ if (!in_dev) {
dev_put(dev);
return -EADDRNOTAVAIL;
}
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
ip_rt_multicast_event(in_dev);
- /*
- * Fill in the VIF structures
- */
+ /* Fill in the VIF structures */
+
v->rate_limit = vifc->vifc_rate_limit;
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -765,14 +776,14 @@ static int vif_add(struct net *net, struct mr_table *mrt,
v->pkt_in = 0;
v->pkt_out = 0;
v->link = dev->ifindex;
- if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+ if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
v->link = dev->iflink;
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
v->dev = dev;
#ifdef CONFIG_IP_PIMSM
- if (v->flags&VIFF_REGISTER)
+ if (v->flags & VIFF_REGISTER)
mrt->mroute_reg_vif_num = vifi;
#endif
if (vifi+1 > mrt->maxvif)
@@ -781,6 +792,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
return 0;
}
+/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
__be32 origin,
__be32 mcastgrp)
@@ -788,7 +800,7 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
int line = MFC_HASH(mcastgrp, origin);
struct mfc_cache *c;
- list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
+ list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
return c;
}
@@ -801,19 +813,20 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
static struct mfc_cache *ipmr_cache_alloc(void)
{
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
- if (c == NULL)
- return NULL;
- c->mfc_un.res.minvif = MAXVIFS;
+
+ if (c)
+ c->mfc_un.res.minvif = MAXVIFS;
return c;
}
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
- if (c == NULL)
- return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10*HZ;
+
+ if (c) {
+ skb_queue_head_init(&c->mfc_un.unres.unresolved);
+ c->mfc_un.unres.expires = jiffies + 10*HZ;
+ }
return c;
}
@@ -827,17 +840,15 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
struct sk_buff *skb;
struct nlmsgerr *e;
- /*
- * Play the pending entries through our router
- */
+ /* Play the pending entries through our router */
while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
- nlh->nlmsg_len = (skb_tail_pointer(skb) -
- (u8 *)nlh);
+ nlh->nlmsg_len = skb_tail_pointer(skb) -
+ (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -848,8 +859,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
}
rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
- } else
+ } else {
ip_mr_forward(net, mrt, skb, c, 0);
+ }
}
}
@@ -867,6 +879,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
const int ihl = ip_hdrlen(pkt);
struct igmphdr *igmp;
struct igmpmsg *msg;
+ struct sock *mroute_sk;
int ret;
#ifdef CONFIG_IP_PIMSM
@@ -882,9 +895,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
#ifdef CONFIG_IP_PIMSM
if (assert == IGMPMSG_WHOLEPKT) {
/* Ugly, but we have no choice with this interface.
- Duplicate old header, fix ihl, length etc.
- And all this only to mangle msg->im_msgtype and
- to set msg->im_mbz to "mbz" :-)
+ * Duplicate old header, fix ihl, length etc.
+ * And all this only to mangle msg->im_msgtype and
+ * to set msg->im_mbz to "mbz" :-)
*/
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
@@ -901,39 +914,38 @@ static int ipmr_cache_report(struct mr_table *mrt,
#endif
{
- /*
- * Copy the IP header
- */
+ /* Copy the IP header */
skb->network_header = skb->tail;
skb_put(skb, ihl);
skb_copy_to_linear_data(skb, pkt->data, ihl);
- ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
+ ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
skb_dst_set(skb, dst_clone(skb_dst(pkt)));
- /*
- * Add our header
- */
+ /* Add our header */
- igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
+ igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
igmp->type =
msg->im_msgtype = assert;
- igmp->code = 0;
- ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
+ igmp->code = 0;
+ ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
skb->transport_header = skb->network_header;
}
- if (mrt->mroute_sk == NULL) {
+ rcu_read_lock();
+ mroute_sk = rcu_dereference(mrt->mroute_sk);
+ if (mroute_sk == NULL) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
- /*
- * Deliver to mrouted
- */
- ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
+ /* Deliver to mrouted */
+
+ ret = sock_queue_rcv_skb(mroute_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
if (net_ratelimit())
printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -965,9 +977,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
}
if (!found) {
- /*
- * Create a new entry if allowable
- */
+ /* Create a new entry if allowable */
if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
(c = ipmr_cache_alloc_unres()) == NULL) {
@@ -977,16 +987,14 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
+ /* Fill in the new cache entry */
+
c->mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
- /*
- * Reflect first query at mrouted.
- */
+ /* Reflect first query at mrouted. */
+
err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
if (err < 0) {
/* If the report failed throw the cache entry
@@ -1006,10 +1014,9 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen>3) {
+ /* See if we can append the packet */
+
+ if (c->mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
@@ -1035,9 +1042,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ list_del_rcu(&c->list);
ipmr_cache_free(c);
return 0;
@@ -1090,9 +1095,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
/*
* Check to see if we resolved a queued list. If so we
@@ -1130,26 +1133,21 @@ static void mroute_clean_tables(struct mr_table *mrt)
LIST_HEAD(list);
struct mfc_cache *c, *next;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
+
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif_table[i].flags&VIFF_STATIC))
+ if (!(mrt->vif_table[i].flags & VIFF_STATIC))
vif_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
+ /* Wipe the cache */
+
for (i = 0; i < MFC_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
- if (c->mfc_flags&MFC_STATIC)
+ if (c->mfc_flags & MFC_STATIC)
continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
+ list_del_rcu(&c->list);
ipmr_cache_free(c);
}
}
@@ -1164,6 +1162,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
}
}
+/* called from ip_ra_control(), before an RCU grace period,
+ * we don't need to call synchronize_rcu() here
+ */
static void mrtsock_destruct(struct sock *sk)
{
struct net *net = sock_net(sk);
@@ -1171,13 +1172,9 @@ static void mrtsock_destruct(struct sock *sk)
rtnl_lock();
ipmr_for_each_table(mrt, net) {
- if (sk == mrt->mroute_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
-
- write_lock_bh(&mrt_lock);
- mrt->mroute_sk = NULL;
- write_unlock_bh(&mrt_lock);
-
+ rcu_assign_pointer(mrt->mroute_sk, NULL);
mroute_clean_tables(mrt);
}
}
@@ -1204,7 +1201,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
return -ENOENT;
if (optname != MRT_INIT) {
- if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
+ if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
+ !capable(CAP_NET_ADMIN))
return -EACCES;
}
@@ -1217,23 +1215,20 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
return -ENOPROTOOPT;
rtnl_lock();
- if (mrt->mroute_sk) {
+ if (rtnl_dereference(mrt->mroute_sk)) {
rtnl_unlock();
return -EADDRINUSE;
}
ret = ip_ra_control(sk, 1, mrtsock_destruct);
if (ret == 0) {
- write_lock_bh(&mrt_lock);
- mrt->mroute_sk = sk;
- write_unlock_bh(&mrt_lock);
-
+ rcu_assign_pointer(mrt->mroute_sk, sk);
IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
case MRT_DONE:
- if (sk != mrt->mroute_sk)
+ if (sk != rcu_dereference_raw(mrt->mroute_sk))
return -EACCES;
return ip_ra_control(sk, 0, NULL);
case MRT_ADD_VIF:
@@ -1246,7 +1241,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
return -ENFILE;
rtnl_lock();
if (optname == MRT_ADD_VIF) {
- ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
+ ret = vif_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
} else {
ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
}
@@ -1267,7 +1263,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
if (optname == MRT_DEL_MFC)
ret = ipmr_mfc_delete(mrt, &mfc);
else
- ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
+ ret = ipmr_mfc_add(net, mrt, &mfc,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
/*
@@ -1276,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
case MRT_ASSERT:
{
int v;
- if (get_user(v,(int __user *)optval))
+ if (get_user(v, (int __user *)optval))
return -EFAULT;
mrt->mroute_do_assert = (v) ? 1 : 0;
return 0;
@@ -1286,7 +1283,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
{
int v;
- if (get_user(v,(int __user *)optval))
+ if (get_user(v, (int __user *)optval))
return -EFAULT;
v = (v) ? 1 : 0;
@@ -1309,14 +1306,16 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
return -EINVAL;
if (get_user(v, (u32 __user *)optval))
return -EFAULT;
- if (sk == mrt->mroute_sk)
- return -EBUSY;
rtnl_lock();
ret = 0;
- if (!ipmr_new_table(net, v))
- ret = -ENOMEM;
- raw_sk(sk)->ipmr_table = v;
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
+ ret = -EBUSY;
+ } else {
+ if (!ipmr_new_table(net, v))
+ ret = -ENOMEM;
+ raw_sk(sk)->ipmr_table = v;
+ }
rtnl_unlock();
return ret;
}
@@ -1347,9 +1346,9 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
- optname!=MRT_PIM &&
+ optname != MRT_PIM &&
#endif
- optname!=MRT_ASSERT)
+ optname != MRT_ASSERT)
return -ENOPROTOOPT;
if (get_user(olr, optlen))
@@ -1416,19 +1415,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
if (c) {
sr.pktcnt = c->mfc_un.res.pkt;
sr.bytecnt = c->mfc_un.res.bytes;
sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1465,7 +1464,7 @@ static struct notifier_block ip_mr_notifier = {
};
/*
- * Encapsulate a packet by attaching a valid IPIP header to it.
+ * Encapsulate a packet by attaching a valid IPIP header to it.
* This avoids tunnel drivers and other mess and gives us the speed so
* important for multicast video.
*/
@@ -1480,7 +1479,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
skb_reset_network_header(skb);
iph = ip_hdr(skb);
- iph->version = 4;
+ iph->version = 4;
iph->tos = old_iph->tos;
iph->ttl = old_iph->ttl;
iph->frag_off = 0;
@@ -1498,7 +1497,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
- struct ip_options * opt = &(IPCB(skb)->opt);
+ struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
@@ -1535,22 +1534,34 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
}
#endif
- if (vif->flags&VIFF_TUNNEL) {
- struct flowi fl = { .oif = vif->link,
- .nl_u = { .ip4_u =
- { .daddr = vif->remote,
- .saddr = vif->local,
- .tos = RT_TOS(iph->tos) } },
- .proto = IPPROTO_IPIP };
+ if (vif->flags & VIFF_TUNNEL) {
+ struct flowi fl = {
+ .oif = vif->link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = vif->remote,
+ .saddr = vif->local,
+ .tos = RT_TOS(iph->tos)
+ }
+ },
+ .proto = IPPROTO_IPIP
+ };
+
if (ip_route_output_key(net, &rt, &fl))
goto out_free;
encap = sizeof(struct iphdr);
} else {
- struct flowi fl = { .oif = vif->link,
- .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .tos = RT_TOS(iph->tos) } },
- .proto = IPPROTO_IPIP };
+ struct flowi fl = {
+ .oif = vif->link,
+ .nl_u = {
+ .ip4_u = {
+ .daddr = iph->daddr,
+ .tos = RT_TOS(iph->tos)
+ }
+ },
+ .proto = IPPROTO_IPIP
+ };
+
if (ip_route_output_key(net, &rt, &fl))
goto out_free;
}
@@ -1559,8 +1570,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
/* Do not fragment multicasts. Alas, IPv4 does not
- allow to send ICMP, so that packets will disappear
- to blackhole.
+ * allow to send ICMP, so that packets will disappear
+ * to blackhole.
*/
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -1583,7 +1594,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
ip_decrease_ttl(ip_hdr(skb));
/* FIXME: forward and output firewalls used to be called here.
- * What do we do with netfilter? -- RR */
+ * What do we do with netfilter? -- RR
+ */
if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
@@ -1644,15 +1656,15 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
if (skb_rtable(skb)->fl.iif == 0) {
/* It is our own packet, looped back.
- Very complicated situation...
-
- The best workaround until routing daemons will be
- fixed is not to redistribute packet, if it was
- send through wrong interface. It means, that
- multicast applications WILL NOT work for
- (S,G), which have default multicast route pointing
- to wrong oif. In any case, it is not a good
- idea to use multicasting applications on router.
+ * Very complicated situation...
+ *
+ * The best workaround until routing daemons will be
+ * fixed is not to redistribute packet, if it was
+ * sent through wrong interface. It means, that
+ * multicast applications WILL NOT work for
+ * (S,G), which have default multicast route pointing
+ * to wrong oif. In any case, it is not a good
+ * idea to use multicasting applications on router.
*/
goto dont_forward;
}
@@ -1662,9 +1674,9 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
- so that we cannot check that packet arrived on an oif.
- It is bad, but otherwise we would need to move pretty
- large chunk of pimd to kernel. Ough... --ANK
+ * so that we cannot check that packet arrived on an oif.
+ * It is bad, but otherwise we would need to move pretty
+ * large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
cache->mfc_un.res.ttls[true_vifi] < 255) &&
@@ -1682,10 +1694,12 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
/*
* Forward the frame
*/
- for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = cache->mfc_un.res.maxvif - 1;
+ ct >= cache->mfc_un.res.minvif; ct--) {
if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
if (skb2)
ipmr_queue_xmit(net, mrt, skb2, cache,
psend);
@@ -1696,6 +1710,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
if (psend != -1) {
if (local) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
if (skb2)
ipmr_queue_xmit(net, mrt, skb2, cache, psend);
} else {
@@ -1713,6 +1728,7 @@ dont_forward:
/*
* Multicast packets for forwarding arrive here
+ * Called with rcu_read_lock();
*/
int ip_mr_input(struct sk_buff *skb)
@@ -1724,9 +1740,9 @@ int ip_mr_input(struct sk_buff *skb)
int err;
/* Packet is looped back after forward, it should not be
- forwarded second time, but still can be delivered locally.
+ * forwarded second time, but still can be delivered locally.
*/
- if (IPCB(skb)->flags&IPSKB_FORWARDED)
+ if (IPCB(skb)->flags & IPSKB_FORWARDED)
goto dont_forward;
err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
@@ -1736,28 +1752,28 @@ int ip_mr_input(struct sk_buff *skb)
}
if (!local) {
- if (IPCB(skb)->opt.router_alert) {
- if (ip_call_ra_chain(skb))
- return 0;
- } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
- /* IGMPv1 (and broken IGMPv2 implementations sort of
- Cisco IOS <= 11.2(8)) do not put router alert
- option to IGMP packets destined to routable
- groups. It is very bad, because it means
- that we can forward NO IGMP messages.
- */
- read_lock(&mrt_lock);
- if (mrt->mroute_sk) {
- nf_reset(skb);
- raw_rcv(mrt->mroute_sk, skb);
- read_unlock(&mrt_lock);
- return 0;
- }
- read_unlock(&mrt_lock);
+ if (IPCB(skb)->opt.router_alert) {
+ if (ip_call_ra_chain(skb))
+ return 0;
+ } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
+ /* IGMPv1 (and broken IGMPv2 implementations such as
+ * Cisco IOS <= 11.2(8)) do not put router alert
+ * option to IGMP packets destined to routable
+ * groups. It is very bad, because it means
+ * that we can forward NO IGMP messages.
+ */
+ struct sock *mroute_sk;
+
+ mroute_sk = rcu_dereference(mrt->mroute_sk);
+ if (mroute_sk) {
+ nf_reset(skb);
+ raw_rcv(mroute_sk, skb);
+ return 0;
+ }
}
}
- read_lock(&mrt_lock);
+ /* already under rcu_read_lock() */
cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
/*
@@ -1769,13 +1785,12 @@ int ip_mr_input(struct sk_buff *skb)
if (local) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
ip_local_deliver(skb);
- if (skb2 == NULL) {
- read_unlock(&mrt_lock);
+ if (skb2 == NULL)
return -ENOBUFS;
- }
skb = skb2;
}
+ read_lock(&mrt_lock);
vif = ipmr_find_vif(mrt, skb->dev);
if (vif >= 0) {
int err2 = ipmr_cache_unresolved(mrt, vif, skb);
@@ -1788,8 +1803,8 @@ int ip_mr_input(struct sk_buff *skb)
return -ENODEV;
}
+ read_lock(&mrt_lock);
ip_mr_forward(net, mrt, skb, cache, local);
-
read_unlock(&mrt_lock);
if (local)
@@ -1805,6 +1820,7 @@ dont_forward:
}
#ifdef CONFIG_IP_PIMSM
+/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
unsigned int pimlen)
{
@@ -1813,10 +1829,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
/*
- Check that:
- a. packet is really destinted to a multicast group
- b. packet is not a NULL-REGISTER
- c. packet is not truncated
+ * Check that:
+ * a. packet is really sent to a multicast group
+ * b. packet is not a NULL-REGISTER
+ * c. packet is not truncated
*/
if (!ipv4_is_multicast(encap->daddr) ||
encap->tot_len == 0 ||
@@ -1826,26 +1842,23 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
read_lock(&mrt_lock);
if (mrt->mroute_reg_vif_num >= 0)
reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
- if (reg_dev)
- dev_hold(reg_dev);
read_unlock(&mrt_lock);
if (reg_dev == NULL)
return 1;
skb->mac_header = skb->network_header;
- skb_pull(skb, (u8*)encap - skb->data);
+ skb_pull(skb, (u8 *)encap - skb->data);
skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IP);
- skb->ip_summed = 0;
+ skb->ip_summed = CHECKSUM_NONE;
skb->pkt_type = PACKET_HOST;
skb_tunnel_rx(skb, reg_dev);
netif_rx(skb);
- dev_put(reg_dev);
- return 0;
+ return NET_RX_SUCCESS;
}
#endif
@@ -1854,7 +1867,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
* Handle IGMP messages of PIMv1
*/
-int pim_rcv_v1(struct sk_buff * skb)
+int pim_rcv_v1(struct sk_buff *skb)
{
struct igmphdr *pim;
struct net *net = dev_net(skb->dev);
@@ -1881,7 +1894,7 @@ drop:
#endif
#ifdef CONFIG_IP_PIMSM_V2
-static int pim_rcv(struct sk_buff * skb)
+static int pim_rcv(struct sk_buff *skb)
{
struct pimreghdr *pim;
struct net *net = dev_net(skb->dev);
@@ -1891,8 +1904,8 @@ static int pim_rcv(struct sk_buff * skb)
goto drop;
pim = (struct pimreghdr *)skb_transport_header(skb);
- if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
- (pim->flags&PIM_NULL_REGISTER) ||
+ if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
+ (pim->flags & PIM_NULL_REGISTER) ||
(ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
csum_fold(skb_checksum(skb, 0, skb->len, 0))))
goto drop;
@@ -1958,28 +1971,33 @@ int ipmr_get_route(struct net *net,
if (mrt == NULL)
return -ENOENT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
if (cache == NULL) {
struct sk_buff *skb2;
struct iphdr *iph;
struct net_device *dev;
- int vif;
+ int vif = -1;
if (nowait) {
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EAGAIN;
}
dev = skb->dev;
- if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
+ read_lock(&mrt_lock);
+ if (dev)
+ vif = ipmr_find_vif(mrt, dev);
+ if (vif < 0) {
read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -ENODEV;
}
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2) {
read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -ENOMEM;
}
@@ -1992,13 +2010,16 @@ int ipmr_get_route(struct net *net,
iph->version = 0;
err = ipmr_cache_unresolved(mrt, vif, skb2);
read_unlock(&mrt_lock);
+ rcu_read_unlock();
return err;
}
- if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
+ read_lock(&mrt_lock);
+ if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
cache->mfc_flags |= MFC_NOTIFY;
err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
read_unlock(&mrt_lock);
+ rcu_read_unlock();
return err;
}
@@ -2050,14 +2071,14 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[1];
s_e = cb->args[2];
- read_lock(&mrt_lock);
+ rcu_read_lock();
ipmr_for_each_table(mrt, net) {
if (t < s_t)
goto next_table;
if (t > s_t)
s_h = 0;
for (h = s_h; h < MFC_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
if (e < s_e)
goto next_entry;
if (ipmr_fill_mroute(mrt, skb,
@@ -2075,7 +2096,7 @@ next_table:
t++;
}
done:
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
cb->args[2] = e;
cb->args[1] = h;
@@ -2086,7 +2107,8 @@ done:
#ifdef CONFIG_PROC_FS
/*
- * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
+ * The /proc interfaces to multicast routing :
+ * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
*/
struct ipmr_vif_iter {
struct seq_net_private p;
@@ -2208,14 +2230,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
struct mr_table *mrt = it->mrt;
struct mfc_cache *mfc;
- read_lock(&mrt_lock);
+ rcu_read_lock();
for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
it->cache = &mrt->mfc_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
+ list_for_each_entry_rcu(mfc, it->cache, list)
if (pos-- == 0)
return mfc;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
spin_lock_bh(&mfc_unres_lock);
it->cache = &mrt->mfc_unres_queue;
@@ -2274,7 +2296,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
/* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
it->cache = &mrt->mfc_unres_queue;
it->ct = 0;
@@ -2282,7 +2304,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!list_empty(it->cache))
return list_first_entry(it->cache, struct mfc_cache, list);
- end_of_list:
+end_of_list:
spin_unlock_bh(&mfc_unres_lock);
it->cache = NULL;
@@ -2297,7 +2319,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
if (it->cache == &mrt->mfc_unres_queue)
spin_unlock_bh(&mfc_unres_lock);
else if (it->cache == &mrt->mfc_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -2323,7 +2345,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
mfc->mfc_un.res.bytes,
mfc->mfc_un.res.wrong_if);
for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++ ) {
+ n < mfc->mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
mfc->mfc_un.res.ttls[n] < 255)
seq_printf(seq,
@@ -2421,7 +2443,7 @@ int __init ip_mr_init(void)
mrt_cachep = kmem_cache_create("ip_mrt_cache",
sizeof(struct mfc_cache),
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL);
if (!mrt_cachep)
return -ENOMEM;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1833bdbf980..8e3350643b6 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -324,10 +324,10 @@ config IP_NF_TARGET_ECN
config IP_NF_TARGET_TTL
tristate '"TTL" target support'
- depends on NETFILTER_ADVANCED
+ depends on NETFILTER_ADVANCED && IP_NF_MANGLE
select NETFILTER_XT_TARGET_HL
---help---
- This is a backwards-compat option for the user's convenience
+ This is a backwards-compatible option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_HL.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8f4f9a57f1..3cad2591ace 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -72,7 +72,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
for (i = 0; i < len; i++)
ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
- return (ret != 0);
+ return ret != 0;
}
/*
@@ -228,7 +228,7 @@ arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
return NF_DROP;
}
-static inline const struct arpt_entry_target *
+static inline const struct xt_entry_target *
arpt_get_target_c(const struct arpt_entry *e)
{
return arpt_get_target((struct arpt_entry *)e);
@@ -282,7 +282,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
arp = arp_hdr(skb);
do {
- const struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
e = arpt_next_entry(e);
@@ -297,10 +297,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
if (!t->u.kernel.target->target) {
int v;
- v = ((struct arpt_standard_target *)t)->verdict;
+ v = ((struct xt_standard_target *)t)->verdict;
if (v < 0) {
/* Pop from stack? */
- if (v != ARPT_RETURN) {
+ if (v != XT_RETURN) {
verdict = (unsigned)(-v) - 1;
break;
}
@@ -332,7 +332,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
/* Target might have changed stuff. */
arp = arp_hdr(skb);
- if (verdict == ARPT_CONTINUE)
+ if (verdict == XT_CONTINUE)
e = arpt_next_entry(e);
else
/* Verdict */
@@ -377,7 +377,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- const struct arpt_standard_target *t
+ const struct xt_standard_target *t
= (void *)arpt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
@@ -392,13 +392,13 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* Unconditional return/END. */
if ((e->target_offset == sizeof(struct arpt_entry) &&
(strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0) &&
+ XT_STANDARD_TARGET) == 0) &&
t->verdict < 0 && unconditional(&e->arp)) ||
visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0) &&
+ XT_STANDARD_TARGET) == 0) &&
t->verdict < -NF_MAX_VERDICT - 1) {
duprintf("mark_source_chains: bad "
"negative verdict (%i)\n",
@@ -433,7 +433,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
int newpos = t->verdict;
if (strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0 &&
+ XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
if (newpos > newinfo->size -
sizeof(struct arpt_entry)) {
@@ -464,14 +464,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
static inline int check_entry(const struct arpt_entry *e, const char *name)
{
- const struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
if (!arp_checkentry(&e->arp)) {
duprintf("arp_tables: arp check failed %p %s.\n", e, name);
return -EINVAL;
}
- if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset)
+ if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
return -EINVAL;
t = arpt_get_target_c(e);
@@ -483,7 +483,7 @@ static inline int check_entry(const struct arpt_entry *e, const char *name)
static inline int check_target(struct arpt_entry *e, const char *name)
{
- struct arpt_entry_target *t = arpt_get_target(e);
+ struct xt_entry_target *t = arpt_get_target(e);
int ret;
struct xt_tgchk_param par = {
.table = name,
@@ -506,7 +506,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
static inline int
find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
int ret;
@@ -536,7 +536,7 @@ out:
static bool check_underflow(const struct arpt_entry *e)
{
- const struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
unsigned int verdict;
if (!unconditional(&e->arp))
@@ -544,7 +544,7 @@ static bool check_underflow(const struct arpt_entry *e)
t = arpt_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
return false;
- verdict = ((struct arpt_standard_target *)t)->verdict;
+ verdict = ((struct xt_standard_target *)t)->verdict;
verdict = -verdict - 1;
return verdict == NF_DROP || verdict == NF_ACCEPT;
}
@@ -566,7 +566,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
}
if (e->next_offset
- < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
+ < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
duprintf("checking: element %p size %u\n",
e, e->next_offset);
return -EINVAL;
@@ -598,7 +598,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
static inline void cleanup_entry(struct arpt_entry *e)
{
struct xt_tgdtor_param par;
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
t = arpt_get_target(e);
par.target = t->u.kernel.target;
@@ -794,7 +794,7 @@ static int copy_entries_to_user(unsigned int total_size,
/* FIXME: use iterator macros --RR */
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
- const struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
e = (struct arpt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -807,7 +807,7 @@ static int copy_entries_to_user(unsigned int total_size,
t = arpt_get_target_c(e);
if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct arpt_entry_target,
+ + offsetof(struct xt_entry_target,
u.user.name),
t->u.kernel.target->name,
strlen(t->u.kernel.target->name)+1) != 0) {
@@ -844,7 +844,7 @@ static int compat_calc_entry(const struct arpt_entry *e,
const struct xt_table_info *info,
const void *base, struct xt_table_info *newinfo)
{
- const struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
unsigned int entry_offset;
int off, i, ret;
@@ -895,7 +895,7 @@ static int compat_table_info(const struct xt_table_info *info,
static int get_info(struct net *net, void __user *user,
const int *len, int compat)
{
- char name[ARPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
int ret;
@@ -908,7 +908,7 @@ static int get_info(struct net *net, void __user *user,
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
- name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_lock(NFPROTO_ARP);
@@ -1204,7 +1204,7 @@ static int do_add_counters(struct net *net, const void __user *user,
#ifdef CONFIG_COMPAT
static inline void compat_release_entry(struct compat_arpt_entry *e)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
t = compat_arpt_get_target(e);
module_put(t->u.kernel.target->me);
@@ -1220,7 +1220,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
const unsigned int *underflows,
const char *name)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
int ret, off, h;
@@ -1288,7 +1288,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
unsigned int *size, const char *name,
struct xt_table_info *newinfo, unsigned char *base)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
@@ -1474,7 +1474,7 @@ out_unlock:
}
struct compat_arpt_replace {
- char name[ARPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
u32 num_entries;
u32 size;
@@ -1567,7 +1567,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
struct xt_counters *counters,
unsigned int i)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct compat_arpt_entry __user *ce;
u_int16_t target_offset, next_offset;
compat_uint_t origsize;
@@ -1628,7 +1628,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
}
struct compat_arpt_get_entries {
- char name[ARPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_arpt_entry entrytable[0];
};
@@ -1828,7 +1828,7 @@ void arpt_unregister_table(struct xt_table *table)
/* The built-in targets: standard (NULL) and error. */
static struct xt_target arpt_builtin_tg[] __read_mostly = {
{
- .name = ARPT_STANDARD_TARGET,
+ .name = XT_STANDARD_TARGET,
.targetsize = sizeof(int),
.family = NFPROTO_ARP,
#ifdef CONFIG_COMPAT
@@ -1838,9 +1838,9 @@ static struct xt_target arpt_builtin_tg[] __read_mostly = {
#endif
},
{
- .name = ARPT_ERROR_TARGET,
+ .name = XT_ERROR_TARGET,
.target = arpt_error,
- .targetsize = ARPT_FUNCTION_MAXNAMELEN,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
.family = NFPROTO_ARP,
},
};
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index e1be7dd1171..b8ddcc480ed 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -63,7 +63,7 @@ static int checkentry(const struct xt_tgchk_param *par)
return false;
if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
- mangle->target != ARPT_CONTINUE)
+ mangle->target != XT_CONTINUE)
return false;
return true;
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d163f2e3b2e..d31b007a6d8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -186,7 +186,7 @@ static inline bool unconditional(const struct ipt_ip *ip)
}
/* for const-correctness */
-static inline const struct ipt_entry_target *
+static inline const struct xt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
return ipt_get_target((struct ipt_entry *)e);
@@ -230,9 +230,9 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
const char *hookname, const char **chainname,
const char **comment, unsigned int *rulenum)
{
- const struct ipt_standard_target *t = (void *)ipt_get_target_c(s);
+ const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
- if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
+ if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
/* Head of user chain: ERROR target with chainname */
*chainname = t->target.data;
(*rulenum) = 0;
@@ -241,7 +241,7 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
if (s->target_offset == sizeof(struct ipt_entry) &&
strcmp(t->target.u.kernel.target->name,
- IPT_STANDARD_TARGET) == 0 &&
+ XT_STANDARD_TARGET) == 0 &&
t->verdict < 0 &&
unconditional(&s->ip)) {
/* Tail of chains: STANDARD target (return/policy) */
@@ -346,7 +346,7 @@ ipt_do_table(struct sk_buff *skb,
get_entry(table_base, private->underflow[hook]));
do {
- const struct ipt_entry_target *t;
+ const struct xt_entry_target *t;
const struct xt_entry_match *ematch;
IP_NF_ASSERT(e);
@@ -380,10 +380,10 @@ ipt_do_table(struct sk_buff *skb,
if (!t->u.kernel.target->target) {
int v;
- v = ((struct ipt_standard_target *)t)->verdict;
+ v = ((struct xt_standard_target *)t)->verdict;
if (v < 0) {
/* Pop from stack? */
- if (v != IPT_RETURN) {
+ if (v != XT_RETURN) {
verdict = (unsigned)(-v) - 1;
break;
}
@@ -421,7 +421,7 @@ ipt_do_table(struct sk_buff *skb,
verdict = t->u.kernel.target->target(skb, &acpar);
/* Target might have changed stuff. */
ip = ip_hdr(skb);
- if (verdict == IPT_CONTINUE)
+ if (verdict == XT_CONTINUE)
e = ipt_next_entry(e);
else
/* Verdict */
@@ -461,7 +461,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- const struct ipt_standard_target *t
+ const struct xt_standard_target *t
= (void *)ipt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
@@ -475,13 +475,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Unconditional return/END. */
if ((e->target_offset == sizeof(struct ipt_entry) &&
(strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0) &&
+ XT_STANDARD_TARGET) == 0) &&
t->verdict < 0 && unconditional(&e->ip)) ||
visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0) &&
+ XT_STANDARD_TARGET) == 0) &&
t->verdict < -NF_MAX_VERDICT - 1) {
duprintf("mark_source_chains: bad "
"negative verdict (%i)\n",
@@ -524,7 +524,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
int newpos = t->verdict;
if (strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0 &&
+ XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
if (newpos > newinfo->size -
sizeof(struct ipt_entry)) {
@@ -552,7 +552,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
return 1;
}
-static void cleanup_match(struct ipt_entry_match *m, struct net *net)
+static void cleanup_match(struct xt_entry_match *m, struct net *net)
{
struct xt_mtdtor_param par;
@@ -568,14 +568,14 @@ static void cleanup_match(struct ipt_entry_match *m, struct net *net)
static int
check_entry(const struct ipt_entry *e, const char *name)
{
- const struct ipt_entry_target *t;
+ const struct xt_entry_target *t;
if (!ip_checkentry(&e->ip)) {
duprintf("ip check failed %p %s.\n", e, par->match->name);
return -EINVAL;
}
- if (e->target_offset + sizeof(struct ipt_entry_target) >
+ if (e->target_offset + sizeof(struct xt_entry_target) >
e->next_offset)
return -EINVAL;
@@ -587,7 +587,7 @@ check_entry(const struct ipt_entry *e, const char *name)
}
static int
-check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
+check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
int ret;
@@ -605,7 +605,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
}
static int
-find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
+find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
struct xt_match *match;
int ret;
@@ -630,7 +630,7 @@ err:
static int check_target(struct ipt_entry *e, struct net *net, const char *name)
{
- struct ipt_entry_target *t = ipt_get_target(e);
+ struct xt_entry_target *t = ipt_get_target(e);
struct xt_tgchk_param par = {
.net = net,
.table = name,
@@ -656,7 +656,7 @@ static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
unsigned int size)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
int ret;
unsigned int j;
@@ -707,7 +707,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
static bool check_underflow(const struct ipt_entry *e)
{
- const struct ipt_entry_target *t;
+ const struct xt_entry_target *t;
unsigned int verdict;
if (!unconditional(&e->ip))
@@ -715,7 +715,7 @@ static bool check_underflow(const struct ipt_entry *e)
t = ipt_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
return false;
- verdict = ((struct ipt_standard_target *)t)->verdict;
+ verdict = ((struct xt_standard_target *)t)->verdict;
verdict = -verdict - 1;
return verdict == NF_DROP || verdict == NF_ACCEPT;
}
@@ -738,7 +738,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
}
if (e->next_offset
- < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+ < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
duprintf("checking: element %p size %u\n",
e, e->next_offset);
return -EINVAL;
@@ -771,7 +771,7 @@ static void
cleanup_entry(struct ipt_entry *e, struct net *net)
{
struct xt_tgdtor_param par;
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_entry_match *ematch;
/* Cleanup all matches */
@@ -972,8 +972,8 @@ copy_entries_to_user(unsigned int total_size,
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
unsigned int i;
- const struct ipt_entry_match *m;
- const struct ipt_entry_target *t;
+ const struct xt_entry_match *m;
+ const struct xt_entry_target *t;
e = (struct ipt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -990,7 +990,7 @@ copy_entries_to_user(unsigned int total_size,
m = (void *)e + i;
if (copy_to_user(userptr + off + i
- + offsetof(struct ipt_entry_match,
+ + offsetof(struct xt_entry_match,
u.user.name),
m->u.kernel.match->name,
strlen(m->u.kernel.match->name)+1)
@@ -1002,7 +1002,7 @@ copy_entries_to_user(unsigned int total_size,
t = ipt_get_target_c(e);
if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct ipt_entry_target,
+ + offsetof(struct xt_entry_target,
u.user.name),
t->u.kernel.target->name,
strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1040,7 +1040,7 @@ static int compat_calc_entry(const struct ipt_entry *e,
const void *base, struct xt_table_info *newinfo)
{
const struct xt_entry_match *ematch;
- const struct ipt_entry_target *t;
+ const struct xt_entry_target *t;
unsigned int entry_offset;
int off, i, ret;
@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info,
static int get_info(struct net *net, void __user *user,
const int *len, int compat)
{
- char name[IPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
int ret;
@@ -1105,7 +1105,7 @@ static int get_info(struct net *net, void __user *user,
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
- name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_lock(AF_INET);
@@ -1400,14 +1400,14 @@ do_add_counters(struct net *net, const void __user *user,
#ifdef CONFIG_COMPAT
struct compat_ipt_replace {
- char name[IPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
u32 num_entries;
u32 size;
u32 hook_entry[NF_INET_NUMHOOKS];
u32 underflow[NF_INET_NUMHOOKS];
u32 num_counters;
- compat_uptr_t counters; /* struct ipt_counters * */
+ compat_uptr_t counters; /* struct xt_counters * */
struct compat_ipt_entry entries[0];
};
@@ -1416,7 +1416,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
unsigned int *size, struct xt_counters *counters,
unsigned int i)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct compat_ipt_entry __user *ce;
u_int16_t target_offset, next_offset;
compat_uint_t origsize;
@@ -1451,7 +1451,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
}
static int
-compat_find_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct xt_entry_match *m,
const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
@@ -1473,7 +1473,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
static void compat_release_entry(struct compat_ipt_entry *e)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_entry_match *ematch;
/* Cleanup all matches */
@@ -1494,7 +1494,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
const char *name)
{
struct xt_entry_match *ematch;
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
@@ -1576,7 +1576,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
unsigned int *size, const char *name,
struct xt_table_info *newinfo, unsigned char *base)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
@@ -1884,7 +1884,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
}
struct compat_ipt_get_entries {
- char name[IPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_ipt_entry entrytable[0];
};
@@ -2039,7 +2039,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IPT_SO_GET_REVISION_MATCH:
case IPT_SO_GET_REVISION_TARGET: {
- struct ipt_get_revision rev;
+ struct xt_get_revision rev;
int target;
if (*len != sizeof(rev)) {
@@ -2176,7 +2176,7 @@ static int icmp_checkentry(const struct xt_mtchk_param *par)
static struct xt_target ipt_builtin_tg[] __read_mostly = {
{
- .name = IPT_STANDARD_TARGET,
+ .name = XT_STANDARD_TARGET,
.targetsize = sizeof(int),
.family = NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
@@ -2186,9 +2186,9 @@ static struct xt_target ipt_builtin_tg[] __read_mostly = {
#endif
},
{
- .name = IPT_ERROR_TARGET,
+ .name = XT_ERROR_TARGET,
.target = ipt_error,
- .targetsize = IPT_FUNCTION_MAXNAMELEN,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
.family = NFPROTO_IPV4,
},
};
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db8..1e26a489765 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/net_namespace.h>
#include <net/checksum.h>
+#include <net/ip.h>
#define CLUSTERIP_VERSION "0.8"
@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
unsigned long hashval;
- u_int16_t sport, dport;
- const u_int16_t *ports;
-
- switch (iph->protocol) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- case IPPROTO_ICMP:
- ports = (const void *)iph+iph->ihl*4;
- sport = ports[0];
- dport = ports[1];
- break;
- default:
+ u_int16_t sport = 0, dport = 0;
+ int poff;
+
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0) {
+ const u_int16_t *ports;
+ u16 _ports[2];
+
+ ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+ if (ports) {
+ sport = ports[0];
+ dport = ports[1];
+ }
+ } else {
if (net_ratelimit())
pr_info("unknown protocol %u\n", iph->protocol);
- sport = dport = 0;
}
switch (config->hash_mode) {
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 915fc17d7ce..72ffc8fda2e 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,16 +24,15 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_LOG.h>
#include <net/netfilter/nf_log.h>
+#include <net/netfilter/xt_log.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
-/* Use lock to serialize, so printks don't overlap */
-static DEFINE_SPINLOCK(log_lock);
-
/* One level of recursion won't kill us */
-static void dump_packet(const struct nf_loginfo *info,
+static void dump_packet(struct sbuff *m,
+ const struct nf_loginfo *info,
const struct sk_buff *skb,
unsigned int iphoff)
{
@@ -48,32 +47,32 @@ static void dump_packet(const struct nf_loginfo *info,
ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
if (ih == NULL) {
- printk("TRUNCATED");
+ sb_add(m, "TRUNCATED");
return;
}
/* Important fields:
* TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
- printk("SRC=%pI4 DST=%pI4 ",
+ sb_add(m, "SRC=%pI4 DST=%pI4 ",
&ih->saddr, &ih->daddr);
/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
- printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
/* Max length: 6 "CE DF MF " */
if (ntohs(ih->frag_off) & IP_CE)
- printk("CE ");
+ sb_add(m, "CE ");
if (ntohs(ih->frag_off) & IP_DF)
- printk("DF ");
+ sb_add(m, "DF ");
if (ntohs(ih->frag_off) & IP_MF)
- printk("MF ");
+ sb_add(m, "MF ");
/* Max length: 11 "FRAG:65535 " */
if (ntohs(ih->frag_off) & IP_OFFSET)
- printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+ sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
if ((logflags & IPT_LOG_IPOPT) &&
ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -85,15 +84,15 @@ static void dump_packet(const struct nf_loginfo *info,
op = skb_header_pointer(skb, iphoff+sizeof(_iph),
optsize, _opt);
if (op == NULL) {
- printk("TRUNCATED");
+ sb_add(m, "TRUNCATED");
return;
}
/* Max length: 127 "OPT (" 15*4*2chars ") " */
- printk("OPT (");
+ sb_add(m, "OPT (");
for (i = 0; i < optsize; i++)
- printk("%02X", op[i]);
- printk(") ");
+ sb_add(m, "%02X", op[i]);
+ sb_add(m, ") ");
}
switch (ih->protocol) {
@@ -102,7 +101,7 @@ static void dump_packet(const struct nf_loginfo *info,
const struct tcphdr *th;
/* Max length: 10 "PROTO=TCP " */
- printk("PROTO=TCP ");
+ sb_add(m, "PROTO=TCP ");
if (ntohs(ih->frag_off) & IP_OFFSET)
break;
@@ -111,41 +110,41 @@ static void dump_packet(const struct nf_loginfo *info,
th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
sizeof(_tcph), &_tcph);
if (th == NULL) {
- printk("INCOMPLETE [%u bytes] ",
+ sb_add(m, "INCOMPLETE [%u bytes] ",
skb->len - iphoff - ih->ihl*4);
break;
}
/* Max length: 20 "SPT=65535 DPT=65535 " */
- printk("SPT=%u DPT=%u ",
+ sb_add(m, "SPT=%u DPT=%u ",
ntohs(th->source), ntohs(th->dest));
/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
if (logflags & IPT_LOG_TCPSEQ)
- printk("SEQ=%u ACK=%u ",
+ sb_add(m, "SEQ=%u ACK=%u ",
ntohl(th->seq), ntohl(th->ack_seq));
/* Max length: 13 "WINDOW=65535 " */
- printk("WINDOW=%u ", ntohs(th->window));
+ sb_add(m, "WINDOW=%u ", ntohs(th->window));
/* Max length: 9 "RES=0x3F " */
- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+ sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
if (th->cwr)
- printk("CWR ");
+ sb_add(m, "CWR ");
if (th->ece)
- printk("ECE ");
+ sb_add(m, "ECE ");
if (th->urg)
- printk("URG ");
+ sb_add(m, "URG ");
if (th->ack)
- printk("ACK ");
+ sb_add(m, "ACK ");
if (th->psh)
- printk("PSH ");
+ sb_add(m, "PSH ");
if (th->rst)
- printk("RST ");
+ sb_add(m, "RST ");
if (th->syn)
- printk("SYN ");
+ sb_add(m, "SYN ");
if (th->fin)
- printk("FIN ");
+ sb_add(m, "FIN ");
/* Max length: 11 "URGP=65535 " */
- printk("URGP=%u ", ntohs(th->urg_ptr));
+ sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
if ((logflags & IPT_LOG_TCPOPT) &&
th->doff * 4 > sizeof(struct tcphdr)) {
@@ -158,15 +157,15 @@ static void dump_packet(const struct nf_loginfo *info,
iphoff+ih->ihl*4+sizeof(_tcph),
optsize, _opt);
if (op == NULL) {
- printk("TRUNCATED");
+ sb_add(m, "TRUNCATED");
return;
}
/* Max length: 127 "OPT (" 15*4*2chars ") " */
- printk("OPT (");
+ sb_add(m, "OPT (");
for (i = 0; i < optsize; i++)
- printk("%02X", op[i]);
- printk(") ");
+ sb_add(m, "%02X", op[i]);
+ sb_add(m, ") ");
}
break;
}
@@ -177,9 +176,9 @@ static void dump_packet(const struct nf_loginfo *info,
if (ih->protocol == IPPROTO_UDP)
/* Max length: 10 "PROTO=UDP " */
- printk("PROTO=UDP " );
+ sb_add(m, "PROTO=UDP " );
else /* Max length: 14 "PROTO=UDPLITE " */
- printk("PROTO=UDPLITE ");
+ sb_add(m, "PROTO=UDPLITE ");
if (ntohs(ih->frag_off) & IP_OFFSET)
break;
@@ -188,13 +187,13 @@ static void dump_packet(const struct nf_loginfo *info,
uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
sizeof(_udph), &_udph);
if (uh == NULL) {
- printk("INCOMPLETE [%u bytes] ",
+ sb_add(m, "INCOMPLETE [%u bytes] ",
skb->len - iphoff - ih->ihl*4);
break;
}
/* Max length: 20 "SPT=65535 DPT=65535 " */
- printk("SPT=%u DPT=%u LEN=%u ",
+ sb_add(m, "SPT=%u DPT=%u LEN=%u ",
ntohs(uh->source), ntohs(uh->dest),
ntohs(uh->len));
break;
@@ -221,7 +220,7 @@ static void dump_packet(const struct nf_loginfo *info,
[ICMP_ADDRESSREPLY] = 12 };
/* Max length: 11 "PROTO=ICMP " */
- printk("PROTO=ICMP ");
+ sb_add(m, "PROTO=ICMP ");
if (ntohs(ih->frag_off) & IP_OFFSET)
break;
@@ -230,19 +229,19 @@ static void dump_packet(const struct nf_loginfo *info,
ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
sizeof(_icmph), &_icmph);
if (ich == NULL) {
- printk("INCOMPLETE [%u bytes] ",
+ sb_add(m, "INCOMPLETE [%u bytes] ",
skb->len - iphoff - ih->ihl*4);
break;
}
/* Max length: 18 "TYPE=255 CODE=255 " */
- printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+ sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
if (ich->type <= NR_ICMP_TYPES &&
required_len[ich->type] &&
skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
- printk("INCOMPLETE [%u bytes] ",
+ sb_add(m, "INCOMPLETE [%u bytes] ",
skb->len - iphoff - ih->ihl*4);
break;
}
@@ -251,35 +250,35 @@ static void dump_packet(const struct nf_loginfo *info,
case ICMP_ECHOREPLY:
case ICMP_ECHO:
/* Max length: 19 "ID=65535 SEQ=65535 " */
- printk("ID=%u SEQ=%u ",
+ sb_add(m, "ID=%u SEQ=%u ",
ntohs(ich->un.echo.id),
ntohs(ich->un.echo.sequence));
break;
case ICMP_PARAMETERPROB:
/* Max length: 14 "PARAMETER=255 " */
- printk("PARAMETER=%u ",
+ sb_add(m, "PARAMETER=%u ",
ntohl(ich->un.gateway) >> 24);
break;
case ICMP_REDIRECT:
/* Max length: 24 "GATEWAY=255.255.255.255 " */
- printk("GATEWAY=%pI4 ", &ich->un.gateway);
+ sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
/* Fall through */
case ICMP_DEST_UNREACH:
case ICMP_SOURCE_QUENCH:
case ICMP_TIME_EXCEEDED:
/* Max length: 3+maxlen */
if (!iphoff) { /* Only recurse once. */
- printk("[");
- dump_packet(info, skb,
+ sb_add(m, "[");
+ dump_packet(m, info, skb,
iphoff + ih->ihl*4+sizeof(_icmph));
- printk("] ");
+ sb_add(m, "] ");
}
/* Max length: 10 "MTU=65535 " */
if (ich->type == ICMP_DEST_UNREACH &&
ich->code == ICMP_FRAG_NEEDED)
- printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+ sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
}
break;
}
@@ -292,19 +291,19 @@ static void dump_packet(const struct nf_loginfo *info,
break;
/* Max length: 9 "PROTO=AH " */
- printk("PROTO=AH ");
+ sb_add(m, "PROTO=AH ");
/* Max length: 25 "INCOMPLETE [65535 bytes] " */
ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
sizeof(_ahdr), &_ahdr);
if (ah == NULL) {
- printk("INCOMPLETE [%u bytes] ",
+ sb_add(m, "INCOMPLETE [%u bytes] ",
skb->len - iphoff - ih->ihl*4);
break;
}
/* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(ah->spi));
+ sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
break;
}
case IPPROTO_ESP: {
@@ -312,7 +311,7 @@ static void dump_packet(const struct nf_loginfo *info,
const struct ip_esp_hdr *eh;
/* Max length: 10 "PROTO=ESP " */
- printk("PROTO=ESP ");
+ sb_add(m, "PROTO=ESP ");
if (ntohs(ih->frag_off) & IP_OFFSET)
break;
@@ -321,25 +320,25 @@ static void dump_packet(const struct nf_loginfo *info,
eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
sizeof(_esph), &_esph);
if (eh == NULL) {
- printk("INCOMPLETE [%u bytes] ",
+ sb_add(m, "INCOMPLETE [%u bytes] ",
skb->len - iphoff - ih->ihl*4);
break;
}
/* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(eh->spi));
+ sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
break;
}
/* Max length: 10 "PROTO 255 " */
default:
- printk("PROTO=%u ", ih->protocol);
+ sb_add(m, "PROTO=%u ", ih->protocol);
}
/* Max length: 15 "UID=4294967295 " */
if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
read_lock_bh(&skb->sk->sk_callback_lock);
if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- printk("UID=%u GID=%u ",
+ sb_add(m, "UID=%u GID=%u ",
skb->sk->sk_socket->file->f_cred->fsuid,
skb->sk->sk_socket->file->f_cred->fsgid);
read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -347,7 +346,7 @@ static void dump_packet(const struct nf_loginfo *info,
/* Max length: 16 "MARK=0xFFFFFFFF " */
if (!iphoff && skb->mark)
- printk("MARK=0x%x ", skb->mark);
+ sb_add(m, "MARK=0x%x ", skb->mark);
/* Proto Max log string length */
/* IP: 40+46+6+11+127 = 230 */
@@ -364,7 +363,8 @@ static void dump_packet(const struct nf_loginfo *info,
/* maxlen = 230+ 91 + 230 + 252 = 803 */
}
-static void dump_mac_header(const struct nf_loginfo *info,
+static void dump_mac_header(struct sbuff *m,
+ const struct nf_loginfo *info,
const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -378,7 +378,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
switch (dev->type) {
case ARPHRD_ETHER:
- printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+ sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
ntohs(eth_hdr(skb)->h_proto));
return;
@@ -387,17 +387,17 @@ static void dump_mac_header(const struct nf_loginfo *info,
}
fallback:
- printk("MAC=");
+ sb_add(m, "MAC=");
if (dev->hard_header_len &&
skb->mac_header != skb->network_header) {
const unsigned char *p = skb_mac_header(skb);
unsigned int i;
- printk("%02x", *p++);
+ sb_add(m, "%02x", *p++);
for (i = 1; i < dev->hard_header_len; i++, p++)
- printk(":%02x", *p);
+ sb_add(m, ":%02x", *p);
}
- printk(" ");
+ sb_add(m, " ");
}
static struct nf_loginfo default_loginfo = {
@@ -419,11 +419,12 @@ ipt_log_packet(u_int8_t pf,
const struct nf_loginfo *loginfo,
const char *prefix)
{
+ struct sbuff *m = sb_open();
+
if (!loginfo)
loginfo = &default_loginfo;
- spin_lock_bh(&log_lock);
- printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+ sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
prefix,
in ? in->name : "",
out ? out->name : "");
@@ -434,20 +435,20 @@ ipt_log_packet(u_int8_t pf,
physindev = skb->nf_bridge->physindev;
if (physindev && in != physindev)
- printk("PHYSIN=%s ", physindev->name);
+ sb_add(m, "PHYSIN=%s ", physindev->name);
physoutdev = skb->nf_bridge->physoutdev;
if (physoutdev && out != physoutdev)
- printk("PHYSOUT=%s ", physoutdev->name);
+ sb_add(m, "PHYSOUT=%s ", physoutdev->name);
}
#endif
/* MAC logging for input path only. */
if (in && !out)
- dump_mac_header(loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
+
+ dump_packet(m, loginfo, skb, 0);
- dump_packet(loginfo, skb, 0);
- printk("\n");
- spin_unlock_bh(&log_lock);
+ sb_close(m);
}
static unsigned int
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index c31b8766825..0f23b3f06df 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,9 +44,16 @@ static unsigned int help(struct sk_buff *skb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ port = 0;
break;
+ }
}
if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 957c9241fb0..295c97431e4 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,7 +47,7 @@ __nf_nat_proto_find(u_int8_t protonum)
return rcu_dereference(nf_nat_protos[protonum]);
}
-const struct nf_nat_protocol *
+static const struct nf_nat_protocol *
nf_nat_proto_find_get(u_int8_t protonum)
{
const struct nf_nat_protocol *p;
@@ -60,14 +60,12 @@ nf_nat_proto_find_get(u_int8_t protonum)
return p;
}
-EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
-void
+static void
nf_nat_proto_put(const struct nf_nat_protocol *p)
{
module_put(p->me);
}
-EXPORT_SYMBOL_GPL(nf_nat_proto_put);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
@@ -262,11 +260,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
/* Only bother mapping if it's not already in range and unique */
- if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
- (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
- proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
- !nf_nat_used_tuple(tuple, ct))
- goto out;
+ if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+ if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+ if (proto->in_range(tuple, maniptype, &range->min,
+ &range->max) &&
+ (range->min.all == range->max.all ||
+ !nf_nat_used_tuple(tuple, ct)))
+ goto out;
+ } else if (!nf_nat_used_tuple(tuple, ct)) {
+ goto out;
+ }
+ }
/* Last change: get protocol to try to obtain unique tuple. */
proto->unique_tuple(tuple, range, maniptype, ct);
@@ -458,6 +462,18 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
return 0;
}
+ if (manip == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
pr_debug("icmp_reply_translation: translating error %p manip %u "
"dir %s\n", skb, manip,
dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
@@ -492,20 +508,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
/* Change outer to look the reply to an incoming packet
* (proto 0 means don't invert per-proto part). */
- if (manip == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, skb, 0, &target, manip))
- return 0;
- }
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ if (!manip_pkt(0, skb, 0, &target, manip))
+ return 0;
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 86e0e84ff0a..dc73abb3fe2 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -79,9 +79,16 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ port = 0;
break;
+ }
}
if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 5045196d853..790f3160e01 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -222,13 +222,24 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get a pair of ports. */
for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
nated_port != 0; nated_port += 2) {
+ int ret;
+
rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- if (nf_ct_expect_related(rtp_exp) == 0) {
+ ret = nf_ct_expect_related(rtp_exp);
+ if (ret == 0) {
rtcp_exp->tuple.dst.u.udp.port =
htons(nated_port + 1);
- if (nf_ct_expect_related(rtcp_exp) == 0)
+ ret = nf_ct_expect_related(rtcp_exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ nated_port = 0;
break;
- nf_ct_unexpect_related(rtp_exp);
+ }
+ } else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
}
}
@@ -284,9 +295,16 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get same port: if not, try to change it. */
for (; nated_port != 0; nated_port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
break;
+ }
}
if (nated_port == 0) { /* No port available */
@@ -334,9 +352,16 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get same port: if not, try to change it. */
for (; nated_port != 0; nated_port++) {
+ int ret;
+