aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/8021q/vlan.h19
-rw-r--r--net/Kconfig1
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c1
-rw-r--r--net/batman-adv/bat_v_elp.c1
-rw-r--r--net/batman-adv/bat_v_ogm.c1
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c147
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h4
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/hard-interface.c19
-rw-r--r--net/batman-adv/hard-interface.h1
-rw-r--r--net/batman-adv/main.c1
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c62
-rw-r--r--net/batman-adv/multicast.h15
-rw-r--r--net/batman-adv/netlink.c6
-rw-r--r--net/batman-adv/network-coding.c4
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/send.c2
-rw-r--r--net/batman-adv/soft-interface.c15
-rw-r--r--net/batman-adv/types.h4
-rw-r--r--net/bluetooth/Kconfig1
-rw-r--r--net/bluetooth/a2mp.c22
-rw-r--r--net/bluetooth/hci_conn.c2
-rw-r--r--net/bluetooth/hci_core.c43
-rw-r--r--net/bluetooth/hci_event.c89
-rw-r--r--net/bluetooth/hci_request.c85
-rw-r--r--net/bluetooth/l2cap_core.c7
-rw-r--r--net/bluetooth/l2cap_sock.c21
-rw-r--r--net/bluetooth/mgmt.c57
-rw-r--r--net/bluetooth/sco.c6
-rw-r--r--net/bpf/test_run.c88
-rw-r--r--net/bpfilter/Kconfig1
-rw-r--r--net/bridge/br.c5
-rw-r--r--net/bridge/br_arp_nd_proxy.c26
-rw-r--r--net/bridge/br_device.c21
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_forward.c17
-rw-r--r--net/bridge/br_ioctl.c2
-rw-r--r--net/bridge/br_mdb.c573
-rw-r--r--net/bridge/br_multicast.c1825
-rw-r--r--net/bridge/br_netlink.c30
-rw-r--r--net/bridge/br_private.h117
-rw-r--r--net/bridge/br_vlan.c53
-rw-r--r--net/bridge/netfilter/ebt_stp.c1
-rw-r--r--net/caif/cfsrvl.c1
-rw-r--r--net/can/Kconfig14
-rw-r--r--net/can/Makefile3
-rw-r--r--net/can/af_can.c8
-rw-r--r--net/can/bcm.c6
-rw-r--r--net/can/gw.c6
-rw-r--r--net/can/isotp.c1424
-rw-r--r--net/can/j1939/transport.c2
-rw-r--r--net/can/proc.c14
-rw-r--r--net/can/raw.c34
-rw-r--r--net/ceph/messenger.c2
-rw-r--r--net/compat.c4
-rw-r--r--net/core/bpf_sk_storage.c836
-rw-r--r--net/core/datagram.c33
-rw-r--r--net/core/dev.c345
-rw-r--r--net/core/dev_addr_lists.c12
-rw-r--r--net/core/devlink.c896
-rw-r--r--net/core/drop_monitor.c139
-rw-r--r--net/core/dst.c2
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c987
-rw-r--r--net/core/flow_dissector.c10
-rw-r--r--net/core/net-procfs.c15
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/net_namespace.c34
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/pktgen.c10
-rw-r--r--net/core/ptp_classifier.c30
-rw-r--r--net/core/skbuff.c96
-rw-r--r--net/core/skmsg.c195
-rw-r--r--net/core/sock.c32
-rw-r--r--net/core/sock_diag.c9
-rw-r--r--net/core/sock_map.c441
-rw-r--r--net/core/sysctl_net_core.c17
-rw-r--r--net/dcb/dcbnl.c8
-rw-r--r--net/dccp/ackvec.c2
-rw-r--r--net/dccp/ipv4.c8
-rw-r--r--net/dccp/timer.c3
-rw-r--r--net/dsa/dsa.c51
-rw-r--r--net/dsa/dsa2.c134
-rw-r--r--net/dsa/dsa_priv.h62
-rw-r--r--net/dsa/master.c20
-rw-r--r--net/dsa/port.c104
-rw-r--r--net/dsa/slave.c230
-rw-r--r--net/dsa/switch.c50
-rw-r--r--net/dsa/tag_8021q.c158
-rw-r--r--net/dsa/tag_brcm.c35
-rw-r--r--net/dsa/tag_dsa.c9
-rw-r--r--net/dsa/tag_edsa.c9
-rw-r--r--net/dsa/tag_ksz.c1
-rw-r--r--net/dsa/tag_mtk.c10
-rw-r--r--net/dsa/tag_ocelot.c67
-rw-r--r--net/dsa/tag_qca.c10
-rw-r--r--net/dsa/tag_rtl4_a.c11
-rw-r--r--net/dsa/tag_sja1105.c33
-rw-r--r--net/dsa/tag_trailer.c1
-rw-r--r--net/ethtool/bitset.c26
-rw-r--r--net/ethtool/cabletest.c41
-rw-r--r--net/ethtool/channels.c37
-rw-r--r--net/ethtool/coalesce.c45
-rw-r--r--net/ethtool/common.c2
-rw-r--r--net/ethtool/debug.c24
-rw-r--r--net/ethtool/eee.c32
-rw-r--r--net/ethtool/features.c30
-rw-r--r--net/ethtool/ioctl.c67
-rw-r--r--net/ethtool/linkinfo.c30
-rw-r--r--net/ethtool/linkmodes.c34
-rw-r--r--net/ethtool/linkstate.c14
-rw-r--r--net/ethtool/netlink.c126
-rw-r--r--net/ethtool/netlink.h35
-rw-r--r--net/ethtool/pause.c86
-rw-r--r--net/ethtool/privflags.c24
-rw-r--r--net/ethtool/rings.c35
-rw-r--r--net/ethtool/strset.c26
-rw-r--r--net/ethtool/tsinfo.c13
-rw-r--r--net/ethtool/tunnels.c46
-rw-r--r--net/ethtool/wol.c24
-rw-r--r--net/hsr/hsr_debugfs.c21
-rw-r--r--net/hsr/hsr_netlink.c12
-rw-r--r--net/ieee802154/netlink.c6
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/bpf_tcp_ca.c34
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fou.c10
-rw-r--r--net/ipv4/icmp.c39
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c37
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/ip_gre.c15
-rw-r--r--net/ipv4/ip_options.c35
-rw-r--r--net/ipv4/ip_output.c20
-rw-r--r--net/ipv4/ip_sockglue.c5
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/ip_tunnel_core.c24
-rw-r--r--net/ipv4/ip_vti.c11
-rw-r--r--net/ipv4/ipmr.c14
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c19
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c6
-rw-r--r--net/ipv4/nexthop.c66
-rw-r--r--net/ipv4/ping.c29
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c7
-rw-r--r--net/ipv4/route.c37
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c54
-rw-r--r--net/ipv4/tcp_bpf.c13
-rw-r--r--net/ipv4/tcp_cong.c27
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c258
-rw-r--r--net/ipv4/tcp_ipv4.c24
-rw-r--r--net/ipv4/tcp_metrics.c6
-rw-r--r--net/ipv4/tcp_output.c212
-rw-r--r--net/ipv4/tcp_recovery.c16
-rw-r--r--net/ipv4/tcp_scalable.c2
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tcp_vegas.c8
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/udp_bpf.c9
-rw-r--r--net/ipv4/udp_tunnel_nic.c96
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf_core.c8
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/calipso.c2
-rw-r--r--net/ipv6/icmp.c11
-rw-r--r--net/ipv6/inet6_hashtables.c6
-rw-r--r--net/ipv6/ip6_fib.c29
-rw-r--r--net/ipv6/ip6_gre.c33
-rw-r--r--net/ipv6/ip6_output.c6
-rw-r--r--net/ipv6/ip6_vti.c8
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c39
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c8
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c8
-rw-r--r--net/ipv6/tcp_ipv6.c27
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/iucv/iucv.c8
-rw-r--r--net/l2tp/Makefile2
-rw-r--r--net/l2tp/l2tp_core.c329
-rw-r--r--net/l2tp/l2tp_core.h33
-rw-r--r--net/l2tp/l2tp_debugfs.c4
-rw-r--r--net/l2tp/l2tp_eth.c13
-rw-r--r--net/l2tp/l2tp_ip.c17
-rw-r--r--net/l2tp/l2tp_ip6.c17
-rw-r--r--net/l2tp/l2tp_netlink.c30
-rw-r--r--net/l2tp/l2tp_ppp.c70
-rw-r--r--net/l2tp/trace.h211
-rw-r--r--net/mac80211/Makefile1
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/airtime.c20
-rw-r--r--net/mac80211/cfg.c118
-rw-r--r--net/mac80211/chan.c9
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/driver-ops.h29
-rw-r--r--net/mac80211/ibss.c7
-rw-r--r--net/mac80211/ieee80211_i.h47
-rw-r--r--net/mac80211/iface.c1025
-rw-r--r--net/mac80211/key.c15
-rw-r--r--net/mac80211/main.c2
-rw-r--r--net/mac80211/mesh.c6
-rw-r--r--net/mac80211/mesh_hwmp.c4
-rw-r--r--net/mac80211/mesh_plink.c1
-rw-r--r--net/mac80211/mesh_ps.c6
-rw-r--r--net/mac80211/mlme.c236
-rw-r--r--net/mac80211/offchannel.c40
-rw-r--r--net/mac80211/rate.c40
-rw-r--r--net/mac80211/rx.c101
-rw-r--r--net/mac80211/s1g.c16
-rw-r--r--net/mac80211/scan.c43
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/sta_info.h3
-rw-r--r--net/mac80211/status.c229
-rw-r--r--net/mac80211/trace.h33
-rw-r--r--net/mac80211/tx.c249
-rw-r--r--net/mac80211/util.c200
-rw-r--r--net/mac80211/vht.c12
-rw-r--r--net/mac802154/tx.c8
-rw-r--r--net/mptcp/mib.c9
-rw-r--r--net/mptcp/mib.h9
-rw-r--r--net/mptcp/options.c141
-rw-r--r--net/mptcp/pm.c94
-rw-r--r--net/mptcp/pm_netlink.c344
-rw-r--r--net/mptcp/protocol.c576
-rw-r--r--net/mptcp/protocol.h73
-rw-r--r--net/mptcp/subflow.c145
-rw-r--r--net/ncsi/ncsi-netlink.c6
-rw-r--r--net/netfilter/Kconfig1
-rw-r--r--net/netfilter/core.c129
-rw-r--r--net/netfilter/ipset/ip_set_core.c17
-rw-r--r--net/netfilter/ipvs/Kconfig1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c18
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c19
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c25
-rw-r--r--net/netfilter/nf_conntrack_netlink.c27
-rw-r--r--net/netfilter/nf_conntrack_proto.c2
-rw-r--r--net/netfilter/nf_conntrack_standalone.c6
-rw-r--r--net/netfilter/nf_flow_table_core.c12
-rw-r--r--net/netfilter/nf_flow_table_ip.c45
-rw-r--r--net/netfilter/nf_log_common.c12
-rw-r--r--net/netfilter/nf_tables_api.c191
-rw-r--r--net/netfilter/nf_tables_core.c15
-rw-r--r--net/netfilter/nf_tables_offload.c2
-rw-r--r--net/netfilter/nfnetlink.c19
-rw-r--r--net/netfilter/nft_bitwise.c141
-rw-r--r--net/netfilter/nft_chain_filter.c35
-rw-r--r--net/netfilter/nft_cmp.c13
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_payload.c28
-rw-r--r--net/netfilter/nft_socket.c27
-rw-r--r--net/netfilter/xt_HMARK.c2
-rw-r--r--net/netlabel/netlabel_calipso.c10
-rw-r--r--net/netlabel/netlabel_cipso_v4.c6
-rw-r--r--net/netlabel/netlabel_domainhash.c5
-rw-r--r--net/netlabel/netlabel_mgmt.c6
-rw-r--r--net/netlabel/netlabel_unlabeled.c6
-rw-r--r--net/netlink/af_netlink.c68
-rw-r--r--net/netlink/genetlink.c382
-rw-r--r--net/netlink/policy.c298
-rw-r--r--net/nfc/digital_dep.c3
-rw-r--r--net/openvswitch/actions.c40
-rw-r--r--net/openvswitch/conntrack.c32
-rw-r--r--net/openvswitch/datapath.c70
-rw-r--r--net/openvswitch/flow_table.c70
-rw-r--r--net/openvswitch/flow_table.h1
-rw-r--r--net/openvswitch/meter.c6
-rw-r--r--net/openvswitch/vport-internal_dev.c28
-rw-r--r--net/openvswitch/vport.c7
-rw-r--r--net/packet/af_packet.c41
-rw-r--r--net/psample/psample.c6
-rw-r--r--net/qrtr/ns.c68
-rw-r--r--net/qrtr/qrtr.c21
-rw-r--r--net/rds/cong.c2
-rw-r--r--net/rds/ib_cm.c2
-rw-r--r--net/rds/ib_recv.c6
-rw-r--r--net/rds/rdma.c2
-rw-r--r--net/rxrpc/af_rxrpc.c7
-rw-r--r--net/rxrpc/ar-internal.h78
-rw-r--r--net/rxrpc/call_accept.c263
-rw-r--r--net/rxrpc/call_object.c48
-rw-r--r--net/rxrpc/conn_client.c1092
-rw-r--r--net/rxrpc/conn_event.c28
-rw-r--r--net/rxrpc/conn_object.c12
-rw-r--r--net/rxrpc/conn_service.c7
-rw-r--r--net/rxrpc/key.c20
-rw-r--r--net/rxrpc/local_object.c4
-rw-r--r--net/rxrpc/net_ns.c5
-rw-r--r--net/rxrpc/output.c6
-rw-r--r--net/rxrpc/proc.c2
-rw-r--r--net/rxrpc/recvmsg.c36
-rw-r--r--net/rxrpc/rtt.c1
-rw-r--r--net/rxrpc/rxkad.c8
-rw-r--r--net/rxrpc/sendmsg.c15
-rw-r--r--net/rxrpc/sysctl.c10
-rw-r--r--net/sched/act_api.c59
-rw-r--r--net/sched/act_bpf.c4
-rw-r--r--net/sched/act_connmark.c1
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_ct.c10
-rw-r--r--net/sched/act_ctinfo.c8
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_gate.c7
-rw-r--r--net/sched/act_ife.c47
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_mpls.c20
-rw-r--r--net/sched/act_nat.c3
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/act_vlan.c42
-rw-r--r--net/sched/cls_flower.c5
-rw-r--r--net/sched/cls_u32.c8
-rw-r--r--net/sched/sch_generic.c71
-rw-r--r--net/sched/sch_taprio.c28
-rw-r--r--net/sctp/associola.c4
-rw-r--r--net/sctp/auth.c5
-rw-r--r--net/sctp/bind_addr.c2
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/protocol.c8
-rw-r--r--net/sctp/sm_make_chunk.c6
-rw-r--r--net/sctp/socket.c9
-rw-r--r--net/sctp/ulpqueue.c2
-rw-r--r--net/smc/af_smc.c881
-rw-r--r--net/smc/smc.h19
-rw-r--r--net/smc/smc_cdc.c4
-rw-r--r--net/smc/smc_clc.c500
-rw-r--r--net/smc/smc_clc.h250
-rw-r--r--net/smc/smc_close.c4
-rw-r--r--net/smc/smc_core.c82
-rw-r--r--net/smc/smc_core.h24
-rw-r--r--net/smc/smc_diag.c30
-rw-r--r--net/smc/smc_ism.c32
-rw-r--r--net/smc/smc_ism.h8
-rw-r--r--net/smc/smc_llc.c21
-rw-r--r--net/smc/smc_netns.h1
-rw-r--r--net/smc/smc_pnet.c174
-rw-r--r--net/smc/smc_pnet.h15
-rw-r--r--net/smc/smc_tx.c10
-rw-r--r--net/socket.c14
-rw-r--r--net/sunrpc/Kconfig1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c276
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c95
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c87
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c65
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/sunrpc/sysctl.c6
-rw-r--r--net/switchdev/switchdev.c2
-rw-r--r--net/tipc/core.c6
-rw-r--r--net/tipc/core.h8
-rw-r--r--net/tipc/crypto.c981
-rw-r--r--net/tipc/crypto.h43
-rw-r--r--net/tipc/group.c14
-rw-r--r--net/tipc/link.c13
-rw-r--r--net/tipc/msg.c6
-rw-r--r--net/tipc/msg.h8
-rw-r--r--net/tipc/name_distr.c10
-rw-r--r--net/tipc/net.c20
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/tipc/netlink_compat.c6
-rw-r--r--net/tipc/node.c96
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/socket.c8
-rw-r--r--net/tipc/sysctl.c9
-rw-r--r--net/tipc/topsrv.c1
-rw-r--r--net/tipc/udp_media.c1
-rw-r--r--net/tls/tls_device.c11
-rw-r--r--net/tls/tls_main.c27
-rw-r--r--net/tls/tls_sw.c9
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/unix/scm.c1
-rw-r--r--net/wimax/stack.c6
-rw-r--r--net/wireless/Kconfig1
-rw-r--r--net/wireless/chan.c135
-rw-r--r--net/wireless/core.c8
-rw-r--r--net/wireless/core.h9
-rw-r--r--net/wireless/lib80211.c2
-rw-r--r--net/wireless/mlme.c14
-rw-r--r--net/wireless/nl80211.c520
-rw-r--r--net/wireless/radiotap.c1
-rw-r--r--net/wireless/reg.c329
-rw-r--r--net/wireless/scan.c585
-rw-r--r--net/wireless/sme.c2
-rw-r--r--net/wireless/util.c34
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--net/xdp/xdp_umem.c242
-rw-r--r--net/xdp/xdp_umem.h6
-rw-r--r--net/xdp/xsk.c236
-rw-r--r--net/xdp/xsk.h11
-rw-r--r--net/xdp/xsk_buff_pool.c377
-rw-r--r--net/xdp/xsk_diag.c20
-rw-r--r--net/xdp/xsk_queue.h18
-rw-r--r--net/xdp/xskmap.c15
-rw-r--r--net/xfrm/Kconfig11
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/espintcp.c6
-rw-r--r--net/xfrm/xfrm_compat.c625
-rw-r--r--net/xfrm/xfrm_interface.c33
-rw-r--r--net/xfrm/xfrm_state.c119
-rw-r--r--net/xfrm/xfrm_user.c110
418 files changed, 19725 insertions, 8601 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index d4bcfd8f95bf..f292e0267bb9 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,12 +51,16 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
__be16 vlan_proto, u16 vlan_id)
{
struct net_device **array;
- unsigned int pidx, vidx;
+ unsigned int vidx;
unsigned int size;
+ int pidx;
ASSERT_RTNL();
pidx = vlan_proto_idx(vlan_proto);
+ if (pidx < 0)
+ return -EINVAL;
+
vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
array = vg->vlan_devices_arrays[pidx][vidx];
if (array != NULL)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index bb7ec1a3915d..953405362795 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -36,7 +36,7 @@ struct vlan_info {
struct rcu_head rcu;
};
-static inline unsigned int vlan_proto_idx(__be16 proto)
+static inline int vlan_proto_idx(__be16 proto)
{
switch (proto) {
case htons(ETH_P_8021Q):
@@ -44,8 +44,8 @@ static inline unsigned int vlan_proto_idx(__be16 proto)
case htons(ETH_P_8021AD):
return VLAN_PROTO_8021AD;
default:
- BUG();
- return 0;
+ WARN(1, "invalid VLAN protocol: 0x%04x\n", ntohs(proto));
+ return -EINVAL;
}
}
@@ -64,17 +64,24 @@ static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
__be16 vlan_proto,
u16 vlan_id)
{
- return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id);
+ int pidx = vlan_proto_idx(vlan_proto);
+
+ if (pidx < 0)
+ return NULL;
+
+ return __vlan_group_get_device(vg, pidx, vlan_id);
}
static inline void vlan_group_set_device(struct vlan_group *vg,
__be16 vlan_proto, u16 vlan_id,
struct net_device *dev)
{
+ int pidx = vlan_proto_idx(vlan_proto);
struct net_device **array;
- if (!vg)
+
+ if (!vg || pidx < 0)
return;
- array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)]
+ array = vg->vlan_devices_arrays[pidx]
[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
}
diff --git a/net/Kconfig b/net/Kconfig
index 3831206977a1..d6567162c1cf 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -434,7 +434,6 @@ config NET_SOCK_MSG
config NET_DEVLINK
bool
default n
- imply NET_DROP_MONITOR
config PAGE_POOL
bool
diff --git a/net/atm/lec.c b/net/atm/lec.c
index b570ef919c28..dbabb65d8b67 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1070,7 +1070,7 @@ module_exit(lane_module_cleanup);
/*
* LANE2: 3.1.3, LE_RESOLVE.request
* Non force allocates memory and fills in *tlvs, fills in *sizeoftlvs.
- * If sizeoftlvs == NULL the default TLVs associated with with this
+ * If sizeoftlvs == NULL the default TLVs associated with this
* lec will be used.
* If dst_mac == NULL, targetless LE_ARP will be sent
*/
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index fbd0c5e7b299..5de06ab8ed75 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -52,7 +52,7 @@ static void modify_qos(struct atm_vcc *vcc, struct atmsvc_msg *msg)
msg->type = as_okay;
}
/*
- * Should probably just turn around the old skb. But the, the buffer
+ * Should probably just turn around the old skb. But then, the buffer
* space accounting needs to follow the change too. Maybe later.
*/
while (!(skb = alloc_skb(sizeof(struct atmsvc_msg), GFP_KERNEL)))
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a4faf5f904d9..206d0b424712 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,6 +27,7 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
+#include <linux/prandom.h>
#include <linux/printk.h>
#include <linux/random.h>
#include <linux/rculist.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index d35aca0e969a..79a7dfc32e76 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -20,6 +20,7 @@
#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
+#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 717fe657561d..8c1148fc73d7 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -20,6 +20,7 @@
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
+#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 8500f56cbd10..ba0027d1f2df 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -25,6 +25,7 @@
#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
+#include <linux/preempt.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -83,11 +84,12 @@ static inline u32 batadv_choose_claim(const void *data, u32 size)
*/
static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
{
- const struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
+ const struct batadv_bla_backbone_gw *gw;
u32 hash = 0;
- hash = jhash(&claim->addr, sizeof(claim->addr), hash);
- hash = jhash(&claim->vid, sizeof(claim->vid), hash);
+ gw = (struct batadv_bla_backbone_gw *)data;
+ hash = jhash(&gw->orig, sizeof(gw->orig), hash);
+ hash = jhash(&gw->vid, sizeof(gw->vid), hash);
return hash % size;
}
@@ -1579,13 +1581,16 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
}
/**
- * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
+ * batadv_bla_check_duplist() - Check if a frame is in the broadcast dup.
* @bat_priv: the bat priv with all the soft interface information
- * @skb: contains the bcast_packet to be checked
+ * @skb: contains the multicast packet to be checked
+ * @payload_ptr: pointer to position inside the head buffer of the skb
+ * marking the start of the data to be CRC'ed
+ * @orig: originator mac address, NULL if unknown
*
- * check if it is on our broadcast list. Another gateway might
- * have sent the same packet because it is connected to the same backbone,
- * so we have to remove this duplicate.
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
*
* This is performed by checking the CRC, which will tell us
* with a good chance that it is the same packet. If it is furthermore
@@ -1594,19 +1599,17 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
*
* Return: true if a packet is in the duplicate list, false otherwise.
*/
-bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
- struct sk_buff *skb)
+static bool batadv_bla_check_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, u8 *payload_ptr,
+ const u8 *orig)
{
- int i, curr;
- __be32 crc;
- struct batadv_bcast_packet *bcast_packet;
struct batadv_bcast_duplist_entry *entry;
bool ret = false;
-
- bcast_packet = (struct batadv_bcast_packet *)skb->data;
+ int i, curr;
+ __be32 crc;
/* calculate the crc ... */
- crc = batadv_skb_crc32(skb, (u8 *)(bcast_packet + 1));
+ crc = batadv_skb_crc32(skb, payload_ptr);
spin_lock_bh(&bat_priv->bla.bcast_duplist_lock);
@@ -1625,8 +1628,21 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
if (entry->crc != crc)
continue;
- if (batadv_compare_eth(entry->orig, bcast_packet->orig))
- continue;
+ /* are the originators both known and not anonymous? */
+ if (orig && !is_zero_ether_addr(orig) &&
+ !is_zero_ether_addr(entry->orig)) {
+ /* If known, check if the new frame came from
+ * the same originator:
+ * We are safe to take identical frames from the
+ * same orig, if known, as multiplications in
+ * the mesh are detected via the (orig, seqno) pair.
+ * So we can be a bit more liberal here and allow
+ * identical frames from the same orig which the source
+ * host might have sent multiple times on purpose.
+ */
+ if (batadv_compare_eth(entry->orig, orig))
+ continue;
+ }
/* this entry seems to match: same crc, not too old,
* and from another gw. therefore return true to forbid it.
@@ -1642,7 +1658,14 @@ bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
entry = &bat_priv->bla.bcast_duplist[curr];
entry->crc = crc;
entry->entrytime = jiffies;
- ether_addr_copy(entry->orig, bcast_packet->orig);
+
+ /* known originator */
+ if (orig)
+ ether_addr_copy(entry->orig, orig);
+ /* anonymous originator */
+ else
+ eth_zero_addr(entry->orig);
+
bat_priv->bla.bcast_duplist_curr = curr;
out:
@@ -1652,6 +1675,48 @@ out:
}
/**
+ * batadv_bla_check_ucast_duplist() - Check if a frame is in the broadcast dup.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: contains the multicast packet to be checked, decapsulated from a
+ * unicast_packet
+ *
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
+ *
+ * Return: true if a packet is in the duplicate list, false otherwise.
+ */
+static bool batadv_bla_check_ucast_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return batadv_bla_check_duplist(bat_priv, skb, (u8 *)skb->data, NULL);
+}
+
+/**
+ * batadv_bla_check_bcast_duplist() - Check if a frame is in the broadcast dup.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: contains the bcast_packet to be checked
+ *
+ * Check if it is on our broadcast list. Another gateway might have sent the
+ * same packet because it is connected to the same backbone, so we have to
+ * remove this duplicate.
+ *
+ * Return: true if a packet is in the duplicate list, false otherwise.
+ */
+bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ struct batadv_bcast_packet *bcast_packet;
+ u8 *payload_ptr;
+
+ bcast_packet = (struct batadv_bcast_packet *)skb->data;
+ payload_ptr = (u8 *)(bcast_packet + 1);
+
+ return batadv_bla_check_duplist(bat_priv, skb, payload_ptr,
+ bcast_packet->orig);
+}
+
+/**
* batadv_bla_is_backbone_gw_orig() - Check if the originator is a gateway for
* the VLAN identified by vid.
* @bat_priv: the bat priv with all the soft interface information
@@ -1798,7 +1863,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
ret = queue_work(batadv_event_workqueue, &backbone_gw->report_work);
- /* backbone_gw is unreferenced in the report work function function
+ /* backbone_gw is unreferenced in the report work function
* if queue_work() call was successful
*/
if (!ret)
@@ -1812,7 +1877,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
* @bat_priv: the bat priv with all the soft interface information
* @skb: the frame to be checked
* @vid: the VLAN ID of the frame
- * @is_bcast: the packet came in a broadcast packet type.
+ * @packet_type: the batman packet type this frame came in
*
* batadv_bla_rx avoidance checks if:
* * we have to race for a claim
@@ -1824,7 +1889,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
* further process the skb.
*/
bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid, bool is_bcast)
+ unsigned short vid, int packet_type)
{
struct batadv_bla_backbone_gw *backbone_gw;
struct ethhdr *ethhdr;
@@ -1846,9 +1911,32 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
goto handled;
if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
- /* don't allow broadcasts while requests are in flight */
- if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
- goto handled;
+ /* don't allow multicast packets while requests are in flight */
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ /* Both broadcast flooding or multicast-via-unicasts
+ * delivery might send to multiple backbone gateways
+ * sharing the same LAN and therefore need to coordinate
+ * which backbone gateway forwards into the LAN,
+ * by claiming the payload source address.
+ *
+ * Broadcast flooding and multicast-via-unicasts
+ * delivery use the following two batman packet types.
+ * Note: explicitly exclude BATADV_UNICAST_4ADDR,
+ * as the DHCP gateway feature will send explicitly
+ * to only one BLA gateway, so the claiming process
+ * should be avoided there.
+ */
+ if (packet_type == BATADV_BCAST ||
+ packet_type == BATADV_UNICAST)
+ goto handled;
+
+ /* potential duplicates from foreign BLA backbone gateways via
+ * multicast-in-unicast packets
+ */
+ if (is_multicast_ether_addr(ethhdr->h_dest) &&
+ packet_type == BATADV_UNICAST &&
+ batadv_bla_check_ucast_duplist(bat_priv, skb))
+ goto handled;
ether_addr_copy(search_claim.addr, ethhdr->h_source);
search_claim.vid = vid;
@@ -1883,13 +1971,14 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
goto allow;
}
- /* if it is a broadcast ... */
- if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast) {
+ /* if it is a multicast ... */
+ if (is_multicast_ether_addr(ethhdr->h_dest) &&
+ (packet_type == BATADV_BCAST || packet_type == BATADV_UNICAST)) {
/* ... drop it. the responsible gateway is in charge.
*
- * We need to check is_bcast because with the gateway
+ * We need to check packet type because with the gateway
* feature, broadcasts (like DHCP requests) may be sent
- * using a unicast packet type.
+ * using a unicast 4 address packet type. See comment above.
*/
goto handled;
} else {
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 41edb2c4a327..a81c41b636f9 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -35,7 +35,7 @@ static inline bool batadv_bla_is_loopdetect_mac(const uint8_t *mac)
#ifdef CONFIG_BATMAN_ADV_BLA
bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
- unsigned short vid, bool is_bcast);
+ unsigned short vid, int packet_type);
bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid);
bool batadv_bla_is_backbone_gw(struct sk_buff *skb,
@@ -66,7 +66,7 @@ bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
static inline bool batadv_bla_rx(struct batadv_priv *bat_priv,
struct sk_buff *skb, unsigned short vid,
- bool is_bcast)
+ int packet_type)
{
return false;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 9fdbe3068153..9a47ef8b95c4 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -306,7 +306,7 @@ free:
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
* to NULL; 3) Error: Return false and free skb.
*
- * Return: true when the packet is merged or buffered, false when skb is not not
+ * Return: true when the packet is merged or buffered, false when skb is not
* used.
*/
bool batadv_frag_skb_buffer(struct sk_buff **skb,
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index fa06b51c0144..dad99641df2a 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -599,7 +599,7 @@ out:
/* report to the other components the maximum amount of bytes that
* batman-adv can send over the wire (without considering the payload
* overhead). For example, this value is used by TT to compute the
- * maximum local table table size
+ * maximum local table size
*/
atomic_set(&bat_priv->packet_size_max, min_mtu);
@@ -977,23 +977,6 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_hardif_remove_interfaces() - Remove all hard interfaces
- */
-void batadv_hardif_remove_interfaces(void)
-{
- struct batadv_hard_iface *hard_iface, *hard_iface_tmp;
-
- rtnl_lock();
- list_for_each_entry_safe(hard_iface, hard_iface_tmp,
- &batadv_hardif_list, list) {
- list_del_rcu(&hard_iface->list);
- batadv_hardif_generation++;
- batadv_hardif_remove_interface(hard_iface);
- }
- rtnl_unlock();
-}
-
-/**
* batadv_hard_if_event_softif() - Handle events for soft interfaces
* @event: NETDEV_* event to handle
* @net_dev: net_device which generated an event
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index bad2e50135e8..b1855d9d0b06 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -100,7 +100,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
struct net *net, const char *iface_name);
void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
enum batadv_hard_if_cleanup autodel);
-void batadv_hardif_remove_interfaces(void);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
void batadv_hardif_release(struct kref *ref);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 519c08c2cfba..70fee9b42e25 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -137,7 +137,6 @@ static void __exit batadv_exit(void)
batadv_netlink_unregister();
rtnl_link_unregister(&batadv_link_ops);
unregister_netdevice_notifier(&batadv_hard_if_notifier);
- batadv_hardif_remove_interfaces();
flush_workqueue(batadv_event_workqueue);
destroy_workqueue(batadv_event_workqueue);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 0393bb9ed3d0..a47dc332d796 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2020.3"
+#define BATADV_SOURCE_VERSION "2020.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index bdc4a1fba1c6..9af99c39b9fd 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -51,6 +51,7 @@
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -207,7 +208,7 @@ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
/* TODO: ask the bridge if a multicast router is present (the bridge
- * is capable of performing proper RFC4286 multicast multicast router
+ * is capable of performing proper RFC4286 multicast router
* discovery) instead of searching for a ff02::2 listener here
*/
ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
@@ -220,7 +221,7 @@ static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
* address here, only IPv6 ones
*/
if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
- ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.u.ip6))
+ ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.dst.ip6))
flags &= ~BATADV_MCAST_WANT_NO_RTR6;
list_del(&br_ip_entry->list);
@@ -561,10 +562,10 @@ out:
static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src)
{
if (src->proto == htons(ETH_P_IP))
- ip_eth_mc_map(src->u.ip4, dst);
+ ip_eth_mc_map(src->dst.ip4, dst);
#if IS_ENABLED(CONFIG_IPV6)
else if (src->proto == htons(ETH_P_IPV6))
- ipv6_eth_mc_map(&src->u.ip6, dst);
+ ipv6_eth_mc_map(&src->dst.ip6, dst);
#endif
else
eth_zero_addr(dst);
@@ -608,11 +609,11 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
continue;
if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
- ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+ ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4))
continue;
if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR4) &&
- !ipv4_is_local_multicast(br_ip_entry->addr.u.ip4))
+ !ipv4_is_local_multicast(br_ip_entry->addr.dst.ip4))
continue;
}
@@ -622,11 +623,11 @@ static int batadv_mcast_mla_bridge_get(struct net_device *dev,
continue;
if (tvlv_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
- ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.u.ip6))
+ ipv6_addr_is_ll_all_nodes(&br_ip_entry->addr.dst.ip6))
continue;
if (!(tvlv_flags & BATADV_MCAST_WANT_NO_RTR6) &&
- IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.u.ip6) >
+ IPV6_ADDR_MC_SCOPE(&br_ip_entry->addr.dst.ip6) >
IPV6_ADDR_SCOPE_LINKLOCAL)
continue;
}
@@ -1435,6 +1436,35 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/**
+ * batadv_mcast_forw_send_orig() - send a multicast packet to an originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ * @vid: the vlan identifier
+ * @orig_node: the originator to send the packet to
+ *
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
+{
+ /* Avoid sending multicast-in-unicast packets to other BLA
+ * gateways - they already got the frame from the LAN side
+ * we share with them.
+ * TODO: Refactor to take BLA into account earlier, to avoid
+ * reducing the mcast_fanout count.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) {
+ dev_kfree_skb(skb);
+ return NET_XMIT_SUCCESS;
+ }
+
+ return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
+ orig_node, vid);
+}
+
+/**
* batadv_mcast_forw_tt() - forwards a packet to multicast listeners
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to transmit
@@ -1471,8 +1501,8 @@ batadv_mcast_forw_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
break;
}
- batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
- orig_entry->orig_node, vid);
+ batadv_mcast_forw_send_orig(bat_priv, newskb, vid,
+ orig_entry->orig_node);
}
rcu_read_unlock();
@@ -1513,8 +1543,7 @@ batadv_mcast_forw_want_all_ipv4(struct batadv_priv *bat_priv,
break;
}
- batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
- orig_node, vid);
+ batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
}
rcu_read_unlock();
return ret;
@@ -1551,8 +1580,7 @@ batadv_mcast_forw_want_all_ipv6(struct batadv_priv *bat_priv,
break;
}
- batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
- orig_node, vid);
+ batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
}
rcu_read_unlock();
return ret;
@@ -1618,8 +1646,7 @@ batadv_mcast_forw_want_all_rtr4(struct batadv_priv *bat_priv,
break;
}
- batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
- orig_node, vid);
+ batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
}
rcu_read_unlock();
return ret;
@@ -1656,8 +1683,7 @@ batadv_mcast_forw_want_all_rtr6(struct batadv_priv *bat_priv,
break;
}
- batadv_send_skb_unicast(bat_priv, newskb, BATADV_UNICAST, 0,
- orig_node, vid);
+ batadv_mcast_forw_send_orig(bat_priv, newskb, vid, orig_node);
}
rcu_read_unlock();
return ret;
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index ebf825991ecd..3e114bc5ca3b 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -46,6 +46,11 @@ enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node **mcast_single_orig);
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node);
+
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid);
@@ -72,6 +77,16 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
static inline int
+batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
+static inline int
batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid)
{
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index dc193618a761..c7a55647b520 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1350,7 +1350,7 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
}
}
-static const struct genl_ops batadv_netlink_ops[] = {
+static const struct genl_small_ops batadv_netlink_ops[] = {
{
.cmd = BATADV_CMD_GET_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -1484,8 +1484,8 @@ struct genl_family batadv_netlink_family __ro_after_init = {
.pre_doit = batadv_pre_doit,
.post_doit = batadv_post_doit,
.module = THIS_MODULE,
- .ops = batadv_netlink_ops,
- .n_ops = ARRAY_SIZE(batadv_netlink_ops),
+ .small_ops = batadv_netlink_ops,
+ .n_small_ops = ARRAY_SIZE(batadv_netlink_ops),
.mcgrps = batadv_netlink_mcgrps,
.n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
};
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 48d707850f3e..61ddd6d709a0 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -26,8 +26,8 @@
#include <linux/lockdep.h>
#include <linux/net.h>
#include <linux/netdevice.h>
+#include <linux/prandom.h>
#include <linux/printk.h>
-#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
@@ -250,7 +250,7 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
/**
* batadv_nc_packet_free() - frees nc packet
* @nc_packet: the nc packet to free
- * @dropped: whether the packet is freed because is is dropped
+ * @dropped: whether the packet is freed because is dropped
*/
static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
bool dropped)
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 27cdf5e4349a..9e5c71e406ff 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -826,6 +826,10 @@ static bool batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
vid = batadv_get_vid(skb, hdr_len);
ethhdr = (struct ethhdr *)(skb->data + hdr_len);
+ /* do not reroute multicast frames in a unicast header */
+ if (is_multicast_ether_addr(ethhdr->h_dest))
+ return true;
+
/* check if the destination client was served by this node and it is now
* roaming. In this case, it means that the node has got a ROAM_ADV
* message and that it knows the new destination in the mesh to re-route
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index d267b94800d6..87017332b567 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -461,7 +461,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
/**
* batadv_forw_packet_free() - free a forwarding packet
* @forw_packet: The packet to free
- * @dropped: whether the packet is freed because is is dropped
+ * @dropped: whether the packet is freed because is dropped
*
* This frees a forwarding packet and releases any resources it might
* have claimed.
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 23833a0ba5e6..82e7ca886605 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -364,9 +364,8 @@ send:
goto dropped;
ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
} else if (mcast_single_orig) {
- ret = batadv_send_skb_unicast(bat_priv, skb,
- BATADV_UNICAST, 0,
- mcast_single_orig, vid);
+ ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
+ mcast_single_orig);
} else if (forw_mode == BATADV_FORW_SOME) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid);
} else {
@@ -425,10 +424,10 @@ void batadv_interface_rx(struct net_device *soft_iface,
struct vlan_ethhdr *vhdr;
struct ethhdr *ethhdr;
unsigned short vid;
- bool is_bcast;
+ int packet_type;
batadv_bcast_packet = (struct batadv_bcast_packet *)skb->data;
- is_bcast = (batadv_bcast_packet->packet_type == BATADV_BCAST);
+ packet_type = batadv_bcast_packet->packet_type;
skb_pull_rcsum(skb, hdr_size);
skb_reset_mac_header(skb);
@@ -471,7 +470,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
/* Let the bridge loop avoidance check the packet. If will
* not handle it, we can safely push it up.
*/
- if (batadv_bla_rx(bat_priv, skb, vid, is_bcast))
+ if (batadv_bla_rx(bat_priv, skb, vid, packet_type))
goto out;
if (orig_node)
@@ -649,7 +648,7 @@ static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv,
/**
* batadv_interface_add_vid() - ndo_add_vid API implementation
* @dev: the netdev of the mesh interface
- * @proto: protocol of the the vlan id
+ * @proto: protocol of the vlan id
* @vid: identifier of the new vlan
*
* Set up all the internal structures for handling the new vlan on top of the
@@ -707,7 +706,7 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto,
/**
* batadv_interface_kill_vid() - ndo_kill_vid API implementation
* @dev: the netdev of the mesh interface
- * @proto: protocol of the the vlan id
+ * @proto: protocol of the vlan id
* @vid: identifier of the deleted vlan
*
* Destroy all the internal structures used to handle the vlan identified by vid
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ed519efa3c36..965336a3b89d 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1492,7 +1492,7 @@ struct batadv_tp_vars {
/** @unacked_lock: protect unacked_list */
spinlock_t unacked_lock;
- /** @last_recv_time: time time (jiffies) a msg was received */
+ /** @last_recv_time: time (jiffies) a msg was received */
unsigned long last_recv_time;
/** @refcount: number of context where the object is used */
@@ -1996,7 +1996,7 @@ struct batadv_tt_change_node {
*/
struct batadv_tt_req_node {
/**
- * @addr: mac address address of the originator this request was sent to
+ * @addr: mac address of the originator this request was sent to
*/
u8 addr[ETH_ALEN];
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index e2497d764e97..64e669acd42f 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -64,7 +64,6 @@ source "net/bluetooth/hidp/Kconfig"
config BT_HS
bool "Bluetooth High Speed (HS) features"
depends on BT_BREDR
- default y
help
Bluetooth High Speed includes support for off-loading
Bluetooth connections via 802.11 (wifi) physical layer
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 26526be579c7..da7fd7c8c2dc 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -226,6 +226,9 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_info_req req;
found = true;
+
+ memset(&req, 0, sizeof(req));
+
req.id = cl->id;
a2mp_send(mgr, A2MP_GETINFO_REQ, __next_ident(mgr),
sizeof(req), &req);
@@ -305,6 +308,8 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (!hdev || hdev->dev_type != HCI_AMP) {
struct a2mp_info_rsp rsp;
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.id = req->id;
rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
@@ -348,6 +353,8 @@ static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (!ctrl)
return -ENOMEM;
+ memset(&req, 0, sizeof(req));
+
req.id = rsp->id;
a2mp_send(mgr, A2MP_GETAMPASSOC_REQ, __next_ident(mgr), sizeof(req),
&req);
@@ -376,6 +383,8 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_amp_assoc_rsp rsp;
rsp.id = req->id;
+ memset(&rsp, 0, sizeof(rsp));
+
if (tmp) {
rsp.status = A2MP_STATUS_COLLISION_OCCURED;
amp_mgr_put(tmp);
@@ -464,7 +473,6 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
struct a2mp_physlink_req *req = (void *) skb->data;
-
struct a2mp_physlink_rsp rsp;
struct hci_dev *hdev;
struct hci_conn *hcon;
@@ -475,6 +483,8 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("local_id %d, remote_id %d", req->local_id, req->remote_id);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.local_id = req->remote_id;
rsp.remote_id = req->local_id;
@@ -553,6 +563,8 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("local_id %d remote_id %d", req->local_id, req->remote_id);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.local_id = req->remote_id;
rsp.remote_id = req->local_id;
rsp.status = A2MP_STATUS_SUCCESS;
@@ -675,6 +687,8 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (err) {
struct a2mp_cmd_rej rej;
+ memset(&rej, 0, sizeof(rej));
+
rej.reason = cpu_to_le16(0);
hdr = (void *) skb->data;
@@ -898,6 +912,8 @@ void a2mp_send_getinfo_rsp(struct hci_dev *hdev)
BT_DBG("%s mgr %p", hdev->name, mgr);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.id = hdev->id;
rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
@@ -995,6 +1011,8 @@ void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status)
if (!mgr)
return;
+ memset(&rsp, 0, sizeof(rsp));
+
hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT);
if (!hs_hcon) {
rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION;
@@ -1027,6 +1045,8 @@ void a2mp_discover_amp(struct l2cap_chan *chan)
mgr->bredr_chan = chan;
+ memset(&req, 0, sizeof(req));
+
req.mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
req.ext_feat = 0;
a2mp_send(mgr, A2MP_DISCOVER_REQ, 1, sizeof(req), &req);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 9832f8445d43..d0c1024bf600 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1388,7 +1388,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
return 0;
}
-/* Encrypt the the link */
+/* Encrypt the link */
static void hci_conn_encrypt(struct hci_conn *conn)
{
BT_DBG("hcon %p", conn);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 68bfe57b6625..502552d6e9af 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -808,7 +808,7 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
* Delete Stored Link Key command. They are clearly indicating its
* absence in the bit mask of supported commands.
*
- * Check the supported commands and only if the the command is marked
+ * Check the supported commands and only if the command is marked
* as supported send it. If not supported assume that the controller
* does not have actual support for stored link keys which makes this
* command redundant anyway.
@@ -2963,7 +2963,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
sizeof(adv_instance->scan_rsp_data));
} else {
if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets ||
- instance < 1 || instance > HCI_MAX_ADV_INSTANCES)
+ instance < 1 || instance > hdev->le_num_of_adv_sets)
return -EOVERFLOW;
adv_instance = kzalloc(sizeof(*adv_instance), GFP_KERNEL);
@@ -3061,6 +3061,7 @@ static int free_adv_monitor(int id, void *ptr, void *data)
idr_remove(&hdev->adv_monitors_idr, monitor->handle);
hci_free_adv_monitor(monitor);
+ hdev->adv_monitors_cnt--;
return 0;
}
@@ -3077,6 +3078,7 @@ int hci_remove_adv_monitor(struct hci_dev *hdev, u16 handle)
idr_remove(&hdev->adv_monitors_idr, monitor->handle);
hci_free_adv_monitor(monitor);
+ hdev->adv_monitors_cnt--;
} else {
/* Remove all monitors if handle is 0. */
idr_for_each(&hdev->adv_monitors_idr, &free_adv_monitor, hdev);
@@ -3442,6 +3444,16 @@ void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
}
}
+static void hci_suspend_clear_tasks(struct hci_dev *hdev)
+{
+ int i;
+
+ for (i = 0; i < __SUSPEND_NUM_TASKS; i++)
+ clear_bit(i, hdev->suspend_tasks);
+
+ wake_up(&hdev->suspend_wait_q);
+}
+
static int hci_suspend_wait_event(struct hci_dev *hdev)
{
#define WAKE_COND \
@@ -3487,12 +3499,24 @@ static int hci_change_suspend_state(struct hci_dev *hdev,
return hci_suspend_wait_event(hdev);
}
+static void hci_clear_wake_reason(struct hci_dev *hdev)
+{
+ hci_dev_lock(hdev);
+
+ hdev->wake_reason = 0;
+ bacpy(&hdev->wake_addr, BDADDR_ANY);
+ hdev->wake_addr_type = 0;
+
+ hci_dev_unlock(hdev);
+}
+
static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
void *data)
{
struct hci_dev *hdev =
container_of(nb, struct hci_dev, suspend_notifier);
int ret = 0;
+ u8 state = BT_RUNNING;
/* If powering down, wait for completion. */
if (mgmt_powering_down(hdev)) {
@@ -3513,15 +3537,27 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
* - Second, program event filter/whitelist and enable scan
*/
ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
+ if (!ret)
+ state = BT_SUSPEND_DISCONNECT;
/* Only configure whitelist if disconnect succeeded and wake
* isn't being prevented.
*/
- if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev)))
+ if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) {
ret = hci_change_suspend_state(hdev,
BT_SUSPEND_CONFIGURE_WAKE);
+ if (!ret)
+ state = BT_SUSPEND_CONFIGURE_WAKE;
+ }
+
+ hci_clear_wake_reason(hdev);
+ mgmt_suspending(hdev, state);
+
} else if (action == PM_POST_SUSPEND) {
ret = hci_change_suspend_state(hdev, BT_RUNNING);
+
+ mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr,
+ hdev->wake_addr_type);
}
done:
@@ -3784,6 +3820,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
cancel_work_sync(&hdev->power_on);
+ hci_suspend_clear_tasks(hdev);
unregister_pm_notifier(&hdev->suspend_notifier);
cancel_work_sync(&hdev->suspend_prepare);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4b7fc430793c..f04963914366 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2569,7 +2569,6 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_conn_complete *ev = (void *) skb->data;
- struct inquiry_entry *ie;
struct hci_conn *conn;
BT_DBG("%s", hdev->name);
@@ -2578,13 +2577,19 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
if (!conn) {
- /* Connection may not exist if auto-connected. Check the inquiry
- * cache to see if we've already discovered this bdaddr before.
- * If found and link is an ACL type, create a connection class
+ /* Connection may not exist if auto-connected. Check the bredr
+ * allowlist to see if this device is allowed to auto connect.
+ * If link is an ACL type, create a connection class
* automatically.
+ *
+ * Auto-connect will only occur if the event filter is
+ * programmed with a given address. Right now, event filter is
+ * only used during suspend.
*/
- ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
- if (ie && ev->link_type == ACL_LINK) {
+ if (ev->link_type == ACL_LINK &&
+ hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ &ev->bdaddr,
+ BDADDR_BREDR)) {
conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
HCI_ROLE_SLAVE);
if (!conn) {
@@ -6012,6 +6017,75 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return true;
}
+static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
+ struct sk_buff *skb)
+{
+ struct hci_ev_le_advertising_info *adv;
+ struct hci_ev_le_direct_adv_info *direct_adv;
+ struct hci_ev_le_ext_adv_report *ext_adv;
+ const struct hci_ev_conn_complete *conn_complete = (void *)skb->data;
+ const struct hci_ev_conn_request *conn_request = (void *)skb->data;
+
+ hci_dev_lock(hdev);
+
+ /* If we are currently suspended and this is the first BT event seen,
+ * save the wake reason associated with the event.
+ */
+ if (!hdev->suspended || hdev->wake_reason)
+ goto unlock;
+
+ /* Default to remote wake. Values for wake_reason are documented in the
+ * Bluez mgmt api docs.
+ */
+ hdev->wake_reason = MGMT_WAKE_REASON_REMOTE_WAKE;
+
+ /* Once configured for remote wakeup, we should only wake up for
+ * reconnections. It's useful to see which device is waking us up so
+ * keep track of the bdaddr of the connection event that woke us up.
+ */
+ if (event == HCI_EV_CONN_REQUEST) {
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ hdev->wake_addr_type = BDADDR_BREDR;
+ } else if (event == HCI_EV_CONN_COMPLETE) {
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ hdev->wake_addr_type = BDADDR_BREDR;
+ } else if (event == HCI_EV_LE_META) {
+ struct hci_ev_le_meta *le_ev = (void *)skb->data;
+ u8 subevent = le_ev->subevent;
+ u8 *ptr = &skb->data[sizeof(*le_ev)];
+ u8 num_reports = *ptr;
+
+ if ((subevent == HCI_EV_LE_ADVERTISING_REPORT ||
+ subevent == HCI_EV_LE_DIRECT_ADV_REPORT ||
+ subevent == HCI_EV_LE_EXT_ADV_REPORT) &&
+ num_reports) {
+ adv = (void *)(ptr + 1);
+ direct_adv = (void *)(ptr + 1);
+ ext_adv = (void *)(ptr + 1);
+
+ switch (subevent) {
+ case HCI_EV_LE_ADVERTISING_REPORT:
+ bacpy(&hdev->wake_addr, &adv->bdaddr);
+ hdev->wake_addr_type = adv->bdaddr_type;
+ break;
+ case HCI_EV_LE_DIRECT_ADV_REPORT:
+ bacpy(&hdev->wake_addr, &direct_adv->bdaddr);
+ hdev->wake_addr_type = direct_adv->bdaddr_type;
+ break;
+ case HCI_EV_LE_EXT_ADV_REPORT:
+ bacpy(&hdev->wake_addr, &ext_adv->bdaddr);
+ hdev->wake_addr_type = ext_adv->bdaddr_type;
+ break;
+ }
+ }
+ } else {
+ hdev->wake_reason = MGMT_WAKE_REASON_UNEXPECTED;
+ }
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_event_hdr *hdr = (void *) skb->data;
@@ -6045,6 +6119,9 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
skb_pull(skb, HCI_EVENT_HDR_SIZE);
+ /* Store wake reason if we're suspended */
+ hci_store_wake_reason(hdev, event, skb);
+
switch (event) {
case HCI_EV_INQUIRY_COMPLETE:
hci_inquiry_complete_evt(hdev, skb);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e0269192f2e5..6f12bab4d2fa 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1027,6 +1027,9 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
} else if (hci_is_le_conn_scanning(hdev)) {
window = hdev->le_scan_window_connect;
interval = hdev->le_scan_int_connect;
+ } else if (hci_is_adv_monitoring(hdev)) {
+ window = hdev->le_scan_window_adv_monitor;
+ interval = hdev->le_scan_int_adv_monitor;
} else {
window = hdev->le_scan_window;
interval = hdev->le_scan_interval;
@@ -1111,6 +1114,53 @@ static void hci_req_config_le_suspend_scan(struct hci_request *req)
set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks);
}
+static void cancel_adv_timeout(struct hci_dev *hdev)
+{
+ if (hdev->adv_instance_timeout) {
+ hdev->adv_instance_timeout = 0;
+ cancel_delayed_work(&hdev->adv_instance_expire);
+ }
+}
+
+/* This function requires the caller holds hdev->lock */
+static void hci_suspend_adv_instances(struct hci_request *req)
+{
+ bt_dev_dbg(req->hdev, "Suspending advertising instances");
+
+ /* Call to disable any advertisements active on the controller.
+ * This will succeed even if no advertisements are configured.
+ */
+ __hci_req_disable_advertising(req);
+
+ /* If we are using software rotation, pause the loop */
+ if (!ext_adv_capable(req->hdev))
+ cancel_adv_timeout(req->hdev);
+}
+
+/* This function requires the caller holds hdev->lock */
+static void hci_resume_adv_instances(struct hci_request *req)
+{
+ struct adv_info *adv;
+
+ bt_dev_dbg(req->hdev, "Resuming advertising instances");
+
+ if (ext_adv_capable(req->hdev)) {
+ /* Call for each tracked instance to be re-enabled */
+ list_for_each_entry(adv, &req->hdev->adv_instances, list) {
+ __hci_req_enable_ext_advertising(req,
+ adv->instance);
+ }
+
+ } else {
+ /* Schedule for most recent instance to be restarted and begin
+ * the software rotation loop
+ */
+ __hci_req_schedule_adv_instance(req,
+ req->hdev->cur_adv_instance,
+ true);
+ }
+}
+
static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode,
@@ -1153,7 +1203,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
hdev->discovery_paused = true;
hdev->discovery_old_state = old_state;
- /* Stop advertising */
+ /* Stop directed advertising */
old_state = hci_dev_test_flag(hdev, HCI_ADVERTISING);
if (old_state) {
set_bit(SUSPEND_PAUSE_ADVERTISING, hdev->suspend_tasks);
@@ -1162,6 +1212,10 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
&hdev->discov_off, 0);
}
+ /* Pause other advertisements */
+ if (hdev->adv_instance_cnt)
+ hci_suspend_adv_instances(&req);
+
hdev->advertising_paused = true;
hdev->advertising_old_state = old_state;
/* Disable page scan */
@@ -1212,7 +1266,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
/* Reset passive/background scanning to normal */
hci_req_config_le_suspend_scan(&req);
- /* Unpause advertising */
+ /* Unpause directed advertising */
hdev->advertising_paused = false;
if (hdev->advertising_old_state) {
set_bit(SUSPEND_UNPAUSE_ADVERTISING,
@@ -1223,6 +1277,10 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
hdev->advertising_old_state = 0;
}
+ /* Resume other advertisements */
+ if (hdev->adv_instance_cnt)
+ hci_resume_adv_instances(&req);
+
/* Unpause discovery */
hdev->discovery_paused = false;
if (hdev->discovery_old_state != DISCOVERY_STOPPED &&
@@ -1533,11 +1591,14 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
memset(&cp, 0, sizeof(cp));
- if (instance)
+ /* Extended scan response data doesn't allow a response to be
+ * set if the instance isn't scannable.
+ */
+ if (get_adv_instance_scan_rsp_len(hdev, instance))
len = create_instance_scan_rsp_data(hdev, instance,
cp.data);
else
- len = create_default_scan_rsp_data(hdev, cp.data);
+ len = 0;
if (hdev->scan_rsp_data_len == len &&
!memcmp(cp.data, hdev->scan_rsp_data, len))
@@ -1824,7 +1885,13 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
if (use_rpa) {
int to;
- *own_addr_type = ADDR_LE_DEV_RANDOM;
+ /* If Controller supports LL Privacy use own address type is
+ * 0x03
+ */
+ if (use_ll_privacy(hdev))
+ *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
+ else
+ *own_addr_type = ADDR_LE_DEV_RANDOM;
if (adv_instance) {
if (!adv_instance->rpa_expired &&
@@ -2183,14 +2250,6 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
return 0;
}
-static void cancel_adv_timeout(struct hci_dev *hdev)
-{
- if (hdev->adv_instance_timeout) {
- hdev->adv_instance_timeout = 0;
- cancel_delayed_work(&hdev->adv_instance_expire);
- }
-}
-
/* For a single instance:
* - force == true: The instance will be removed even when its remaining
* lifetime is not zero.
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index ade83e224567..1ab27b90ddcb 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7301,9 +7301,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
goto drop;
}
- if ((chan->mode == L2CAP_MODE_ERTM ||
- chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb))
- goto drop;
+ if (chan->ops->filter) {
+ if (chan->ops->filter(chan, skb))
+ goto drop;
+ }
if (!control->sframe) {
int err;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index e1a3e66b1754..f1b1edd0b697 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1521,8 +1521,6 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
parent = bt_sk(sk)->parent;
- sock_set_flag(sk, SOCK_ZAPPED);
-
switch (chan->state) {
case BT_OPEN:
case BT_BOUND:
@@ -1549,8 +1547,11 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
break;
}
-
release_sock(sk);
+
+ /* Only zap after cleanup to avoid use after free race */
+ sock_set_flag(sk, SOCK_ZAPPED);
+
}
static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state,
@@ -1663,6 +1664,19 @@ static void l2cap_sock_suspend_cb(struct l2cap_chan *chan)
sk->sk_state_change(sk);
}
+static int l2cap_sock_filter(struct l2cap_chan *chan, struct sk_buff *skb)
+{
+ struct sock *sk = chan->data;
+
+ switch (chan->mode) {
+ case L2CAP_MODE_ERTM:
+ case L2CAP_MODE_STREAMING:
+ return sk_filter(sk, skb);
+ }
+
+ return 0;
+}
+
static const struct l2cap_ops l2cap_chan_ops = {
.name = "L2CAP Socket Interface",
.new_connection = l2cap_sock_new_connection_cb,
@@ -1678,6 +1692,7 @@ static const struct l2cap_ops l2cap_chan_ops = {
.get_sndtimeo = l2cap_sock_get_sndtimeo_cb,
.get_peer_pid = l2cap_sock_get_peer_pid_cb,
.alloc_skb = l2cap_sock_alloc_skb_cb,
+ .filter = l2cap_sock_filter,
};
static void l2cap_sock_destruct(struct sock *sk)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 5bbe71002fb9..12d7b368b428 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -163,6 +163,8 @@ static const u16 mgmt_events[] = {
MGMT_EV_PHY_CONFIGURATION_CHANGED,
MGMT_EV_EXP_FEATURE_CHANGED,
MGMT_EV_DEVICE_FLAGS_CHANGED,
+ MGMT_EV_CONTROLLER_SUSPEND,
+ MGMT_EV_CONTROLLER_RESUME,
};
static const u16 mgmt_untrusted_commands[] = {
@@ -782,7 +784,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_ssp_capable(hdev)) {
settings |= MGMT_SETTING_SSP;
- settings |= MGMT_SETTING_HS;
+ if (IS_ENABLED(CONFIG_BT_HS))
+ settings |= MGMT_SETTING_HS;
}
if (lmp_sc_capable(hdev))
@@ -1815,6 +1818,10 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
bt_dev_dbg(hdev, "sock %p", sk);
+ if (!IS_ENABLED(CONFIG_BT_HS))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
+ MGMT_STATUS_NOT_SUPPORTED);
+
status = mgmt_bredr_support(hdev);
if (status)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status);
@@ -4157,7 +4164,7 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
{
struct adv_monitor *monitor = NULL;
struct mgmt_rp_read_adv_monitor_features *rp = NULL;
- int handle;
+ int handle, err;
size_t rp_size = 0;
__u32 supported = 0;
__u16 num_handles = 0;
@@ -4192,9 +4199,13 @@ static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
if (num_handles)
memcpy(&rp->handles, &handles, (num_handles * sizeof(u16)));
- return mgmt_cmd_complete(sk, hdev->id,
- MGMT_OP_READ_ADV_MONITOR_FEATURES,
- MGMT_STATUS_SUCCESS, rp, rp_size);
+ err = mgmt_cmd_complete(sk, hdev->id,
+ MGMT_OP_READ_ADV_MONITOR_FEATURES,
+ MGMT_STATUS_SUCCESS, rp, rp_size);
+
+ kfree(rp);
+
+ return err;
}
static int add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
@@ -7202,6 +7213,8 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev)
if (ext_adv_capable(hdev)) {
flags |= MGMT_ADV_FLAG_SEC_1M;
+ flags |= MGMT_ADV_FLAG_HW_OFFLOAD;
+ flags |= MGMT_ADV_FLAG_CAN_SET_TX_POWER;
if (hdev->le_features[1] & HCI_LE_PHY_2M)
flags |= MGMT_ADV_FLAG_SEC_2M;
@@ -7250,7 +7263,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
rp->supported_flags = cpu_to_le32(supported_flags);
rp->max_adv_data_len = HCI_MAX_AD_LENGTH;
rp->max_scan_rsp_len = HCI_MAX_AD_LENGTH;
- rp->max_instances = HCI_MAX_ADV_INSTANCES;
+ rp->max_instances = hdev->le_num_of_adv_sets;
rp->num_instances = hdev->adv_instance_cnt;
instance = rp->instance;
@@ -7446,7 +7459,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
MGMT_STATUS_NOT_SUPPORTED);
- if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES)
+ if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_INVALID_PARAMS);
@@ -7699,7 +7712,7 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
MGMT_STATUS_REJECTED);
- if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES)
+ if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
MGMT_STATUS_INVALID_PARAMS);
@@ -8262,6 +8275,10 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
ev.addr.type = link_to_bdaddr(link_type, addr_type);
ev.reason = reason;
+ /* Report disconnects due to suspend */
+ if (hdev->suspended)
+ ev.reason = MGMT_DEV_DISCONN_LOCAL_HOST_SUSPEND;
+
mgmt_event(MGMT_EV_DEVICE_DISCONNECTED, hdev, &ev, sizeof(ev), sk);
if (sk)
@@ -8868,6 +8885,30 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering)
mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL);
}
+void mgmt_suspending(struct hci_dev *hdev, u8 state)
+{
+ struct mgmt_ev_controller_suspend ev;
+
+ ev.suspend_state = state;
+ mgmt_event(MGMT_EV_CONTROLLER_SUSPEND, hdev, &ev, sizeof(ev), NULL);
+}
+
+void mgmt_resuming(struct hci_dev *hdev, u8 reason, bdaddr_t *bdaddr,
+ u8 addr_type)
+{
+ struct mgmt_ev_controller_resume ev;
+
+ ev.wake_reason = reason;
+ if (bdaddr) {
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = addr_type;
+ } else {
+ memset(&ev.addr, 0, sizeof(ev.addr));
+ }
+
+ mgmt_event(MGMT_EV_CONTROLLER_RESUME, hdev, &ev, sizeof(ev), NULL);
+}
+
static struct hci_mgmt_chan chan = {
.channel = HCI_CHANNEL_CONTROL,
.handler_count = ARRAY_SIZE(mgmt_handlers),
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index dcf7f96ff417..79ffcdef0b7a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1001,6 +1001,12 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname,
err = -EFAULT;
break;
+ case BT_SNDMTU:
+ case BT_RCVMTU:
+ if (put_user(sco_pi(sk)->conn->mtu, (u32 __user *)optval))
+ err = -EFAULT;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a66f211726e7..c1c30a9f76f3 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -11,6 +11,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/error-injection.h>
+#include <linux/smp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -204,6 +205,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
int b = 2, err = -EFAULT;
u32 retval = 0;
+ if (kattr->test.flags || kattr->test.cpu)
+ return -EINVAL;
+
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
@@ -236,6 +240,84 @@ out:
return err;
}
+struct bpf_raw_tp_test_run_info {
+ struct bpf_prog *prog;
+ void *ctx;
+ u32 retval;
+};
+
+static void
+__bpf_prog_test_run_raw_tp(void *data)
+{
+ struct bpf_raw_tp_test_run_info *info = data;
+
+ rcu_read_lock();
+ info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+ rcu_read_unlock();
+}
+
+int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+ __u32 ctx_size_in = kattr->test.ctx_size_in;
+ struct bpf_raw_tp_test_run_info info;
+ int cpu = kattr->test.cpu, err = 0;
+ int current_cpu;
+
+ /* doesn't support data_in/out, ctx_out, duration, or repeat */
+ if (kattr->test.data_in || kattr->test.data_out ||
+ kattr->test.ctx_out || kattr->test.duration ||
+ kattr->test.repeat)
+ return -EINVAL;
+
+ if (ctx_size_in < prog->aux->max_ctx_offset)
+ return -EINVAL;
+
+ if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
+ return -EINVAL;
+
+ if (ctx_size_in) {
+ info.ctx = kzalloc(ctx_size_in, GFP_USER);
+ if (!info.ctx)
+ return -ENOMEM;
+ if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
+ err = -EFAULT;
+ goto out;
+ }
+ } else {
+ info.ctx = NULL;
+ }
+
+ info.prog = prog;
+
+ current_cpu = get_cpu();
+ if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
+ cpu == current_cpu) {
+ __bpf_prog_test_run_raw_tp(&info);
+ } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
+ /* smp_call_function_single() also checks cpu_online()
+ * after csd_lock(). However, since cpu is from user
+ * space, let's do an extra quick check to filter out
+ * invalid value before smp_call_function_single().
+ */
+ err = -ENXIO;
+ } else {
+ err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
+ &info, 1);
+ }
+ put_cpu();
+
+ if (!err &&
+ copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
+ err = -EFAULT;
+
+out:
+ kfree(info.ctx);
+ return err;
+}
+
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
@@ -410,6 +492,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
+ if (kattr->test.flags || kattr->test.cpu)
+ return -EINVAL;
+
data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
@@ -607,6 +692,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
+ if (kattr->test.flags || kattr->test.cpu)
+ return -EINVAL;
+
if (size < ETH_HLEN)
return -EINVAL;
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index 73d0b12789f1..8ad0233ce497 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -2,6 +2,7 @@
menuconfig BPFILTER
bool "BPF based packet filtering framework (BPFILTER)"
depends on NET && BPF && INET
+ select USERMODE_DRIVER
help
This builds experimental bpfilter framework that is aiming to
provide netfilter compatible functionality via BPF
diff --git a/net/bridge/br.c b/net/bridge/br.c
index b6fe30e3768f..401eeb9142eb 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -183,6 +183,11 @@ static int br_switchdev_event(struct notifier_block *unused,
br_fdb_offloaded_set(br, p, fdb_info->addr,
fdb_info->vid, fdb_info->offloaded);
break;
+ case SWITCHDEV_FDB_FLUSH_TO_BRIDGE:
+ fdb_info = ptr;
+ /* Don't delete static entries */
+ br_fdb_delete_by_port(br, p, fdb_info->vid, 0);
+ break;
}
out:
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index b18cdf03edb3..dfec65eca8a6 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -88,9 +88,10 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
}
}
-static int br_chk_addr_ip(struct net_device *dev, void *data)
+static int br_chk_addr_ip(struct net_device *dev,
+ struct netdev_nested_priv *priv)
{
- __be32 ip = *(__be32 *)data;
+ __be32 ip = *(__be32 *)priv->data;
struct in_device *in_dev;
__be32 addr = 0;
@@ -107,11 +108,15 @@ static int br_chk_addr_ip(struct net_device *dev, void *data)
static bool br_is_local_ip(struct net_device *dev, __be32 ip)
{
- if (br_chk_addr_ip(dev, &ip))
+ struct netdev_nested_priv priv = {
+ .data = (void *)&ip,
+ };
+
+ if (br_chk_addr_ip(dev, &priv))
return true;
/* check if ip is configured on upper dev */
- if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv))
return true;
return false;
@@ -361,9 +366,10 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
}
}
-static int br_chk_addr_ip6(struct net_device *dev, void *data)
+static int br_chk_addr_ip6(struct net_device *dev,
+ struct netdev_nested_priv *priv)
{
- struct in6_addr *addr = (struct in6_addr *)data;
+ struct in6_addr *addr = (struct in6_addr *)priv->data;
if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
return 1;
@@ -374,11 +380,15 @@ static int br_chk_addr_ip6(struct net_device *dev, void *data)
static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
{
- if (br_chk_addr_ip6(dev, addr))
+ struct netdev_nested_priv priv = {
+ .data = (void *)addr,
+ };
+
+ if (br_chk_addr_ip6(dev, &priv))
return true;
/* check if ip is configured on upper dev */
- if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv))
return true;
return false;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9a2fb4aa1a10..6f742fee874a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -206,27 +206,8 @@ static void br_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
struct net_bridge *br = netdev_priv(dev);
- struct pcpu_sw_netstats tmp, sum = { 0 };
- unsigned int cpu;
-
- for_each_possible_cpu(cpu) {
- unsigned int start;
- const struct pcpu_sw_netstats *bstats
- = per_cpu_ptr(br->stats, cpu);
- do {
- start = u64_stats_fetch_begin_irq(&bstats->syncp);
- memcpy(&tmp, bstats, sizeof(tmp));
- } while (u64_stats_fetch_retry_irq(&bstats->syncp, start));
- sum.tx_bytes += tmp.tx_bytes;
- sum.tx_packets += tmp.tx_packets;
- sum.rx_bytes += tmp.rx_bytes;
- sum.rx_packets += tmp.rx_packets;
- }
- stats->tx_bytes = sum.tx_bytes;
- stats->tx_packets = sum.tx_packets;
- stats->rx_bytes = sum.rx_bytes;
- stats->rx_packets = sum.rx_packets;
+ dev_fetch_sw_netstats(stats, br->stats);
}
static int br_change_mtu(struct net_device *dev, int new_mtu)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9db504baa094..32ac8343b0ba 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -413,6 +413,8 @@ void br_fdb_delete_by_port(struct net_bridge *br,
if (!do_all)
if (test_bit(BR_FDB_STATIC, &f->flags) ||
+ (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) &&
+ !test_bit(BR_FDB_OFFLOADED, &f->flags)) ||
(vid && f->key.vlan_id != vid))
continue;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7629b63f6f30..e28ffadd1371 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -274,14 +274,23 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
+ bool allow_mode_include = true;
struct hlist_node *rp;
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
- p = mdst ? rcu_dereference(mdst->ports) : NULL;
+ if (mdst) {
+ p = rcu_dereference(mdst->ports);
+ if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
+ br_multicast_is_star_g(&mdst->addr))
+ allow_mode_include = false;
+ } else {
+ p = NULL;
+ }
+
while (p || rp) {
struct net_bridge_port *port, *lport, *rport;
- lport = p ? p->port : NULL;
+ lport = p ? p->key.port : NULL;
rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
if ((unsigned long)lport > (unsigned long)rport) {
@@ -292,6 +301,10 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
local_orig);
goto delivered;
}
+ if ((!allow_mode_include &&
+ p->filter_mode == MCAST_INCLUDE) ||
+ (p->flags & MDB_PG_FLAGS_BLOCKED))
+ goto delivered;
} else {
port = rport;
}
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 5e71fc8b826f..2db800fc27ca 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -103,7 +103,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
/*
* Legacy ioctl's through SIOCDEVPRIVATE
- * This interface is deprecated because it was too difficult to
+ * This interface is deprecated because it was too difficult
* to do the translation for 32/64bit ioctl compatibility.
*/
static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index da5ed4cf9233..e15bab19a012 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -62,25 +62,96 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
e->flags |= MDB_FLAGS_OFFLOAD;
if (flags & MDB_PG_FLAGS_FAST_LEAVE)
e->flags |= MDB_FLAGS_FAST_LEAVE;
+ if (flags & MDB_PG_FLAGS_STAR_EXCL)
+ e->flags |= MDB_FLAGS_STAR_EXCL;
+ if (flags & MDB_PG_FLAGS_BLOCKED)
+ e->flags |= MDB_FLAGS_BLOCKED;
}
-static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
+static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
+ struct nlattr **mdb_attrs)
{
memset(ip, 0, sizeof(struct br_ip));
ip->vid = entry->vid;
ip->proto = entry->addr.proto;
- if (ip->proto == htons(ETH_P_IP))
- ip->u.ip4 = entry->addr.u.ip4;
+ switch (ip->proto) {
+ case htons(ETH_P_IP):
+ ip->dst.ip4 = entry->addr.u.ip4;
+ if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE])
+ ip->src.ip4 = nla_get_in_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
+ break;
#if IS_ENABLED(CONFIG_IPV6)
- else
- ip->u.ip6 = entry->addr.u.ip6;
+ case htons(ETH_P_IPV6):
+ ip->dst.ip6 = entry->addr.u.ip6;
+ if (mdb_attrs && mdb_attrs[MDBE_ATTR_SOURCE])
+ ip->src.ip6 = nla_get_in6_addr(mdb_attrs[MDBE_ATTR_SOURCE]);
+ break;
+#endif
+ }
+
+}
+
+static int __mdb_fill_srcs(struct sk_buff *skb,
+ struct net_bridge_port_group *p)
+{
+ struct net_bridge_group_src *ent;
+ struct nlattr *nest, *nest_ent;
+
+ if (hlist_empty(&p->src_list))
+ return 0;
+
+ nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
+ if (!nest)
+ return -EMSGSIZE;
+
+ hlist_for_each_entry_rcu(ent, &p->src_list, node,
+ lockdep_is_held(&p->key.port->br->multicast_lock)) {
+ nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
+ if (!nest_ent)
+ goto out_cancel_err;
+ switch (ent->addr.proto) {
+ case htons(ETH_P_IP):
+ if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+ ent->addr.src.ip4)) {
+ nla_nest_cancel(skb, nest_ent);
+ goto out_cancel_err;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+ &ent->addr.src.ip6)) {
+ nla_nest_cancel(skb, nest_ent);
+ goto out_cancel_err;
+ }
+ break;
#endif
+ default:
+ nla_nest_cancel(skb, nest_ent);
+ continue;
+ }
+ if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER,
+ br_timer_value(&ent->timer))) {
+ nla_nest_cancel(skb, nest_ent);
+ goto out_cancel_err;
+ }
+ nla_nest_end(skb, nest_ent);
+ }
+
+ nla_nest_end(skb, nest);
+
+ return 0;
+
+out_cancel_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
}
static int __mdb_fill_info(struct sk_buff *skb,
struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *p)
{
+ bool dump_srcs_mode = false;
struct timer_list *mtimer;
struct nlattr *nest_ent;
struct br_mdb_entry e;
@@ -89,7 +160,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
memset(&e, 0, sizeof(e));
if (p) {
- ifindex = p->port->dev->ifindex;
+ ifindex = p->key.port->dev->ifindex;
mtimer = &p->timer;
flags = p->flags;
} else {
@@ -101,10 +172,10 @@ static int __mdb_fill_info(struct sk_buff *skb,
e.ifindex = ifindex;
e.vid = mp->addr.vid;
if (mp->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = mp->addr.u.ip4;
+ e.addr.u.ip4 = mp->addr.dst.ip4;
#if IS_ENABLED(CONFIG_IPV6)
if (mp->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = mp->addr.u.ip6;
+ e.addr.u.ip6 = mp->addr.dst.ip6;
#endif
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
@@ -115,19 +186,53 @@ static int __mdb_fill_info(struct sk_buff *skb,
if (nla_put_nohdr(skb, sizeof(e), &e) ||
nla_put_u32(skb,
MDBA_MDB_EATTR_TIMER,
- br_timer_value(mtimer))) {
- nla_nest_cancel(skb, nest_ent);
- return -EMSGSIZE;
+ br_timer_value(mtimer)))
+ goto nest_err;
+
+ switch (mp->addr.proto) {
+ case htons(ETH_P_IP):
+ dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+ if (mp->addr.src.ip4) {
+ if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
+ mp->addr.src.ip4))
+ goto nest_err;
+ break;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+ if (!ipv6_addr_any(&mp->addr.src.ip6)) {
+ if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
+ &mp->addr.src.ip6))
+ goto nest_err;
+ break;
+ }
+ break;
+#endif
+ }
+ if (p) {
+ if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, p->rt_protocol))
+ goto nest_err;
+ if (dump_srcs_mode &&
+ (__mdb_fill_srcs(skb, p) ||
+ nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE,
+ p->filter_mode)))
+ goto nest_err;
}
nla_nest_end(skb, nest_ent);
return 0;
+
+nest_err:
+ nla_nest_cancel(skb, nest_ent);
+ return -EMSGSIZE;
}
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
- int idx = 0, s_idx = cb->args[1], err = 0;
+ int idx = 0, s_idx = cb->args[1], err = 0, pidx = 0, s_pidx = cb->args[2];
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mdb_entry *mp;
struct nlattr *nest, *nest2;
@@ -152,7 +257,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
break;
}
- if (mp->host_joined) {
+ if (!s_pidx && mp->host_joined) {
err = __mdb_fill_info(skb, mp, NULL);
if (err) {
nla_nest_cancel(skb, nest2);
@@ -162,15 +267,21 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
- if (!p->port)
+ if (!p->key.port)
continue;
+ if (pidx < s_pidx)
+ goto skip_pg;
err = __mdb_fill_info(skb, mp, p);
if (err) {
- nla_nest_cancel(skb, nest2);
+ nla_nest_end(skb, nest2);
goto out;
}
+skip_pg:
+ pidx++;
}
+ pidx = 0;
+ s_pidx = 0;
nla_nest_end(skb, nest2);
skip:
idx++;
@@ -178,6 +289,7 @@ skip:
out:
cb->args[1] = idx;
+ cb->args[2] = pidx;
nla_nest_end(skb, nest);
return err;
}
@@ -263,14 +375,15 @@ out:
static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
struct net_device *dev,
- struct br_mdb_entry *entry, u32 pid,
- u32 seq, int type, unsigned int flags)
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
{
struct nlmsghdr *nlh;
struct br_port_msg *bpm;
struct nlattr *nest, *nest2;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
+ nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0);
if (!nlh)
return -EMSGSIZE;
@@ -285,7 +398,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
if (nest2 == NULL)
goto end;
- if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry))
+ if (__mdb_fill_info(skb, mp, pg))
goto end;
nla_nest_end(skb, nest2);
@@ -300,10 +413,58 @@ cancel:
return -EMSGSIZE;
}
-static inline size_t rtnl_mdb_nlmsg_size(void)
+static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
{
- return NLMSG_ALIGN(sizeof(struct br_port_msg))
- + nla_total_size(sizeof(struct br_mdb_entry));
+ size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ nla_total_size(sizeof(struct br_mdb_entry)) +
+ nla_total_size(sizeof(u32));
+ struct net_bridge_group_src *ent;
+ size_t addr_size = 0;
+
+ if (!pg)
+ goto out;
+
+ /* MDBA_MDB_EATTR_RTPROT */
+ nlmsg_size += nla_total_size(sizeof(u8));
+
+ switch (pg->key.addr.proto) {
+ case htons(ETH_P_IP):
+ /* MDBA_MDB_EATTR_SOURCE */
+ if (pg->key.addr.src.ip4)
+ nlmsg_size += nla_total_size(sizeof(__be32));
+ if (pg->key.port->br->multicast_igmp_version == 2)
+ goto out;
+ addr_size = sizeof(__be32);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ /* MDBA_MDB_EATTR_SOURCE */
+ if (!ipv6_addr_any(&pg->key.addr.src.ip6))
+ nlmsg_size += nla_total_size(sizeof(struct in6_addr));
+ if (pg->key.port->br->multicast_mld_version == 1)
+ goto out;
+ addr_size = sizeof(struct in6_addr);
+ break;
+#endif
+ }
+
+ /* MDBA_MDB_EATTR_GROUP_MODE */
+ nlmsg_size += nla_total_size(sizeof(u8));
+
+ /* MDBA_MDB_EATTR_SRC_LIST nested attr */
+ if (!hlist_empty(&pg->src_list))
+ nlmsg_size += nla_total_size(0);
+
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ /* MDBA_MDB_SRCLIST_ENTRY nested attr +
+ * MDBA_MDB_SRCATTR_ADDRESS + MDBA_MDB_SRCATTR_TIMER
+ */
+ nlmsg_size += nla_total_size(0) +
+ nla_total_size(addr_size) +
+ nla_total_size(sizeof(u32));
+ }
+out:
+ return nlmsg_size;
}
struct br_mdb_complete_info {
@@ -329,7 +490,7 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
goto out;
for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
- if (p->port != port)
+ if (p->key.port != port)
continue;
p->flags |= MDB_PG_FLAGS_OFFLOAD;
}
@@ -341,21 +502,22 @@ err:
static void br_mdb_switchdev_host_port(struct net_device *dev,
struct net_device *lower_dev,
- struct br_mdb_entry *entry, int type)
+ struct net_bridge_mdb_entry *mp,
+ int type)
{
struct switchdev_obj_port_mdb mdb = {
.obj = {
.id = SWITCHDEV_OBJ_ID_HOST_MDB,
.flags = SWITCHDEV_F_DEFER,
},
- .vid = entry->vid,
+ .vid = mp->addr.vid,
};
- if (entry->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+ if (mp->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
#if IS_ENABLED(CONFIG_IPV6)
else
- ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+ ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
#endif
mdb.obj.orig_dev = dev;
@@ -370,17 +532,19 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
}
static void br_mdb_switchdev_host(struct net_device *dev,
- struct br_mdb_entry *entry, int type)
+ struct net_bridge_mdb_entry *mp, int type)
{
struct net_device *lower_dev;
struct list_head *iter;
netdev_for_each_lower_dev(dev, lower_dev, iter)
- br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
+ br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
}
-static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
- struct br_mdb_entry *entry, int type)
+void br_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
{
struct br_mdb_complete_info *complete_info;
struct switchdev_obj_port_mdb mdb = {
@@ -388,44 +552,45 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
.id = SWITCHDEV_OBJ_ID_PORT_MDB,
.flags = SWITCHDEV_F_DEFER,
},
- .vid = entry->vid,
+ .vid = mp->addr.vid,
};
- struct net_device *port_dev;
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- port_dev = __dev_get_by_index(net, entry->ifindex);
- if (entry->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+ if (pg) {
+ if (mp->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
#if IS_ENABLED(CONFIG_IPV6)
- else
- ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+ else
+ ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
#endif
-
- mdb.obj.orig_dev = port_dev;
- if (p && port_dev && type == RTM_NEWMDB) {
- complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
- if (complete_info) {
- complete_info->port = p;
- __mdb_entry_to_br_ip(entry, &complete_info->ip);
+ mdb.obj.orig_dev = pg->key.port->dev;
+ switch (type) {
+ case RTM_NEWMDB:
+ complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
+ if (!complete_info)
+ break;
+ complete_info->port = pg->key.port;
+ complete_info->ip = mp->addr;
mdb.obj.complete_priv = complete_info;
mdb.obj.complete = br_mdb_complete;
- if (switchdev_port_obj_add(port_dev, &mdb.obj, NULL))
+ if (switchdev_port_obj_add(pg->key.port->dev, &mdb.obj, NULL))
kfree(complete_info);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(pg->key.port->dev, &mdb.obj);
+ break;
}
- } else if (p && port_dev && type == RTM_DELMDB) {
- switchdev_port_obj_del(port_dev, &mdb.obj);
+ } else {
+ br_mdb_switchdev_host(dev, mp, type);
}
- if (!p)
- br_mdb_switchdev_host(dev, entry, type);
-
- skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
+ skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC);
if (!skb)
goto errout;
- err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_mdb_fill(skb, dev, mp, pg, type);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -437,26 +602,6 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type, u8 flags)
-{
- struct br_mdb_entry entry;
-
- memset(&entry, 0, sizeof(entry));
- if (port)
- entry.ifindex = port->dev->ifindex;
- else
- entry.ifindex = dev->ifindex;
- entry.addr.proto = group->proto;
- entry.addr.u.ip4 = group->u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- entry.addr.u.ip6 = group->u.ip6;
-#endif
- entry.vid = group->vid;
- __mdb_entry_fill_flags(&entry, flags);
- __br_mdb_notify(dev, port, &entry, type);
-}
-
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct net_device *dev,
int ifindex, u32 pid,
@@ -524,33 +669,94 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
+static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
+ struct netlink_ext_ack *extack)
{
- if (entry->ifindex == 0)
+ if (entry->ifindex == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed");
return false;
+ }
if (entry->addr.proto == htons(ETH_P_IP)) {
- if (!ipv4_is_multicast(entry->addr.u.ip4))
+ if (!ipv4_is_multicast(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast");
return false;
- if (ipv4_is_local_multicast(entry->addr.u.ip4))
+ }
+ if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast");
return false;
+ }
#if IS_ENABLED(CONFIG_IPV6)
} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
- if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
+ if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes");
return false;
+ }
#endif
- } else
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol");
+ return false;
+ }
+
+ if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown entry state");
return false;
- if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
+ }
+ if (entry->vid >= VLAN_VID_MASK) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id");
return false;
- if (entry->vid >= VLAN_VID_MASK)
+ }
+
+ return true;
+}
+
+static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto,
+ struct netlink_ext_ack *extack)
+{
+ switch (proto) {
+ case htons(ETH_P_IP):
+ if (nla_len(attr) != sizeof(struct in_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
+ return false;
+ }
+ if (ipv4_is_multicast(nla_get_in_addr(attr))) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
+ return false;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6): {
+ struct in6_addr src;
+
+ if (nla_len(attr) != sizeof(struct in6_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
+ return false;
+ }
+ src = nla_get_in6_addr(attr);
+ if (ipv6_addr_is_multicast(&src)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
+ return false;
+ }
+ break;
+ }
+#endif
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
return false;
+ }
return true;
}
+static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+};
+
static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct net_device **pdev, struct br_mdb_entry **pentry)
+ struct net_device **pdev, struct br_mdb_entry **pentry,
+ struct nlattr **mdb_attrs, struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
struct br_mdb_entry *entry;
@@ -566,51 +772,86 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
bpm = nlmsg_data(nlh);
if (bpm->ifindex == 0) {
- pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, bpm->ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n");
+ NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
return -ENODEV;
}
if (!(dev->priv_flags & IFF_EBRIDGE)) {
- pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n");
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
return -EOPNOTSUPP;
}
*pdev = dev;
- if (!tb[MDBA_SET_ENTRY] ||
- nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
- pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n");
+ if (!tb[MDBA_SET_ENTRY]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+ if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
return -EINVAL;
}
entry = nla_data(tb[MDBA_SET_ENTRY]);
- if (!is_valid_mdb_entry(entry)) {
- pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n");
+ if (!is_valid_mdb_entry(entry, extack))
return -EINVAL;
+ *pentry = entry;
+
+ if (tb[MDBA_SET_ENTRY_ATTRS]) {
+ err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX,
+ tb[MDBA_SET_ENTRY_ATTRS],
+ br_mdbe_attrs_pol, extack);
+ if (err)
+ return err;
+ if (mdb_attrs[MDBE_ATTR_SOURCE] &&
+ !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE],
+ entry->addr.proto, extack))
+ return -EINVAL;
+ } else {
+ memset(mdb_attrs, 0,
+ sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1));
}
- *pentry = entry;
return 0;
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, unsigned char state)
+ struct br_mdb_entry *entry,
+ struct nlattr **mdb_attrs,
+ struct netlink_ext_ack *extack)
{
- struct net_bridge_mdb_entry *mp;
+ struct net_bridge_mdb_entry *mp, *star_mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct br_ip group, star_group;
unsigned long now = jiffies;
+ u8 filter_mode;
int err;
- mp = br_mdb_ip_get(br, group);
+ __mdb_entry_to_br_ip(entry, &group, mdb_attrs);
+
+ /* host join errors which can happen before creating the group */
+ if (!port) {
+ /* don't allow any flags for host-joined groups */
+ if (entry->state) {
+ NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
+ return -EINVAL;
+ }
+ if (!br_multicast_is_star_g(&group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined");
+ return -EINVAL;
+ }
+ }
+
+ mp = br_mdb_ip_get(br, &group);
if (!mp) {
- mp = br_multicast_new_group(br, group);
+ mp = br_multicast_new_group(br, &group);
err = PTR_ERR_OR_ZERO(mp);
if (err)
return err;
@@ -618,13 +859,13 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
/* host join */
if (!port) {
- /* don't allow any flags for host-joined groups */
- if (state)
- return -EINVAL;
- if (mp->host_joined)
+ if (mp->host_joined) {
+ NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
+ }
br_multicast_host_join(mp, false);
+ br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
return 0;
}
@@ -632,54 +873,69 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
- if (p->port == port)
+ if (p->key.port == port) {
+ NL_SET_ERR_MSG_MOD(extack, "Group is already joined by port");
return -EEXIST;
- if ((unsigned long)p->port < (unsigned long)port)
+ }
+ if ((unsigned long)p->key.port < (unsigned long)port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, state, NULL);
- if (unlikely(!p))
+ filter_mode = br_multicast_is_star_g(&group) ? MCAST_EXCLUDE :
+ MCAST_INCLUDE;
+
+ p = br_multicast_new_port_group(port, &group, *pp, entry->state, NULL,
+ filter_mode, RTPROT_STATIC);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
return -ENOMEM;
+ }
rcu_assign_pointer(*pp, p);
- if (state == MDB_TEMPORARY)
+ if (entry->state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
+ br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+ /* if we are adding a new EXCLUDE port group (*,G) it needs to be also
+ * added to all S,G entries for proper replication, if we are adding
+ * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
+ * added to it for proper replication
+ */
+ if (br_multicast_should_handle_mode(br, group.proto)) {
+ switch (filter_mode) {
+ case MCAST_EXCLUDE:
+ br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
+ break;
+ case MCAST_INCLUDE:
+ star_group = p->key.addr;
+ memset(&star_group.src, 0, sizeof(star_group.src));
+ star_mp = br_mdb_ip_get(br, &star_group);
+ if (star_mp)
+ br_multicast_sg_add_exclude_ports(star_mp, p);
+ break;
+ }
+ }
return 0;
}
static int __br_mdb_add(struct net *net, struct net_bridge *br,
- struct br_mdb_entry *entry)
+ struct net_bridge_port *p,
+ struct br_mdb_entry *entry,
+ struct nlattr **mdb_attrs,
+ struct netlink_ext_ack *extack)
{
- struct br_ip ip;
- struct net_device *dev;
- struct net_bridge_port *p = NULL;
int ret;
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
- return -EINVAL;
-
- if (entry->ifindex != br->dev->ifindex) {
- dev = __dev_get_by_index(net, entry->ifindex);
- if (!dev)
- return -ENODEV;
-
- p = br_port_get_rtnl(dev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
- return -EINVAL;
- }
-
- __mdb_entry_to_br_ip(entry, &ip);
-
spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, &ip, entry->state);
+ ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack);
spin_unlock_bh(&br->multicast_lock);
+
return ret;
}
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
@@ -689,20 +945,43 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_bridge *br;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry);
+ err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
if (err < 0)
return err;
br = netdev_priv(dev);
+ if (!netif_running(br->dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running");
+ return -EINVAL;
+ }
+
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled");
+ return -EINVAL;
+ }
+
if (entry->ifindex != br->dev->ifindex) {
pdev = __dev_get_by_index(net, entry->ifindex);
- if (!pdev)
+ if (!pdev) {
+ NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist");
return -ENODEV;
+ }
p = br_port_get_rtnl(pdev);
- if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port");
+ return -EINVAL;
+ }
+
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
return -EINVAL;
+ }
+ if (p->state == BR_STATE_DISABLED) {
+ NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state");
+ return -EINVAL;
+ }
vg = nbp_vlan_group(p);
} else {
vg = br_vlan_group(br);
@@ -714,21 +993,19 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_add(net, br, entry);
+ err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
if (err)
break;
- __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
}
} else {
- err = __br_mdb_add(net, br, entry);
- if (!err)
- __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
+ err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack);
}
return err;
}
-static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
+static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry,
+ struct nlattr **mdb_attrs)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
@@ -739,7 +1016,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
return -EINVAL;
- __mdb_entry_to_br_ip(entry, &ip);
+ __mdb_entry_to_br_ip(entry, &ip, mdb_attrs);
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &ip);
@@ -750,6 +1027,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) {
br_multicast_host_leave(mp, false);
err = 0;
+ br_mdb_notify(br->dev, mp, NULL, RTM_DELMDB);
if (!mp->ports && netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
goto unlock;
@@ -758,22 +1036,14 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
for (pp = &mp->ports;
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
- if (!p->port || p->port->dev->ifindex != entry->ifindex)
+ if (!p->key.port || p->key.port->dev->ifindex != entry->ifindex)
continue;
- if (p->port->state == BR_STATE_DISABLED)
+ if (p->key.port->state == BR_STATE_DISABLED)
goto unlock;
- __mdb_entry_fill_flags(entry, p->flags);
- rcu_assign_pointer(*pp, p->next);
- hlist_del_init(&p->mglist);
- del_timer(&p->timer);
- kfree_rcu(p, rcu);
+ br_multicast_del_pg(mp, p, pp);
err = 0;
-
- if (!mp->ports && !mp->host_joined &&
- netif_running(br->dev))
- mod_timer(&mp->timer, jiffies);
break;
}
@@ -785,6 +1055,7 @@ unlock:
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1];
struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
@@ -794,7 +1065,7 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_bridge *br;
int err;
- err = br_mdb_parse(skb, nlh, &dev, &entry);
+ err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack);
if (err < 0)
return err;
@@ -819,14 +1090,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) {
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
- err = __br_mdb_del(br, entry);
- if (!err)
- __br_mdb_notify(dev, p, entry, RTM_DELMDB);
+ err = __br_mdb_del(br, entry, mdb_attrs);
}
} else {
- err = __br_mdb_del(br, entry);
- if (!err)
- __br_mdb_notify(dev, p, entry, RTM_DELMDB);
+ err = __br_mdb_del(br, entry, mdb_attrs);
}
return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 4c4a93abde68..eae898c3cff7 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -41,6 +41,13 @@ static const struct rhashtable_params br_mdb_rht_params = {
.automatic_shrinking = true,
};
+static const struct rhashtable_params br_sg_port_rht_params = {
+ .head_offset = offsetof(struct net_bridge_port_group, rhnode),
+ .key_offset = offsetof(struct net_bridge_port_group, key),
+ .key_len = sizeof(struct net_bridge_port_group_sg_key),
+ .automatic_shrinking = true,
+};
+
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query);
static void br_multicast_add_router(struct net_bridge *br,
@@ -50,6 +57,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
__be32 group,
__u16 vid,
const unsigned char *src);
+static void br_multicast_port_group_rexmit(struct timer_list *t);
static void __del_port_router(struct net_bridge_port *p);
#if IS_ENABLED(CONFIG_IPV6)
@@ -58,6 +66,26 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
+static struct net_bridge_port_group *
+__br_multicast_add_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ const unsigned char *src,
+ u8 filter_mode,
+ bool igmpv2_mldv1,
+ bool blocked);
+static void br_multicast_find_del_pg(struct net_bridge *br,
+ struct net_bridge_port_group *pg);
+
+static struct net_bridge_port_group *
+br_sg_port_find(struct net_bridge *br,
+ struct net_bridge_port_group_sg_key *sg_p)
+{
+ lockdep_assert_held_once(&br->multicast_lock);
+
+ return rhashtable_lookup_fast(&br->sg_port_tbl, sg_p,
+ br_sg_port_rht_params);
+}
static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br,
struct br_ip *dst)
@@ -85,7 +113,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br,
struct br_ip br_dst;
memset(&br_dst, 0, sizeof(br_dst));
- br_dst.u.ip4 = dst;
+ br_dst.dst.ip4 = dst;
br_dst.proto = htons(ETH_P_IP);
br_dst.vid = vid;
@@ -100,7 +128,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
struct br_ip br_dst;
memset(&br_dst, 0, sizeof(br_dst));
- br_dst.u.ip6 = *dst;
+ br_dst.dst.ip6 = *dst;
br_dst.proto = htons(ETH_P_IPV6);
br_dst.vid = vid;
@@ -125,11 +153,29 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
switch (skb->protocol) {
case htons(ETH_P_IP):
- ip.u.ip4 = ip_hdr(skb)->daddr;
+ ip.dst.ip4 = ip_hdr(skb)->daddr;
+ if (br->multicast_igmp_version == 3) {
+ struct net_bridge_mdb_entry *mdb;
+
+ ip.src.ip4 = ip_hdr(skb)->saddr;
+ mdb = br_mdb_ip_get_rcu(br, &ip);
+ if (mdb)
+ return mdb;
+ ip.src.ip4 = 0;
+ }
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- ip.u.ip6 = ipv6_hdr(skb)->daddr;
+ ip.dst.ip6 = ipv6_hdr(skb)->daddr;
+ if (br->multicast_mld_version == 2) {
+ struct net_bridge_mdb_entry *mdb;
+
+ ip.src.ip6 = ipv6_hdr(skb)->saddr;
+ mdb = br_mdb_ip_get_rcu(br, &ip);
+ if (mdb)
+ return mdb;
+ memset(&ip.src.ip6, 0, sizeof(ip.src.ip6));
+ }
break;
#endif
default:
@@ -139,38 +185,438 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return br_mdb_ip_get_rcu(br, &ip);
}
+static bool br_port_group_equal(struct net_bridge_port_group *p,
+ struct net_bridge_port *port,
+ const unsigned char *src)
+{
+ if (p->key.port != port)
+ return false;
+
+ if (!(port->flags & BR_MULTICAST_TO_UNICAST))
+ return true;
+
+ return ether_addr_equal(src, p->eth_addr);
+}
+
+static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+ struct br_ip *sg_ip)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_port_group *src_pg;
+
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.port = pg->key.port;
+ sg_key.addr = *sg_ip;
+ if (br_sg_port_find(br, &sg_key))
+ return;
+
+ src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+ MCAST_INCLUDE, false, false);
+ if (IS_ERR_OR_NULL(src_pg) ||
+ src_pg->rt_protocol != RTPROT_KERNEL)
+ return;
+
+ src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL;
+}
+
+static void __fwd_del_star_excl(struct net_bridge_port_group *pg,
+ struct br_ip *sg_ip)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_port_group *src_pg;
+
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.port = pg->key.port;
+ sg_key.addr = *sg_ip;
+ src_pg = br_sg_port_find(br, &sg_key);
+ if (!src_pg || !(src_pg->flags & MDB_PG_FLAGS_STAR_EXCL) ||
+ src_pg->rt_protocol != RTPROT_KERNEL)
+ return;
+
+ br_multicast_find_del_pg(br, src_pg);
+}
+
+/* When a port group transitions to (or is added as) EXCLUDE we need to add it
+ * to all other ports' S,G entries which are not blocked by the current group
+ * for proper replication, the assumption is that any S,G blocked entries
+ * are already added so the S,G,port lookup should skip them.
+ * When a port group transitions from EXCLUDE -> INCLUDE mode or is being
+ * deleted we need to remove it from all ports' S,G entries where it was
+ * automatically installed before (i.e. where it's MDB_PG_FLAGS_STAR_EXCL).
+ */
+void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
+ u8 filter_mode)
+{
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_port_group *pg_lst;
+ struct net_bridge_mdb_entry *mp;
+ struct br_ip sg_ip;
+
+ if (WARN_ON(!br_multicast_is_star_g(&pg->key.addr)))
+ return;
+
+ mp = br_mdb_ip_get(br, &pg->key.addr);
+ if (!mp)
+ return;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = pg->key.addr;
+ for (pg_lst = mlock_dereference(mp->ports, br);
+ pg_lst;
+ pg_lst = mlock_dereference(pg_lst->next, br)) {
+ struct net_bridge_group_src *src_ent;
+
+ if (pg_lst == pg)
+ continue;
+ hlist_for_each_entry(src_ent, &pg_lst->src_list, node) {
+ if (!(src_ent->flags & BR_SGRP_F_INSTALLED))
+ continue;
+ sg_ip.src = src_ent->addr.src;
+ switch (filter_mode) {
+ case MCAST_INCLUDE:
+ __fwd_del_star_excl(pg, &sg_ip);
+ break;
+ case MCAST_EXCLUDE:
+ __fwd_add_star_excl(pg, &sg_ip);
+ break;
+ }
+ }
+ }
+}
+
+/* called when adding a new S,G with host_joined == false by default */
+static void br_multicast_sg_host_state(struct net_bridge_mdb_entry *star_mp,
+ struct net_bridge_port_group *sg)
+{
+ struct net_bridge_mdb_entry *sg_mp;
+
+ if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+ return;
+ if (!star_mp->host_joined)
+ return;
+
+ sg_mp = br_mdb_ip_get(star_mp->br, &sg->key.addr);
+ if (!sg_mp)
+ return;
+ sg_mp->host_joined = true;
+}
+
+/* set the host_joined state of all of *,G's S,G entries */
+static void br_multicast_star_g_host_state(struct net_bridge_mdb_entry *star_mp)
+{
+ struct net_bridge *br = star_mp->br;
+ struct net_bridge_mdb_entry *sg_mp;
+ struct net_bridge_port_group *pg;
+ struct br_ip sg_ip;
+
+ if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+ return;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = star_mp->addr;
+ for (pg = mlock_dereference(star_mp->ports, br);
+ pg;
+ pg = mlock_dereference(pg->next, br)) {
+ struct net_bridge_group_src *src_ent;
+
+ hlist_for_each_entry(src_ent, &pg->src_list, node) {
+ if (!(src_ent->flags & BR_SGRP_F_INSTALLED))
+ continue;
+ sg_ip.src = src_ent->addr.src;
+ sg_mp = br_mdb_ip_get(br, &sg_ip);
+ if (!sg_mp)
+ continue;
+ sg_mp->host_joined = star_mp->host_joined;
+ }
+ }
+}
+
+static void br_multicast_sg_del_exclude_ports(struct net_bridge_mdb_entry *sgmp)
+{
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+
+ /* *,G exclude ports are only added to S,G entries */
+ if (WARN_ON(br_multicast_is_star_g(&sgmp->addr)))
+ return;
+
+ /* we need the STAR_EXCLUDE ports if there are non-STAR_EXCLUDE ports
+ * we should ignore perm entries since they're managed by user-space
+ */
+ for (pp = &sgmp->ports;
+ (p = mlock_dereference(*pp, sgmp->br)) != NULL;
+ pp = &p->next)
+ if (!(p->flags & (MDB_PG_FLAGS_STAR_EXCL |
+ MDB_PG_FLAGS_PERMANENT)))
+ return;
+
+ /* currently the host can only have joined the *,G which means
+ * we treat it as EXCLUDE {}, so for an S,G it's considered a
+ * STAR_EXCLUDE entry and we can safely leave it
+ */
+ sgmp->host_joined = false;
+
+ for (pp = &sgmp->ports;
+ (p = mlock_dereference(*pp, sgmp->br)) != NULL;) {
+ if (!(p->flags & MDB_PG_FLAGS_PERMANENT))
+ br_multicast_del_pg(sgmp, p, pp);
+ else
+ pp = &p->next;
+ }
+}
+
+void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
+ struct net_bridge_port_group *sg)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge *br = star_mp->br;
+ struct net_bridge_port_group *pg;
+
+ if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
+ return;
+ if (WARN_ON(!br_multicast_is_star_g(&star_mp->addr)))
+ return;
+
+ br_multicast_sg_host_state(star_mp, sg);
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.addr = sg->key.addr;
+ /* we need to add all exclude ports to the S,G */
+ for (pg = mlock_dereference(star_mp->ports, br);
+ pg;
+ pg = mlock_dereference(pg->next, br)) {
+ struct net_bridge_port_group *src_pg;
+
+ if (pg == sg || pg->filter_mode == MCAST_INCLUDE)
+ continue;
+
+ sg_key.port = pg->key.port;
+ if (br_sg_port_find(br, &sg_key))
+ continue;
+
+ src_pg = __br_multicast_add_group(br, pg->key.port,
+ &sg->key.addr,
+ sg->eth_addr,
+ MCAST_INCLUDE, false, false);
+ if (IS_ERR_OR_NULL(src_pg) ||
+ src_pg->rt_protocol != RTPROT_KERNEL)
+ continue;
+ src_pg->flags |= MDB_PG_FLAGS_STAR_EXCL;
+ }
+}
+
+static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
+{
+ struct net_bridge_mdb_entry *star_mp;
+ struct net_bridge_port_group *sg;
+ struct br_ip sg_ip;
+
+ if (src->flags & BR_SGRP_F_INSTALLED)
+ return;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = src->pg->key.addr;
+ sg_ip.src = src->addr.src;
+ sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
+ src->pg->eth_addr, MCAST_INCLUDE, false,
+ !timer_pending(&src->timer));
+ if (IS_ERR_OR_NULL(sg))
+ return;
+ src->flags |= BR_SGRP_F_INSTALLED;
+ sg->flags &= ~MDB_PG_FLAGS_STAR_EXCL;
+
+ /* if it was added by user-space as perm we can skip next steps */
+ if (sg->rt_protocol != RTPROT_KERNEL &&
+ (sg->flags & MDB_PG_FLAGS_PERMANENT))
+ return;
+
+ /* the kernel is now responsible for removing this S,G */
+ del_timer(&sg->timer);
+ star_mp = br_mdb_ip_get(src->br, &src->pg->key.addr);
+ if (!star_mp)
+ return;
+
+ br_multicast_sg_add_exclude_ports(star_mp, sg);
+}
+
+static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src)
+{
+ struct net_bridge_port_group *p, *pg = src->pg;
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_mdb_entry *mp;
+ struct br_ip sg_ip;
+
+ memset(&sg_ip, 0, sizeof(sg_ip));
+ sg_ip = pg->key.addr;
+ sg_ip.src = src->addr.src;
+
+ mp = br_mdb_ip_get(src->br, &sg_ip);
+ if (!mp)
+ return;
+
+ for (pp = &mp->ports;
+ (p = mlock_dereference(*pp, src->br)) != NULL;
+ pp = &p->next) {
+ if (!br_port_group_equal(p, pg->key.port, pg->eth_addr))
+ continue;
+
+ if (p->rt_protocol != RTPROT_KERNEL &&
+ (p->flags & MDB_PG_FLAGS_PERMANENT))
+ break;
+
+ br_multicast_del_pg(mp, p, pp);
+ break;
+ }
+ src->flags &= ~BR_SGRP_F_INSTALLED;
+}
+
+/* install S,G and based on src's timer enable or disable forwarding */
+static void br_multicast_fwd_src_handle(struct net_bridge_group_src *src)
+{
+ struct net_bridge_port_group_sg_key sg_key;
+ struct net_bridge_port_group *sg;
+ u8 old_flags;
+
+ br_multicast_fwd_src_add(src);
+
+ memset(&sg_key, 0, sizeof(sg_key));
+ sg_key.addr = src->pg->key.addr;
+ sg_key.addr.src = src->addr.src;
+ sg_key.port = src->pg->key.port;
+
+ sg = br_sg_port_find(src->br, &sg_key);
+ if (!sg || (sg->flags & MDB_PG_FLAGS_PERMANENT))
+ return;
+
+ old_flags = sg->flags;
+ if (timer_pending(&src->timer))
+ sg->flags &= ~MDB_PG_FLAGS_BLOCKED;
+ else
+ sg->flags |= MDB_PG_FLAGS_BLOCKED;
+
+ if (old_flags != sg->flags) {
+ struct net_bridge_mdb_entry *sg_mp;
+
+ sg_mp = br_mdb_ip_get(src->br, &sg_key.addr);
+ if (!sg_mp)
+ return;
+ br_mdb_notify(src->br->dev, sg_mp, sg, RTM_NEWMDB);
+ }
+}
+
+static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc)
+{
+ struct net_bridge_mdb_entry *mp;
+
+ mp = container_of(gc, struct net_bridge_mdb_entry, mcast_gc);
+ WARN_ON(!hlist_unhashed(&mp->mdb_node));
+ WARN_ON(mp->ports);
+
+ del_timer_sync(&mp->timer);
+ kfree_rcu(mp, rcu);
+}
+
+static void br_multicast_del_mdb_entry(struct net_bridge_mdb_entry *mp)
+{
+ struct net_bridge *br = mp->br;
+
+ rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ hlist_del_init_rcu(&mp->mdb_node);
+ hlist_add_head(&mp->mcast_gc.gc_node, &br->mcast_gc_list);
+ queue_work(system_long_wq, &br->mcast_gc_work);
+}
+
static void br_multicast_group_expired(struct timer_list *t)
{
struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
struct net_bridge *br = mp->br;
spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || timer_pending(&mp->timer))
+ if (hlist_unhashed(&mp->mdb_node) || !netif_running(br->dev) ||
+ timer_pending(&mp->timer))
goto out;
br_multicast_host_leave(mp, true);
if (mp->ports)
goto out;
+ br_multicast_del_mdb_entry(mp);
+out:
+ spin_unlock(&br->multicast_lock);
+}
- rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
- br_mdb_rht_params);
- hlist_del_rcu(&mp->mdb_node);
+static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc)
+{
+ struct net_bridge_group_src *src;
- kfree_rcu(mp, rcu);
+ src = container_of(gc, struct net_bridge_group_src, mcast_gc);
+ WARN_ON(!hlist_unhashed(&src->node));
-out:
- spin_unlock(&br->multicast_lock);
+ del_timer_sync(&src->timer);
+ kfree_rcu(src, rcu);
+}
+
+static void br_multicast_del_group_src(struct net_bridge_group_src *src)
+{
+ struct net_bridge *br = src->pg->key.port->br;
+
+ br_multicast_fwd_src_remove(src);
+ hlist_del_init_rcu(&src->node);
+ src->pg->src_ents--;
+ hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list);
+ queue_work(system_long_wq, &br->mcast_gc_work);
+}
+
+static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
+{
+ struct net_bridge_port_group *pg;
+
+ pg = container_of(gc, struct net_bridge_port_group, mcast_gc);
+ WARN_ON(!hlist_unhashed(&pg->mglist));
+ WARN_ON(!hlist_empty(&pg->src_list));
+
+ del_timer_sync(&pg->rexmit_timer);
+ del_timer_sync(&pg->timer);
+ kfree_rcu(pg, rcu);
+}
+
+void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_port_group __rcu **pp)
+{
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_group_src *ent;
+ struct hlist_node *tmp;
+
+ rcu_assign_pointer(*pp, pg->next);
+ hlist_del_init(&pg->mglist);
+ hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
+ br_multicast_del_group_src(ent);
+ br_mdb_notify(br->dev, mp, pg, RTM_DELMDB);
+ if (!br_multicast_is_star_g(&mp->addr)) {
+ rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode,
+ br_sg_port_rht_params);
+ br_multicast_sg_del_exclude_ports(mp);
+ } else {
+ br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
+ }
+ hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
+ queue_work(system_long_wq, &br->mcast_gc_work);
+
+ if (!mp->ports && !mp->host_joined && netif_running(br->dev))
+ mod_timer(&mp->timer, jiffies);
}
-static void br_multicast_del_pg(struct net_bridge *br,
- struct net_bridge_port_group *pg)
+static void br_multicast_find_del_pg(struct net_bridge *br,
+ struct net_bridge_port_group *pg)
{
+ struct net_bridge_port_group __rcu **pp;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
- struct net_bridge_port_group __rcu **pp;
- mp = br_mdb_ip_get(br, &pg->addr);
+ mp = br_mdb_ip_get(br, &pg->key.addr);
if (WARN_ON(!mp))
return;
@@ -180,17 +626,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
if (p != pg)
continue;
- rcu_assign_pointer(*pp, p->next);
- hlist_del_init(&p->mglist);
- del_timer(&p->timer);
- br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
- p->flags);
- kfree_rcu(p, rcu);
-
- if (!mp->ports && !mp->host_joined &&
- netif_running(br->dev))
- mod_timer(&mp->timer, jiffies);
-
+ br_multicast_del_pg(mp, pg, pp);
return;
}
@@ -200,35 +636,98 @@ static void br_multicast_del_pg(struct net_bridge *br,
static void br_multicast_port_group_expired(struct timer_list *t)
{
struct net_bridge_port_group *pg = from_timer(pg, t, timer);
- struct net_bridge *br = pg->port->br;
+ struct net_bridge_group_src *src_ent;
+ struct net_bridge *br = pg->key.port->br;
+ struct hlist_node *tmp;
+ bool changed;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT)
goto out;
- br_multicast_del_pg(br, pg);
+ changed = !!(pg->filter_mode == MCAST_EXCLUDE);
+ pg->filter_mode = MCAST_INCLUDE;
+ hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) {
+ if (!timer_pending(&src_ent->timer)) {
+ br_multicast_del_group_src(src_ent);
+ changed = true;
+ }
+ }
+
+ if (hlist_empty(&pg->src_list)) {
+ br_multicast_find_del_pg(br, pg);
+ } else if (changed) {
+ struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->key.addr);
+ if (changed && br_multicast_is_star_g(&pg->key.addr))
+ br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
+
+ if (WARN_ON(!mp))
+ goto out;
+ br_mdb_notify(br->dev, mp, pg, RTM_NEWMDB);
+ }
out:
spin_unlock(&br->multicast_lock);
}
-static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
- __be32 group,
- u8 *igmp_type)
+static void br_multicast_gc(struct hlist_head *head)
{
+ struct net_bridge_mcast_gc *gcent;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(gcent, tmp, head, gc_node) {
+ hlist_del_init(&gcent->gc_node);
+ gcent->destroy(gcent);
+ }
+}
+
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+ struct net_bridge_port_group *pg,
+ __be32 ip_dst, __be32 group,
+ bool with_srcs, bool over_lmqt,
+ u8 sflag, u8 *igmp_type,
+ bool *need_rexmit)
+{
+ struct net_bridge_port *p = pg ? pg->key.port : NULL;
+ struct net_bridge_group_src *ent;
+ size_t pkt_size, igmp_hdr_size;
+ unsigned long now = jiffies;
struct igmpv3_query *ihv3;
- size_t igmp_hdr_size;
+ void *csum_start = NULL;
+ __sum16 *csum = NULL;
struct sk_buff *skb;
struct igmphdr *ih;
struct ethhdr *eth;
+ unsigned long lmqt;
struct iphdr *iph;
+ u16 lmqt_srcs = 0;
igmp_hdr_size = sizeof(*ih);
- if (br->multicast_igmp_version == 3)
+ if (br->multicast_igmp_version == 3) {
igmp_hdr_size = sizeof(*ihv3);
- skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
- igmp_hdr_size + 4);
+ if (pg && with_srcs) {
+ lmqt = now + (br->multicast_last_member_interval *
+ br->multicast_last_member_count);
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_lmqt == time_after(ent->timer.expires,
+ lmqt) &&
+ ent->src_query_rexmit_cnt > 0)
+ lmqt_srcs++;
+ }
+
+ if (!lmqt_srcs)
+ return NULL;
+ igmp_hdr_size += lmqt_srcs * sizeof(__be32);
+ }
+ }
+
+ pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
+ if ((p && pkt_size > p->dev->mtu) ||
+ pkt_size > br->dev->mtu)
+ return NULL;
+
+ skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
if (!skb)
goto out;
@@ -238,29 +737,24 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
eth = eth_hdr(skb);
ether_addr_copy(eth->h_source, br->dev->dev_addr);
- eth->h_dest[0] = 1;
- eth->h_dest[1] = 0;
- eth->h_dest[2] = 0x5e;
- eth->h_dest[3] = 0;
- eth->h_dest[4] = 0;
- eth->h_dest[5] = 1;
+ ip_eth_mc_map(ip_dst, eth->h_dest);
eth->h_proto = htons(ETH_P_IP);
skb_put(skb, sizeof(*eth));
skb_set_network_header(skb, skb->len);
iph = ip_hdr(skb);
+ iph->tot_len = htons(pkt_size - sizeof(*eth));
iph->version = 4;
iph->ihl = 6;
iph->tos = 0xc0;
- iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4);
iph->id = 0;
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
- iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
+ iph->daddr = ip_dst;
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -280,7 +774,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
(HZ / IGMP_TIMER_SCALE);
ih->group = group;
ih->csum = 0;
- ih->csum = ip_compute_csum((void *)ih, sizeof(*ih));
+ csum = &ih->csum;
+ csum_start = (void *)ih;
break;
case 3:
ihv3 = igmpv3_query_hdr(skb);
@@ -290,15 +785,40 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
(HZ / IGMP_TIMER_SCALE);
ihv3->group = group;
ihv3->qqic = br->multicast_query_interval / HZ;
- ihv3->nsrcs = 0;
+ ihv3->nsrcs = htons(lmqt_srcs);
ihv3->resv = 0;
- ihv3->suppress = 0;
+ ihv3->suppress = sflag;
ihv3->qrv = 2;
ihv3->csum = 0;
- ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3));
+ csum = &ihv3->csum;
+ csum_start = (void *)ihv3;
+ if (!pg || !with_srcs)
+ break;
+
+ lmqt_srcs = 0;
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_lmqt == time_after(ent->timer.expires,
+ lmqt) &&
+ ent->src_query_rexmit_cnt > 0) {
+ ihv3->srcs[lmqt_srcs++] = ent->addr.src.ip4;
+ ent->src_query_rexmit_cnt--;
+ if (need_rexmit && ent->src_query_rexmit_cnt)
+ *need_rexmit = true;
+ }
+ }
+ if (WARN_ON(lmqt_srcs != ntohs(ihv3->nsrcs))) {
+ kfree_skb(skb);
+ return NULL;
+ }
break;
}
+ if (WARN_ON(!csum || !csum_start)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ *csum = ip_compute_csum(csum_start, igmp_hdr_size);
skb_put(skb, igmp_hdr_size);
__skb_pull(skb, sizeof(*eth));
@@ -308,23 +828,54 @@ out:
#if IS_ENABLED(CONFIG_IPV6)
static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
- const struct in6_addr *grp,
- u8 *igmp_type)
-{
+ struct net_bridge_port_group *pg,
+ const struct in6_addr *ip6_dst,
+ const struct in6_addr *group,
+ bool with_srcs, bool over_llqt,
+ u8 sflag, u8 *igmp_type,
+ bool *need_rexmit)
+{
+ struct net_bridge_port *p = pg ? pg->key.port : NULL;
+ struct net_bridge_group_src *ent;
+ size_t pkt_size, mld_hdr_size;
+ unsigned long now = jiffies;
struct mld2_query *mld2q;
+ void *csum_start = NULL;
unsigned long interval;
+ __sum16 *csum = NULL;
struct ipv6hdr *ip6h;
struct mld_msg *mldq;
- size_t mld_hdr_size;
struct sk_buff *skb;
+ unsigned long llqt;
struct ethhdr *eth;
+ u16 llqt_srcs = 0;
u8 *hopopt;
mld_hdr_size = sizeof(*mldq);
- if (br->multicast_mld_version == 2)
+ if (br->multicast_mld_version == 2) {
mld_hdr_size = sizeof(*mld2q);
- skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
- 8 + mld_hdr_size);
+ if (pg && with_srcs) {
+ llqt = now + (br->multicast_last_member_interval *
+ br->multicast_last_member_count);
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_llqt == time_after(ent->timer.expires,
+ llqt) &&
+ ent->src_query_rexmit_cnt > 0)
+ llqt_srcs++;
+ }
+
+ if (!llqt_srcs)
+ return NULL;
+ mld_hdr_size += llqt_srcs * sizeof(struct in6_addr);
+ }
+ }
+
+ pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size;
+ if ((p && pkt_size > p->dev->mtu) ||
+ pkt_size > br->dev->mtu)
+ return NULL;
+
+ skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
if (!skb)
goto out;
@@ -346,7 +897,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->payload_len = htons(8 + mld_hdr_size);
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
- ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+ ip6h->daddr = *ip6_dst;
if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
&ip6h->saddr)) {
kfree_skb(skb);
@@ -371,7 +922,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
- interval = ipv6_addr_any(grp) ?
+ interval = ipv6_addr_any(group) ?
br->multicast_query_response_interval :
br->multicast_last_member_interval;
*igmp_type = ICMPV6_MGM_QUERY;
@@ -383,12 +934,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mldq->mld_cksum = 0;
mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
mldq->mld_reserved = 0;
- mldq->mld_mca = *grp;
- mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
- sizeof(*mldq), IPPROTO_ICMPV6,
- csum_partial(mldq,
- sizeof(*mldq),
- 0));
+ mldq->mld_mca = *group;
+ csum = &mldq->mld_cksum;
+ csum_start = (void *)mldq;
break;
case 2:
mld2q = (struct mld2_query *)icmp6_hdr(skb);
@@ -398,21 +946,43 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mld2q->mld2q_cksum = 0;
mld2q->mld2q_resv1 = 0;
mld2q->mld2q_resv2 = 0;
- mld2q->mld2q_suppress = 0;
+ mld2q->mld2q_suppress = sflag;
mld2q->mld2q_qrv = 2;
- mld2q->mld2q_nsrcs = 0;
+ mld2q->mld2q_nsrcs = htons(llqt_srcs);
mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
- mld2q->mld2q_mca = *grp;
- mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
- sizeof(*mld2q),
- IPPROTO_ICMPV6,
- csum_partial(mld2q,
- sizeof(*mld2q),
- 0));
+ mld2q->mld2q_mca = *group;
+ csum = &mld2q->mld2q_cksum;
+ csum_start = (void *)mld2q;
+ if (!pg || !with_srcs)
+ break;
+
+ llqt_srcs = 0;
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_llqt == time_after(ent->timer.expires,
+ llqt) &&
+ ent->src_query_rexmit_cnt > 0) {
+ mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.src.ip6;
+ ent->src_query_rexmit_cnt--;
+ if (need_rexmit && ent->src_query_rexmit_cnt)
+ *need_rexmit = true;
+ }
+ }
+ if (WARN_ON(llqt_srcs != ntohs(mld2q->mld2q_nsrcs))) {
+ kfree_skb(skb);
+ return NULL;
+ }
break;
}
- skb_put(skb, mld_hdr_size);
+ if (WARN_ON(!csum || !csum_start)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ *csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, mld_hdr_size,
+ IPPROTO_ICMPV6,
+ csum_partial(csum_start, mld_hdr_size, 0));
+ skb_put(skb, mld_hdr_size);
__skb_pull(skb, sizeof(*eth));
out:
@@ -421,16 +991,39 @@ out:
#endif
static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
- struct br_ip *addr,
- u8 *igmp_type)
+ struct net_bridge_port_group *pg,
+ struct br_ip *ip_dst,
+ struct br_ip *group,
+ bool with_srcs, bool over_lmqt,
+ u8 sflag, u8 *igmp_type,
+ bool *need_rexmit)
{
- switch (addr->proto) {
+ __be32 ip4_dst;
+
+ switch (group->proto) {
case htons(ETH_P_IP):
- return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type);
+ ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
+ return br_ip4_multicast_alloc_query(br, pg,
+ ip4_dst, group->dst.ip4,
+ with_srcs, over_lmqt,
+ sflag, igmp_type,
+ need_rexmit);
#if IS_ENABLED(CONFIG_IPV6)
- case htons(ETH_P_IPV6):
- return br_ip6_multicast_alloc_query(br, &addr->u.ip6,
- igmp_type);
+ case htons(ETH_P_IPV6): {
+ struct in6_addr ip6_dst;
+
+ if (ip_dst)
+ ip6_dst = ip_dst->dst.ip6;
+ else
+ ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0,
+ htonl(1));
+
+ return br_ip6_multicast_alloc_query(br, pg,
+ &ip6_dst, &group->dst.ip6,
+ with_srcs, over_lmqt,
+ sflag, igmp_type,
+ need_rexmit);
+ }
#endif
}
return NULL;
@@ -457,6 +1050,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
mp->br = br;
mp->addr = *group;
+ mp->mcast_gc.destroy = br_multicast_destroy_mdb_entry;
timer_setup(&mp->timer, br_multicast_group_expired, 0);
err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode,
br_mdb_rht_params);
@@ -470,12 +1064,101 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
}
+static void br_multicast_group_src_expired(struct timer_list *t)
+{
+ struct net_bridge_group_src *src = from_timer(src, t, timer);
+ struct net_bridge_port_group *pg;
+ struct net_bridge *br = src->br;
+
+ spin_lock(&br->multicast_lock);
+ if (hlist_unhashed(&src->node) || !netif_running(br->dev) ||
+ timer_pending(&src->timer))
+ goto out;
+
+ pg = src->pg;
+ if (pg->filter_mode == MCAST_INCLUDE) {
+ br_multicast_del_group_src(src);
+ if (!hlist_empty(&pg->src_list))
+ goto out;
+ br_multicast_find_del_pg(br, pg);
+ } else {
+ br_multicast_fwd_src_handle(src);
+ }
+
+out:
+ spin_unlock(&br->multicast_lock);
+}
+
+static struct net_bridge_group_src *
+br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip)
+{
+ struct net_bridge_group_src *ent;
+
+ switch (ip->proto) {
+ case htons(ETH_P_IP):
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ if (ip->src.ip4 == ent->addr.src.ip4)
+ return ent;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ if (!ipv6_addr_cmp(&ent->addr.src.ip6, &ip->src.ip6))
+ return ent;
+ break;
+#endif
+ }
+
+ return NULL;
+}
+
+static struct net_bridge_group_src *
+br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip)
+{
+ struct net_bridge_group_src *grp_src;
+
+ if (unlikely(pg->src_ents >= PG_SRC_ENT_LIMIT))
+ return NULL;
+
+ switch (src_ip->proto) {
+ case htons(ETH_P_IP):
+ if (ipv4_is_zeronet(src_ip->src.ip4) ||
+ ipv4_is_multicast(src_ip->src.ip4))
+ return NULL;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (ipv6_addr_any(&src_ip->src.ip6) ||
+ ipv6_addr_is_multicast(&src_ip->src.ip6))
+ return NULL;
+ break;
+#endif
+ }
+
+ grp_src = kzalloc(sizeof(*grp_src), GFP_ATOMIC);
+ if (unlikely(!grp_src))
+ return NULL;
+
+ grp_src->pg = pg;
+ grp_src->br = pg->key.port->br;
+ grp_src->addr = *src_ip;
+ grp_src->mcast_gc.destroy = br_multicast_destroy_group_src;
+ timer_setup(&grp_src->timer, br_multicast_group_src_expired, 0);
+
+ hlist_add_head_rcu(&grp_src->node, &pg->src_list);
+ pg->src_ents++;
+
+ return grp_src;
+}
+
struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags,
- const unsigned char *src)
+ const unsigned char *src,
+ u8 filter_mode,
+ u8 rt_protocol)
{
struct net_bridge_port_group *p;
@@ -483,12 +1166,25 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (unlikely(!p))
return NULL;
- p->addr = *group;
- p->port = port;
+ p->key.addr = *group;
+ p->key.port = port;
p->flags = flags;
+ p->filter_mode = filter_mode;
+ p->rt_protocol = rt_protocol;
+ p->mcast_gc.destroy = br_multicast_destroy_port_group;
+ INIT_HLIST_HEAD(&p->src_list);
+
+ if (!br_multicast_is_star_g(group) &&
+ rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
+ br_sg_port_rht_params)) {
+ kfree(p);
+ return NULL;
+ }
+
rcu_assign_pointer(p->next, next);
- hlist_add_head(&p->mglist, &port->mglist);
timer_setup(&p->timer, br_multicast_port_group_expired, 0);
+ timer_setup(&p->rexmit_timer, br_multicast_port_group_rexmit, 0);
+ hlist_add_head(&p->mglist, &port->mglist);
if (src)
memcpy(p->eth_addr, src, ETH_ALEN);
@@ -498,26 +1194,14 @@ struct net_bridge_port_group *br_multicast_new_port_group(
return p;
}
-static bool br_port_group_equal(struct net_bridge_port_group *p,
- struct net_bridge_port *port,
- const unsigned char *src)
-{
- if (p->port != port)
- return false;
-
- if (!(port->flags & BR_MULTICAST_TO_UNICAST))
- return true;
-
- return ether_addr_equal(src, p->eth_addr);
-}
-
void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined) {
mp->host_joined = true;
+ if (br_multicast_is_star_g(&mp->addr))
+ br_multicast_star_g_host_state(mp);
if (notify)
- br_mdb_notify(mp->br->dev, NULL, &mp->addr,
- RTM_NEWMDB, 0);
+ br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB);
}
mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
}
@@ -528,30 +1212,33 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
return;
mp->host_joined = false;
+ if (br_multicast_is_star_g(&mp->addr))
+ br_multicast_star_g_host_state(mp);
if (notify)
- br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0);
+ br_mdb_notify(mp->br->dev, mp, NULL, RTM_DELMDB);
}
-static int br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
- struct br_ip *group,
- const unsigned char *src)
+static struct net_bridge_port_group *
+__br_multicast_add_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ const unsigned char *src,
+ u8 filter_mode,
+ bool igmpv2_mldv1,
+ bool blocked)
{
struct net_bridge_port_group __rcu **pp;
- struct net_bridge_port_group *p;
+ struct net_bridge_port_group *p = NULL;
struct net_bridge_mdb_entry *mp;
unsigned long now = jiffies;
- int err;
- spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) ||
(port && port->state == BR_STATE_DISABLED))
goto out;
mp = br_multicast_new_group(br, group);
- err = PTR_ERR(mp);
if (IS_ERR(mp))
- goto err;
+ return ERR_PTR(PTR_ERR(mp));
if (!port) {
br_multicast_host_join(mp, true);
@@ -563,23 +1250,46 @@ static int br_multicast_add_group(struct net_bridge *br,
pp = &p->next) {
if (br_port_group_equal(p, port, src))
goto found;
- if ((unsigned long)p->port < (unsigned long)port)
+ if ((unsigned long)p->key.port < (unsigned long)port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, 0, src);
- if (unlikely(!p))
- goto err;
+ p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode,
+ RTPROT_KERNEL);
+ if (unlikely(!p)) {
+ p = ERR_PTR(-ENOMEM);
+ goto out;
+ }
rcu_assign_pointer(*pp, p);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0);
+ if (blocked)
+ p->flags |= MDB_PG_FLAGS_BLOCKED;
+ br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
found:
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ if (igmpv2_mldv1)
+ mod_timer(&p->timer, now + br->multicast_membership_interval);
+
out:
- err = 0;
+ return p;
+}
+
+static int br_multicast_add_group(struct net_bridge *br,
+ struct net_bridge_port *port,
+ struct br_ip *group,
+ const unsigned char *src,
+ u8 filter_mode,
+ bool igmpv2_mldv1)
+{
+ struct net_bridge_port_group *pg;
+ int err;
-err:
+ spin_lock(&br->multicast_lock);
+ pg = __br_multicast_add_group(br, port, group, src, filter_mode,
+ igmpv2_mldv1, false);
+ /* NULL is considered valid for host joined groups */
+ err = IS_ERR(pg) ? PTR_ERR(pg) : 0;
spin_unlock(&br->multicast_lock);
+
return err;
}
@@ -587,19 +1297,23 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
__be32 group,
__u16 vid,
- const unsigned char *src)
+ const unsigned char *src,
+ bool igmpv2)
{
struct br_ip br_group;
+ u8 filter_mode;
if (ipv4_is_local_multicast(group))
return 0;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip4 = group;
+ br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
+ filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src);
+ return br_multicast_add_group(br, port, &br_group, src, filter_mode,
+ igmpv2);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -607,19 +1321,23 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
struct net_bridge_port *port,
const struct in6_addr *group,
__u16 vid,
- const unsigned char *src)
+ const unsigned char *src,
+ bool mldv1)
{
struct br_ip br_group;
+ u8 filter_mode;
if (ipv6_addr_is_ll_all_nodes(group))
return 0;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip6 = *group;
+ br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
+ filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src);
+ return br_multicast_add_group(br, port, &br_group, src, filter_mode,
+ mldv1);
}
#endif
@@ -702,21 +1420,30 @@ static void br_multicast_select_own_querier(struct net_bridge *br,
struct sk_buff *skb)
{
if (ip->proto == htons(ETH_P_IP))
- br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr;
+ br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
#if IS_ENABLED(CONFIG_IPV6)
else
- br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr;
+ br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
#endif
}
static void __br_multicast_send_query(struct net_bridge *br,
struct net_bridge_port *port,
- struct br_ip *ip)
-{
+ struct net_bridge_port_group *pg,
+ struct br_ip *ip_dst,
+ struct br_ip *group,
+ bool with_srcs,
+ u8 sflag,
+ bool *need_rexmit)
+{
+ bool over_lmqt = !!sflag;
struct sk_buff *skb;
u8 igmp_type;
- skb = br_multicast_alloc_query(br, ip, &igmp_type);
+again_under_lmqt:
+ skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs,
+ over_lmqt, sflag, &igmp_type,
+ need_rexmit);
if (!skb)
return;
@@ -727,8 +1454,13 @@ static void __br_multicast_send_query(struct net_bridge *br,
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
+
+ if (over_lmqt && with_srcs && sflag) {
+ over_lmqt = false;
+ goto again_under_lmqt;
+ }
} else {
- br_multicast_select_own_querier(br, ip, skb);
+ br_multicast_select_own_querier(br, group, skb);
br_multicast_count(br, port, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
@@ -748,7 +1480,7 @@ static void br_multicast_send_query(struct net_bridge *br,
!br_opt_get(br, BROPT_MULTICAST_QUERIER))
return;
- memset(&br_group.u, 0, sizeof(br_group.u));
+ memset(&br_group.dst, 0, sizeof(br_group.dst));
if (port ? (own_query == &port->ip4_own_query) :
(own_query == &br->ip4_own_query)) {
@@ -764,7 +1496,8 @@ static void br_multicast_send_query(struct net_bridge *br,
if (!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, port, &br_group);
+ __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0,
+ NULL);
time = jiffies;
time += own_query->startup_sent < br->multicast_startup_query_count ?
@@ -809,6 +1542,44 @@ static void br_ip6_multicast_port_query_expired(struct timer_list *t)
}
#endif
+static void br_multicast_port_group_rexmit(struct timer_list *t)
+{
+ struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer);
+ struct bridge_mcast_other_query *other_query = NULL;
+ struct net_bridge *br = pg->key.port->br;
+ bool need_rexmit = false;
+
+ spin_lock(&br->multicast_lock);
+ if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) ||
+ !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+ goto out;
+
+ if (pg->key.addr.proto == htons(ETH_P_IP))
+ other_query = &br->ip4_other_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ other_query = &br->ip6_other_query;
+#endif
+
+ if (!other_query || timer_pending(&other_query->timer))
+ goto out;
+
+ if (pg->grp_query_rexmit_cnt) {
+ pg->grp_query_rexmit_cnt--;
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, false, 1, NULL);
+ }
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, true, 0, &need_rexmit);
+
+ if (pg->grp_query_rexmit_cnt || need_rexmit)
+ mod_timer(&pg->rexmit_timer, jiffies +
+ br->multicast_last_member_interval);
+out:
+ spin_unlock(&br->multicast_lock);
+}
+
static void br_mc_disabled_update(struct net_device *dev, bool value)
{
struct switchdev_attr attr = {
@@ -847,13 +1618,16 @@ void br_multicast_del_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
+ HLIST_HEAD(deleted_head);
struct hlist_node *n;
/* Take care of the remaining groups, only perm ones should be left */
spin_lock_bh(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- br_multicast_del_pg(br, pg);
+ br_multicast_find_del_pg(br, pg);
+ hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
+ br_multicast_gc(&deleted_head);
del_timer_sync(&port->multicast_router_timer);
free_percpu(port->mcast_stats);
}
@@ -901,7 +1675,7 @@ void br_multicast_disable_port(struct net_bridge_port *port)
spin_lock(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
- br_multicast_del_pg(br, pg);
+ br_multicast_find_del_pg(br, pg);
__del_port_router(port);
@@ -913,20 +1687,574 @@ void br_multicast_disable_port(struct net_bridge_port *port)
spin_unlock(&br->multicast_lock);
}
+static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
+{
+ struct net_bridge_group_src *ent;
+ struct hlist_node *tmp;
+ int deleted = 0;
+
+ hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
+ if (ent->flags & BR_SGRP_F_DELETE) {
+ br_multicast_del_group_src(ent);
+ deleted++;
+ }
+
+ return deleted;
+}
+
+static void __grp_src_mod_timer(struct net_bridge_group_src *src,
+ unsigned long expires)
+{
+ mod_timer(&src->timer, expires);
+ br_multicast_fwd_src_handle(src);
+}
+
+static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
+{
+ struct bridge_mcast_other_query *other_query = NULL;
+ struct net_bridge *br = pg->key.port->br;
+ u32 lmqc = br->multicast_last_member_count;
+ unsigned long lmqt, lmi, now = jiffies;
+ struct net_bridge_group_src *ent;
+
+ if (!netif_running(br->dev) ||
+ !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ return;
+
+ if (pg->key.addr.proto == htons(ETH_P_IP))
+ other_query = &br->ip4_other_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ other_query = &br->ip6_other_query;
+#endif
+
+ lmqt = now + br_multicast_lmqt(br);
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (ent->flags & BR_SGRP_F_SEND) {
+ ent->flags &= ~BR_SGRP_F_SEND;
+ if (ent->timer.expires > lmqt) {
+ if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ other_query &&
+ !timer_pending(&other_query->timer))
+ ent->src_query_rexmit_cnt = lmqc;
+ __grp_src_mod_timer(ent, lmqt);
+ }
+ }
+ }
+
+ if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) ||
+ !other_query || timer_pending(&other_query->timer))
+ return;
+
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, true, 1, NULL);
+
+ lmi = now + br->multicast_last_member_interval;
+ if (!timer_pending(&pg->rexmit_timer) ||
+ time_after(pg->rexmit_timer.expires, lmi))
+ mod_timer(&pg->rexmit_timer, lmi);
+}
+
+static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
+{
+ struct bridge_mcast_other_query *other_query = NULL;
+ struct net_bridge *br = pg->key.port->br;
+ unsigned long now = jiffies, lmi;
+
+ if (!netif_running(br->dev) ||
+ !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ return;
+
+ if (pg->key.addr.proto == htons(ETH_P_IP))
+ other_query = &br->ip4_other_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ other_query = &br->ip6_other_query;
+#endif
+
+ if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ other_query && !timer_pending(&other_query->timer)) {
+ lmi = now + br->multicast_last_member_interval;
+ pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1;
+ __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ &pg->key.addr, false, 0, NULL);
+ if (!timer_pending(&pg->rexmit_timer) ||
+ time_after(pg->rexmit_timer.expires, lmi))
+ mod_timer(&pg->rexmit_timer, lmi);
+ }
+
+ if (pg->filter_mode == MCAST_EXCLUDE &&
+ (!timer_pending(&pg->timer) ||
+ time_after(pg->timer.expires, now + br_multicast_lmqt(br))))
+ mod_timer(&pg->timer, now + br_multicast_lmqt(br));
+}
+
+/* State Msg type New state Actions
+ * INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=GMI
+ * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI
+ * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI
+ */
+static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_group_src *ent;
+ unsigned long now = jiffies;
+ bool changed = false;
+ struct br_ip src_ip;
+ u32 src_idx;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (!ent) {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent)
+ changed = true;
+ }
+
+ if (ent)
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ srcs += src_size;
+ }
+
+ return changed;
+}
+
+/* State Msg type New state Actions
+ * INCLUDE (A) IS_EX (B) EXCLUDE (A*B,B-A) (B-A)=0
+ * Delete (A-B)
+ * Group Timer=GMI
+ */
+static void __grp_src_isexc_incl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge_group_src *ent;
+ struct br_ip src_ip;
+ u32 src_idx;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags |= BR_SGRP_F_DELETE;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent)
+ ent->flags &= ~BR_SGRP_F_DELETE;
+ else
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent)
+ br_multicast_fwd_src_handle(ent);
+ srcs += src_size;
+ }
+
+ __grp_src_delete_marked(pg);
+}
+
+/* State Msg type New state Actions
+ * EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=GMI
+ * Delete (X-A)
+ * Delete (Y-A)
+ * Group Timer=GMI
+ */
+static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_group_src *ent;
+ unsigned long now = jiffies;
+ bool changed = false;
+ struct br_ip src_ip;
+ u32 src_idx;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags |= BR_SGRP_F_DELETE;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent) {
+ ent->flags &= ~BR_SGRP_F_DELETE;
+ } else {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent) {
+ __grp_src_mod_timer(ent,
+ now + br_multicast_gmi(br));
+ changed = true;
+ }
+ }
+ srcs += src_size;
+ }
+
+ if (__grp_src_delete_marked(pg))
+ changed = true;
+
+ return changed;
+}
+
+static bool br_multicast_isexc(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge *br = pg->key.port->br;
+ bool changed = false;
+
+ switch (pg->filter_mode) {
+ case MCAST_INCLUDE:
+ __grp_src_isexc_incl(pg, srcs, nsrcs, src_size);
+ br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
+ changed = true;
+ break;
+ case MCAST_EXCLUDE:
+ changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size);
+ break;
+ }
+
+ pg->filter_mode = MCAST_EXCLUDE;
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+
+ return changed;
+}
+
+/* State Msg type New state Actions
+ * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI
+ * Send Q(G,A-B)
+ */
+static bool __grp_src_toin_incl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge *br = pg->key.port->br;
+ u32 src_idx, to_send = pg->src_ents;
+ struct net_bridge_group_src *ent;
+ unsigned long now = jiffies;
+ bool changed = false;
+ struct br_ip src_ip;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags |= BR_SGRP_F_SEND;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent) {
+ ent->flags &= ~BR_SGRP_F_SEND;
+ to_send--;
+ } else {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent)
+ changed = true;
+ }
+ if (ent)
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ srcs += src_size;
+ }
+
+ if (to_send)
+ __grp_src_query_marked_and_rexmit(pg);
+
+ return changed;
+}
+
+/* State Msg type New state Actions
+ * EXCLUDE (X,Y) TO_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI
+ * Send Q(G,X-A)
+ * Send Q(G)
+ */
+static bool __grp_src_toin_excl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge *br = pg->key.port->br;
+ u32 src_idx, to_send = pg->src_ents;
+ struct net_bridge_group_src *ent;
+ unsigned long now = jiffies;
+ bool changed = false;
+ struct br_ip src_ip;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ if (timer_pending(&ent->timer))
+ ent->flags |= BR_SGRP_F_SEND;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent) {
+ if (timer_pending(&ent->timer)) {
+ ent->flags &= ~BR_SGRP_F_SEND;
+ to_send--;
+ }
+ } else {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent)
+ changed = true;
+ }
+ if (ent)
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ srcs += src_size;
+ }
+
+ if (to_send)
+ __grp_src_query_marked_and_rexmit(pg);
+
+ __grp_send_query_and_rexmit(pg);
+
+ return changed;
+}
+
+static bool br_multicast_toin(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ bool changed = false;
+
+ switch (pg->filter_mode) {
+ case MCAST_INCLUDE:
+ changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size);
+ break;
+ case MCAST_EXCLUDE:
+ changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size);
+ break;
+ }
+
+ return changed;
+}
+
+/* State Msg type New state Actions
+ * INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0
+ * Delete (A-B)
+ * Send Q(G,A*B)
+ * Group Timer=GMI
+ */
+static void __grp_src_toex_incl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge_group_src *ent;
+ u32 src_idx, to_send = 0;
+ struct br_ip src_ip;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent) {
+ ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) |
+ BR_SGRP_F_SEND;
+ to_send++;
+ } else {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ }
+ if (ent)
+ br_multicast_fwd_src_handle(ent);
+ srcs += src_size;
+ }
+
+ __grp_src_delete_marked(pg);
+ if (to_send)
+ __grp_src_query_marked_and_rexmit(pg);
+}
+
+/* State Msg type New state Actions
+ * EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=Group Timer
+ * Delete (X-A)
+ * Delete (Y-A)
+ * Send Q(G,A-Y)
+ * Group Timer=GMI
+ */
+static bool __grp_src_toex_excl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge_group_src *ent;
+ u32 src_idx, to_send = 0;
+ bool changed = false;
+ struct br_ip src_ip;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent) {
+ ent->flags &= ~BR_SGRP_F_DELETE;
+ } else {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent) {
+ __grp_src_mod_timer(ent, pg->timer.expires);
+ changed = true;
+ }
+ }
+ if (ent && timer_pending(&ent->timer)) {
+ ent->flags |= BR_SGRP_F_SEND;
+ to_send++;
+ }
+ srcs += src_size;
+ }
+
+ if (__grp_src_delete_marked(pg))
+ changed = true;
+ if (to_send)
+ __grp_src_query_marked_and_rexmit(pg);
+
+ return changed;
+}
+
+static bool br_multicast_toex(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge *br = pg->key.port->br;
+ bool changed = false;
+
+ switch (pg->filter_mode) {
+ case MCAST_INCLUDE:
+ __grp_src_toex_incl(pg, srcs, nsrcs, src_size);
+ br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
+ changed = true;
+ break;
+ case MCAST_EXCLUDE:
+ changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size);
+ break;
+ }
+
+ pg->filter_mode = MCAST_EXCLUDE;
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+
+ return changed;
+}
+
+/* State Msg type New state Actions
+ * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B)
+ */
+static void __grp_src_block_incl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge_group_src *ent;
+ u32 src_idx, to_send = 0;
+ struct br_ip src_ip;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags &= ~BR_SGRP_F_SEND;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (ent) {
+ ent->flags |= BR_SGRP_F_SEND;
+ to_send++;
+ }
+ srcs += src_size;
+ }
+
+ if (to_send)
+ __grp_src_query_marked_and_rexmit(pg);
+
+ if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list))
+ br_multicast_find_del_pg(pg->key.port->br, pg);
+}
+
+/* State Msg type New state Actions
+ * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer
+ * Send Q(G,A-Y)
+ */
+static bool __grp_src_block_excl(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ struct net_bridge_group_src *ent;
+ u32 src_idx, to_send = 0;
+ bool changed = false;
+ struct br_ip src_ip;
+
+ hlist_for_each_entry(ent, &pg->src_list, node)
+ ent->flags &= ~BR_SGRP_F_SEND;
+
+ memset(&src_ip, 0, sizeof(src_ip));
+ src_ip.proto = pg->key.addr.proto;
+ for (src_idx = 0; src_idx < nsrcs; src_idx++) {
+ memcpy(&src_ip.src, srcs, src_size);
+ ent = br_multicast_find_group_src(pg, &src_ip);
+ if (!ent) {
+ ent = br_multicast_new_group_src(pg, &src_ip);
+ if (ent) {
+ __grp_src_mod_timer(ent, pg->timer.expires);
+ changed = true;
+ }
+ }
+ if (ent && timer_pending(&ent->timer)) {
+ ent->flags |= BR_SGRP_F_SEND;
+ to_send++;
+ }
+ srcs += src_size;
+ }
+
+ if (to_send)
+ __grp_src_query_marked_and_rexmit(pg);
+
+ return changed;
+}
+
+static bool br_multicast_block(struct net_bridge_port_group *pg,
+ void *srcs, u32 nsrcs, size_t src_size)
+{
+ bool changed = false;
+
+ switch (pg->filter_mode) {
+ case MCAST_INCLUDE:
+ __grp_src_block_incl(pg, srcs, nsrcs, src_size);
+ break;
+ case MCAST_EXCLUDE:
+ changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size);
+ break;
+ }
+
+ return changed;
+}
+
+static struct net_bridge_port_group *
+br_multicast_find_port(struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port *p,
+ const unsigned char *src)
+{
+ struct net_bridge *br __maybe_unused = mp->br;
+ struct net_bridge_port_group *pg;
+
+ for (pg = mlock_dereference(mp->ports, br);
+ pg;
+ pg = mlock_dereference(pg->next, br))
+ if (br_port_group_equal(pg, p, src))
+ return pg;
+
+ return NULL;
+}
+
static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
struct net_bridge_port *port,
struct sk_buff *skb,
u16 vid)
{
+ bool igmpv2 = br->multicast_igmp_version == 2;
+ struct net_bridge_mdb_entry *mdst;
+ struct net_bridge_port_group *pg;
const unsigned char *src;
struct igmpv3_report *ih;
struct igmpv3_grec *grec;
- int i;
- int len;
- int num;
- int type;
- int err = 0;
+ int i, len, num, type;
+ bool changed = false;
__be32 group;
+ int err = 0;
u16 nsrcs;
ih = igmpv3_report_hdr(skb);
@@ -947,7 +2275,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
if (!ip_mc_may_pull(skb, len))
return -EINVAL;
- /* We treat this as an IGMPv2 report for now. */
switch (type) {
case IGMPV3_MODE_IS_INCLUDE:
case IGMPV3_MODE_IS_EXCLUDE:
@@ -962,16 +2289,62 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
}
src = eth_hdr(skb)->h_source;
- if ((type == IGMPV3_CHANGE_TO_INCLUDE ||
- type == IGMPV3_MODE_IS_INCLUDE) &&
- nsrcs == 0) {
- br_ip4_multicast_leave_group(br, port, group, vid, src);
+ if (nsrcs == 0 &&
+ (type == IGMPV3_CHANGE_TO_INCLUDE ||
+ type == IGMPV3_MODE_IS_INCLUDE)) {
+ if (!port || igmpv2) {
+ br_ip4_multicast_leave_group(br, port, group, vid, src);
+ continue;
+ }
} else {
err = br_ip4_multicast_add_group(br, port, group, vid,
- src);
+ src, igmpv2);
if (err)
break;
}
+
+ if (!port || igmpv2)
+ continue;
+
+ spin_lock_bh(&br->multicast_lock);
+ mdst = br_mdb_ip4_get(br, group, vid);
+ if (!mdst)
+ goto unlock_continue;
+ pg = br_multicast_find_port(mdst, port, src);
+ if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
+ goto unlock_continue;
+ /* reload grec */
+ grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4));
+ switch (type) {
+ case IGMPV3_ALLOW_NEW_SOURCES:
+ changed = br_multicast_isinc_allow(pg, grec->grec_src,
+ nsrcs, sizeof(__be32));
+ break;
+ case IGMPV3_MODE_IS_INCLUDE:
+ changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs,
+ sizeof(__be32));
+ break;
+ case IGMPV3_MODE_IS_EXCLUDE:
+ changed = br_multicast_isexc(pg, grec->grec_src, nsrcs,
+ sizeof(__be32));
+ break;
+ case IGMPV3_CHANGE_TO_INCLUDE:
+ changed = br_multicast_toin(pg, grec->grec_src, nsrcs,
+ sizeof(__be32));
+ break;
+ case IGMPV3_CHANGE_TO_EXCLUDE:
+ changed = br_multicast_toex(pg, grec->grec_src, nsrcs,
+ sizeof(__be32));
+ break;
+ case IGMPV3_BLOCK_OLD_SOURCES:
+ changed = br_multicast_block(pg, grec->grec_src, nsrcs,
+ sizeof(__be32));
+ break;
+ }
+ if (changed)
+ br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+unlock_continue:
+ spin_unlock_bh(&br->multicast_lock);
}
return err;
@@ -983,14 +2356,16 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
struct sk_buff *skb,
u16 vid)
{
+ bool mldv1 = br->multicast_mld_version == 1;
+ struct net_bridge_mdb_entry *mdst;
+ struct net_bridge_port_group *pg;
unsigned int nsrcs_offset;
const unsigned char *src;
struct icmp6hdr *icmp6h;
struct mld2_grec *grec;
unsigned int grec_len;
- int i;
- int len;
- int num;
+ bool changed = false;
+ int i, len, num;
int err = 0;
if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
@@ -1024,7 +2399,6 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
grec = (struct mld2_grec *)(skb->data + len);
len += grec_len;
- /* We treat these as MLDv1 reports for now. */
switch (grec->grec_type) {
case MLD2_MODE_IS_INCLUDE:
case MLD2_MODE_IS_EXCLUDE:
@@ -1042,15 +2416,61 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
nsrcs == 0) {
- br_ip6_multicast_leave_group(br, port, &grec->grec_mca,
- vid, src);
+ if (!port || mldv1) {
+ br_ip6_multicast_leave_group(br, port,
+ &grec->grec_mca,
+ vid, src);
+ continue;
+ }
} else {
err = br_ip6_multicast_add_group(br, port,
&grec->grec_mca, vid,
- src);
+ src, mldv1);
if (err)
break;
}
+
+ if (!port || mldv1)
+ continue;
+
+ spin_lock_bh(&br->multicast_lock);
+ mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid);
+ if (!mdst)
+ goto unlock_continue;
+ pg = br_multicast_find_port(mdst, port, src);
+ if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
+ goto unlock_continue;
+ switch (grec->grec_type) {
+ case MLD2_ALLOW_NEW_SOURCES:
+ changed = br_multicast_isinc_allow(pg, grec->grec_src,
+ nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_MODE_IS_INCLUDE:
+ changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_MODE_IS_EXCLUDE:
+ changed = br_multicast_isexc(pg, grec->grec_src, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_CHANGE_TO_INCLUDE:
+ changed = br_multicast_toin(pg, grec->grec_src, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_CHANGE_TO_EXCLUDE:
+ changed = br_multicast_toex(pg, grec->grec_src, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ case MLD2_BLOCK_OLD_SOURCES:
+ changed = br_multicast_block(pg, grec->grec_src, nsrcs,
+ sizeof(struct in6_addr));
+ break;
+ }
+ if (changed)
+ br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+unlock_continue:
+ spin_unlock_bh(&br->multicast_lock);
}
return err;
@@ -1065,16 +2485,16 @@ static bool br_ip4_multicast_select_querier(struct net_bridge *br,
!timer_pending(&br->ip4_other_query.timer))
goto update;
- if (!br->ip4_querier.addr.u.ip4)
+ if (!br->ip4_querier.addr.src.ip4)
goto update;
- if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4))
+ if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4))
goto update;
return false;
update:
- br->ip4_querier.addr.u.ip4 = saddr;
+ br->ip4_querier.addr.src.ip4 = saddr;
/* update protected by general multicast_lock by caller */
rcu_assign_pointer(br->ip4_querier.port, port);
@@ -1091,13 +2511,13 @@ static bool br_ip6_multicast_select_querier(struct net_bridge *br,
!timer_pending(&br->ip6_other_query.timer))
goto update;
- if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0)
+ if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0)
goto update;
return false;
update:
- br->ip6_querier.addr.u.ip6 = *saddr;
+ br->ip6_querier.addr.src.ip6 = *saddr;
/* update protected by general multicast_lock by caller */
rcu_assign_pointer(br->ip6_querier.port, port);
@@ -1112,10 +2532,10 @@ static bool br_multicast_select_querier(struct net_bridge *br,
{
switch (saddr->proto) {
case htons(ETH_P_IP):
- return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
+ return br_ip4_multicast_select_querier(br, port, saddr->src.ip4);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
+ return br_ip6_multicast_select_querier(br, port, &saddr->src.ip6);
#endif
}
@@ -1245,7 +2665,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
}
} else if (transport_len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
- if (ih3->nsrcs)
+ if (ih3->nsrcs ||
+ (br->multicast_igmp_version == 3 && group && ih3->suppress))
goto out;
max_delay = ih3->code ?
@@ -1256,7 +2677,7 @@ static void br_ip4_multicast_query(struct net_bridge *br,
if (!group) {
saddr.proto = htons(ETH_P_IP);
- saddr.u.ip4 = iph->saddr;
+ saddr.src.ip4 = iph->saddr;
br_multicast_query_received(br, port, &br->ip4_other_query,
&saddr, max_delay);
@@ -1280,7 +2701,9 @@ static void br_ip4_multicast_query(struct net_bridge *br,
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
- try_to_del_timer_sync(&p->timer) >= 0)
+ try_to_del_timer_sync(&p->timer) >= 0 &&
+ (br->multicast_igmp_version == 2 ||
+ p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
@@ -1330,6 +2753,10 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mld2q = (struct mld2_query *)icmp6_hdr(skb);
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
+ if (br->multicast_mld_version == 2 &&
+ !ipv6_addr_any(&mld2q->mld2q_mca) &&
+ mld2q->mld2q_suppress)
+ goto out;
max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL);
}
@@ -1338,7 +2765,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (is_general_query) {
saddr.proto = htons(ETH_P_IPV6);
- saddr.u.ip6 = ipv6_hdr(skb)->saddr;
+ saddr.src.ip6 = ipv6_hdr(skb)->saddr;
br_multicast_query_received(br, port, &br->ip6_other_query,
&saddr, max_delay);
@@ -1363,7 +2790,9 @@ static int br_ip6_multicast_query(struct net_bridge *br,
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
- try_to_del_timer_sync(&p->timer) >= 0)
+ try_to_del_timer_sync(&p->timer) >= 0 &&
+ (br->multicast_mld_version == 1 ||
+ p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
@@ -1407,16 +2836,8 @@ br_multicast_leave_group(struct net_bridge *br,
if (p->flags & MDB_PG_FLAGS_PERMANENT)
break;
- rcu_assign_pointer(*pp, p->next);
- hlist_del_init(&p->mglist);
- del_timer(&p->timer);
- kfree_rcu(p, rcu);
- br_mdb_notify(br->dev, port, group, RTM_DELMDB,
- p->flags | MDB_PG_FLAGS_FAST_LEAVE);
-
- if (!mp->ports && !mp->host_joined &&
- netif_running(br->dev))
- mod_timer(&mp->timer, jiffies);
+ p->flags |= MDB_PG_FLAGS_FAST_LEAVE;
+ br_multicast_del_pg(mp, p, pp);
}
goto out;
}
@@ -1425,7 +2846,8 @@ br_multicast_leave_group(struct net_bridge *br,
goto out;
if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- __br_multicast_send_query(br, port, &mp->addr);
+ __br_multicast_send_query(br, port, NULL, NULL, &mp->addr,
+ false, 0, NULL);
time = jiffies + br->multicast_last_member_count *
br->multicast_last_member_interval;
@@ -1467,7 +2889,7 @@ br_multicast_leave_group(struct net_bridge *br,
for (p = mlock_dereference(mp->ports, br);
p != NULL;
p = mlock_dereference(p->next, br)) {
- if (p->port != port)
+ if (p->key.port != port)
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -1498,7 +2920,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip4 = group;
+ br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
@@ -1522,7 +2944,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
memset(&br_group, 0, sizeof(br_group));
- br_group.u.ip6 = *group;
+ br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
@@ -1627,7 +3049,8 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip4_multicast_add_group(br, port, ih->group, vid, src);
+ err = br_ip4_multicast_add_group(br, port, ih->group, vid, src,
+ true);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
@@ -1706,7 +3129,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
src = eth_hdr(skb)->h_source;
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid,
- src);
+ src, true);
break;
case ICMPV6_MLD2_REPORT:
err = br_ip6_multicast_mld2_report(br, port, skb, vid);
@@ -1781,6 +3204,19 @@ static void br_ip6_multicast_query_expired(struct timer_list *t)
}
#endif
+static void br_multicast_gc_work(struct work_struct *work)
+{
+ struct net_bridge *br = container_of(work, struct net_bridge,
+ mcast_gc_work);
+ HLIST_HEAD(deleted_head);
+
+ spin_lock_bh(&br->multicast_lock);
+ hlist_move_list(&br->mcast_gc_list, &deleted_head);
+ spin_unlock_bh(&br->multicast_lock);
+
+ br_multicast_gc(&deleted_head);
+}
+
void br_multicast_init(struct net_bridge *br)
{
br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
@@ -1821,6 +3257,8 @@ void br_multicast_init(struct net_bridge *br)
br_ip6_multicast_query_expired, 0);
#endif
INIT_HLIST_HEAD(&br->mdb_list);
+ INIT_HLIST_HEAD(&br->mcast_gc_list);
+ INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work);
}
static void br_ip4_multicast_join_snoopers(struct net_bridge *br)
@@ -1924,18 +3362,18 @@ void br_multicast_stop(struct net_bridge *br)
void br_multicast_dev_del(struct net_bridge *br)
{
struct net_bridge_mdb_entry *mp;
+ HLIST_HEAD(deleted_head);
struct hlist_node *tmp;
spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) {
- del_timer(&mp->timer);
- rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
- br_mdb_rht_params);
- hlist_del_rcu(&mp->mdb_node);
- kfree_rcu(mp, rcu);
- }
+ hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node)
+ br_multicast_del_mdb_entry(mp);
+ hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
+ br_multicast_gc(&deleted_head);
+ cancel_work_sync(&br->mcast_gc_work);
+
rcu_barrier();
}
@@ -2211,7 +3649,7 @@ int br_multicast_list_adjacent(struct net_device *dev,
if (!entry)
goto unlock;
- entry->addr = group->addr;
+ entry->addr = group->key.addr;
list_add(&entry->list, br_ip_list);
count++;
}
@@ -2468,10 +3906,23 @@ void br_multicast_get_stats(const struct net_bridge *br,
int br_mdb_hash_init(struct net_bridge *br)
{
- return rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+ int err;
+
+ err = rhashtable_init(&br->sg_port_tbl, &br_sg_port_rht_params);
+ if (err)
+ return err;
+
+ err = rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+ if (err) {
+ rhashtable_destroy(&br->sg_port_tbl);
+ return err;
+ }
+
+ return 0;
}
void br_mdb_hash_fini(struct net_bridge *br)
{
+ rhashtable_destroy(&br->sg_port_tbl);
rhashtable_destroy(&br->mdb_hash_tbl);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 147d52596e17..92d64abffa87 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -380,6 +380,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
u32 filter_mask, const struct net_device *dev)
{
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
+ struct nlattr *af = NULL;
struct net_bridge *br;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
@@ -423,11 +424,18 @@ static int br_fill_ifinfo(struct sk_buff *skb,
nla_nest_end(skb, nest);
}
+ if (filter_mask & (RTEXT_FILTER_BRVLAN |
+ RTEXT_FILTER_BRVLAN_COMPRESSED |
+ RTEXT_FILTER_MRP)) {
+ af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
+ if (!af)
+ goto nla_put_failure;
+ }
+
/* Check if the VID information is requested */
if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
struct net_bridge_vlan_group *vg;
- struct nlattr *af;
int err;
/* RCU needed because of the VLAN locking rules (rcu || rtnl) */
@@ -441,11 +449,6 @@ static int br_fill_ifinfo(struct sk_buff *skb,
rcu_read_unlock();
goto done;
}
- af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
- if (!af) {
- rcu_read_unlock();
- goto nla_put_failure;
- }
if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
err = br_fill_ifvlaninfo_compressed(skb, vg);
else
@@ -456,32 +459,25 @@ static int br_fill_ifinfo(struct sk_buff *skb,
rcu_read_unlock();
if (err)
goto nla_put_failure;
-
- nla_nest_end(skb, af);
}
if (filter_mask & RTEXT_FILTER_MRP) {
- struct nlattr *af;
int err;
if (!br_mrp_enabled(br) || port)
goto done;
- af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
- if (!af)
- goto nla_put_failure;
-
rcu_read_lock();
err = br_mrp_fill_info(skb, br);
rcu_read_unlock();
if (err)
goto nla_put_failure;
-
- nla_nest_end(skb, af);
}
done:
+ if (af)
+ nla_nest_end(skb, af);
nlmsg_end(skb, nlh);
return 0;
@@ -1095,8 +1091,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
- [IFLA_BR_MULTI_BOOLOPT] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct br_boolopt_multi) },
+ [IFLA_BR_MULTI_BOOLOPT] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)),
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index baa1500f384f..345118e35c42 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -213,27 +213,71 @@ struct net_bridge_fdb_entry {
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
+#define MDB_PG_FLAGS_STAR_EXCL BIT(3)
+#define MDB_PG_FLAGS_BLOCKED BIT(4)
-struct net_bridge_port_group {
- struct net_bridge_port *port;
- struct net_bridge_port_group __rcu *next;
- struct hlist_node mglist;
- struct rcu_head rcu;
+#define PG_SRC_ENT_LIMIT 32
+
+#define BR_SGRP_F_DELETE BIT(0)
+#define BR_SGRP_F_SEND BIT(1)
+#define BR_SGRP_F_INSTALLED BIT(2)
+
+struct net_bridge_mcast_gc {
+ struct hlist_node gc_node;
+ void (*destroy)(struct net_bridge_mcast_gc *gc);
+};
+
+struct net_bridge_group_src {
+ struct hlist_node node;
+
+ struct br_ip addr;
+ struct net_bridge_port_group *pg;
+ u8 flags;
+ u8 src_query_rexmit_cnt;
struct timer_list timer;
+
+ struct net_bridge *br;
+ struct net_bridge_mcast_gc mcast_gc;
+ struct rcu_head rcu;
+};
+
+struct net_bridge_port_group_sg_key {
+ struct net_bridge_port *port;
struct br_ip addr;
+};
+
+struct net_bridge_port_group {
+ struct net_bridge_port_group __rcu *next;
+ struct net_bridge_port_group_sg_key key;
unsigned char eth_addr[ETH_ALEN] __aligned(2);
unsigned char flags;
+ unsigned char filter_mode;
+ unsigned char grp_query_rexmit_cnt;
+ unsigned char rt_protocol;
+
+ struct hlist_head src_list;
+ unsigned int src_ents;
+ struct timer_list timer;
+ struct timer_list rexmit_timer;
+ struct hlist_node mglist;
+
+ struct rhash_head rhnode;
+ struct net_bridge_mcast_gc mcast_gc;
+ struct rcu_head rcu;
};
struct net_bridge_mdb_entry {
struct rhash_head rhnode;
struct net_bridge *br;
struct net_bridge_port_group __rcu *ports;
- struct rcu_head rcu;
- struct timer_list timer;
struct br_ip addr;
bool host_joined;
+
+ struct timer_list timer;
struct hlist_node mdb_node;
+
+ struct net_bridge_mcast_gc mcast_gc;
+ struct rcu_head rcu;
};
struct net_bridge_port {
@@ -405,7 +449,9 @@ struct net_bridge {
unsigned long multicast_startup_query_interval;
struct rhashtable mdb_hash_tbl;
+ struct rhashtable sg_port_tbl;
+ struct hlist_head mcast_gc_list;
struct hlist_head mdb_list;
struct hlist_head router_list;
@@ -419,6 +465,7 @@ struct net_bridge {
struct bridge_mcast_own_query ip6_own_query;
struct bridge_mcast_querier ip6_querier;
#endif /* IS_ENABLED(CONFIG_IPV6) */
+ struct work_struct mcast_gc_work;
#endif
struct timer_list hello_timer;
@@ -766,13 +813,17 @@ br_multicast_new_group(struct net_bridge *br, struct br_ip *group);
struct net_bridge_port_group *
br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
struct net_bridge_port_group __rcu *next,
- unsigned char flags, const unsigned char *src);
+ unsigned char flags, const unsigned char *src,
+ u8 filter_mode, u8 rt_protocol);
int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type, u8 flags);
+void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg, int type);
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
int type);
+void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_port_group __rcu **pp);
void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
@@ -784,6 +835,10 @@ void br_mdb_init(void);
void br_mdb_uninit(void);
void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
+ u8 filter_mode);
+void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
+ struct net_bridge_port_group *sg);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -832,10 +887,52 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
}
}
+static inline bool br_multicast_is_star_g(const struct br_ip *ip)
+{
+ switch (ip->proto) {
+ case htons(ETH_P_IP):
+ return ipv4_is_zeronet(ip->src.ip4);
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ return ipv6_addr_any(&ip->src.ip6);
+#endif
+ default:
+ return false;
+ }
+}
+
+static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
+ __be16 proto)
+{
+ switch (proto) {
+ case htons(ETH_P_IP):
+ return !!(br->multicast_igmp_version == 3);
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ return !!(br->multicast_mld_version == 2);
+#endif
+ default:
+ return false;
+ }
+}
+
static inline int br_multicast_igmp_type(const struct sk_buff *skb)
{
return BR_INPUT_SKB_CB(skb)->igmp;
}
+
+static inline unsigned long br_multicast_lmqt(const struct net_bridge *br)
+{
+ return br->multicast_last_member_interval *
+ br->multicast_last_member_count;
+}
+
+static inline unsigned long br_multicast_gmi(const struct net_bridge *br)
+{
+ /* use the RFC default of 2 for QRV */
+ return 2 * br->multicast_query_interval +
+ br->multicast_query_response_interval;
+}
#else
static inline int br_multicast_rcv(struct net_bridge *br,
struct net_bridge_port *port,
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index f9092c71225f..3e493eb85bb2 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -140,7 +140,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
return err == -EOPNOTSUPP ? 0 : err;
}
-/* Returns a master vlan, if it didn't exist it gets created. In all cases a
+/* Returns a master vlan, if it didn't exist it gets created. In all cases
* a reference is taken to the master vlan before returning.
*/
static struct net_bridge_vlan *
@@ -1288,11 +1288,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
}
}
-static int __br_vlan_get_pvid(const struct net_device *dev,
- struct net_bridge_port *p, u16 *p_pvid)
+int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
{
struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
+ ASSERT_RTNL();
+ p = br_port_get_check_rtnl(dev);
if (p)
vg = nbp_vlan_group(p);
else if (netif_is_bridge_master(dev))
@@ -1303,18 +1305,23 @@ static int __br_vlan_get_pvid(const struct net_device *dev,
*p_pvid = br_get_pvid(vg);
return 0;
}
-
-int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
-{
- ASSERT_RTNL();
-
- return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid);
-}
EXPORT_SYMBOL_GPL(br_vlan_get_pvid);
int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
{
- return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid);
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
+
+ p = br_port_get_check_rcu(dev);
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else if (netif_is_bridge_master(dev))
+ vg = br_vlan_group_rcu(netdev_priv(dev));
+ else
+ return -EINVAL;
+
+ *p_pvid = br_get_pvid(vg);
+ return 0;
}
EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
@@ -1353,7 +1360,7 @@ static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
}
static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
- __always_unused void *data)
+ __always_unused struct netdev_nested_priv *priv)
{
return br_vlan_is_bind_vlan_dev(dev);
}
@@ -1376,9 +1383,9 @@ struct br_vlan_bind_walk_data {
};
static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev,
- void *data_in)
+ struct netdev_nested_priv *priv)
{
- struct br_vlan_bind_walk_data *data = data_in;
+ struct br_vlan_bind_walk_data *data = priv->data;
int found = 0;
if (br_vlan_is_bind_vlan_dev(dev) &&
@@ -1396,10 +1403,13 @@ br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
struct br_vlan_bind_walk_data data = {
.vid = vid,
};
+ struct netdev_nested_priv priv = {
+ .data = (void *)&data,
+ };
rcu_read_lock();
netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
- &data);
+ &priv);
rcu_read_unlock();
return data.result;
@@ -1480,9 +1490,9 @@ struct br_vlan_link_state_walk_data {
};
static int br_vlan_link_state_change_fn(struct net_device *vlan_dev,
- void *data_in)
+ struct netdev_nested_priv *priv)
{
- struct br_vlan_link_state_walk_data *data = data_in;
+ struct br_vlan_link_state_walk_data *data = priv->data;
if (br_vlan_is_bind_vlan_dev(vlan_dev))
br_vlan_set_vlan_dev_state(data->br, vlan_dev);
@@ -1496,10 +1506,13 @@ static void br_vlan_link_state_change(struct net_device *dev,
struct br_vlan_link_state_walk_data data = {
.br = br
};
+ struct netdev_nested_priv priv = {
+ .data = (void *)&data,
+ };
rcu_read_lock();
netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn,
- &data);
+ &priv);
rcu_read_unlock();
}
@@ -1884,8 +1897,8 @@ out_err:
}
static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = {
- [BRIDGE_VLANDB_ENTRY_INFO] = { .type = NLA_EXACT_LEN,
- .len = sizeof(struct bridge_vlan_info) },
+ [BRIDGE_VLANDB_ENTRY_INFO] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct bridge_vlan_info)),
[BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 },
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 0d6d20c9105e..8f68afda5f81 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -15,7 +15,6 @@
#include <linux/netfilter_bridge/ebt_stp.h>
#define BPDU_TYPE_CONFIG 0
-#define BPDU_TYPE_TCN 0x80
struct stp_header {
u8 dsap;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index d0a4d0ac7045..9cef9496a707 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -21,7 +21,6 @@
#define SRVL_FLOW_OFF 0x81
#define SRVL_FLOW_ON 0x80
#define SRVL_SET_PIN 0x82
-#define SRVL_CTRL_PKT_SIZE 1
#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
diff --git a/net/can/Kconfig b/net/can/Kconfig
index 25436a715db3..224e5e0283a9 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -55,6 +55,20 @@ config CAN_GW
source "net/can/j1939/Kconfig"
+config CAN_ISOTP
+ tristate "ISO 15765-2:2016 CAN transport protocol"
+ help
+ CAN Transport Protocols offer support for segmented Point-to-Point
+ communication between CAN nodes via two defined CAN Identifiers.
+ As CAN frames can only transport a small amount of data bytes
+ (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this
+ segmentation is needed to transport longer PDUs as needed e.g. for
+ vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic.
+ This protocol driver implements data transfers according to
+ ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types.
+ If you want to perform automotive vehicle diagnostic services (UDS),
+ say 'y'.
+
source "drivers/net/can/Kconfig"
endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 08bd217fc051..58f2c31c1ef3 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_CAN_GW) += can-gw.o
can-gw-y := gw.o
obj-$(CONFIG_CAN_J1939) += j1939/
+
+obj-$(CONFIG_CAN_ISOTP) += can-isotp.o
+can-isotp-y := isotp.o
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5c06404bdf3e..6373ab9c5507 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -338,7 +338,7 @@ static unsigned int effhash(canid_t can_id)
* can_rcv_list_find - determine optimal filterlist inside device filter struct
* @can_id: pointer to CAN identifier of a given can_filter
* @mask: pointer to CAN mask of a given can_filter
- * @d: pointer to the device filter struct
+ * @dev_rcv_lists: pointer to the device filter struct
*
* Description:
* Returns the optimal filterlist to reduce the filter handling in the
@@ -358,7 +358,7 @@ static unsigned int effhash(canid_t can_id)
*
* Return:
* Pointer to optimal filterlist for the given can_id/mask pair.
- * Constistency checked mask.
+ * Consistency checked mask.
* Reduced can_id to have a preprocessed filter compare value.
*/
static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask,
@@ -411,7 +411,7 @@ static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask,
/**
* can_rx_register - subscribe CAN frames from a specific interface
* @net: the applicable net namespace
- * @dev: pointer to netdevice (NULL => subcribe from 'all' CAN devices list)
+ * @dev: pointer to netdevice (NULL => subscribe from 'all' CAN devices list)
* @can_id: CAN identifier (see description)
* @mask: CAN mask (see description)
* @func: callback function on filter match
@@ -875,7 +875,7 @@ static __init int can_init(void)
offsetof(struct can_frame, data) !=
offsetof(struct canfd_frame, data));
- pr_info("can: controller area network core (" CAN_VERSION_STRING ")\n");
+ pr_info("can: controller area network core\n");
rcv_cache = kmem_cache_create("can_receiver", sizeof(struct receiver),
0, 0, NULL);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d14ea12affb1..0e5c37be4a2b 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
*
@@ -81,8 +81,6 @@
(CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
-#define CAN_BCM_VERSION "20170425"
-
MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>");
@@ -1696,7 +1694,7 @@ static int __init bcm_module_init(void)
{
int err;
- pr_info("can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n");
+ pr_info("can: broadcast manager protocol\n");
err = can_proto_register(&bcm_can_proto);
if (err < 0) {
diff --git a/net/can/gw.c b/net/can/gw.c
index 65d60c93af29..6b790b6ff8d2 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/* gw.c - CAN frame Gateway/Router/Bridge with netlink interface
*
* Copyright (c) 2019 Volkswagen Group Electronic Research
@@ -59,7 +59,6 @@
#include <net/net_namespace.h>
#include <net/sock.h>
-#define CAN_GW_VERSION "20190810"
#define CAN_GW_NAME "can-gw"
MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -1194,8 +1193,7 @@ static __init int cgw_module_init(void)
/* sanitize given module parameter */
max_hops = clamp_t(unsigned int, max_hops, CGW_MIN_HOPS, CGW_MAX_HOPS);
- pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
- max_hops);
+ pr_info("can: netlink gateway - max_hops=%d\n", max_hops);
ret = register_pernet_subsys(&cangw_pernet_ops);
if (ret)
diff --git a/net/can/isotp.c b/net/can/isotp.c
new file mode 100644
index 000000000000..4c2062875893
--- /dev/null
+++ b/net/can/isotp.c
@@ -0,0 +1,1424 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/* isotp.c - ISO 15765-2 CAN transport protocol for protocol family CAN
+ *
+ * This implementation does not provide ISO-TP specific return values to the
+ * userspace.
+ *
+ * - RX path timeout of data reception leads to -ETIMEDOUT
+ * - RX path SN mismatch leads to -EILSEQ
+ * - RX path data reception with wrong padding leads to -EBADMSG
+ * - TX path flowcontrol reception timeout leads to -ECOMM
+ * - TX path flowcontrol reception overflow leads to -EMSGSIZE
+ * - TX path flowcontrol reception with wrong layout/padding leads to -EBADMSG
+ * - when a transfer (tx) is on the run the next write() blocks until it's done
+ * - use CAN_ISOTP_WAIT_TX_DONE flag to block the caller until the PDU is sent
+ * - as we have static buffers the check whether the PDU fits into the buffer
+ * is done at FF reception time (no support for sending 'wait frames')
+ * - take care of the tx-queue-len as traffic shaping is still on the TODO list
+ *
+ * Copyright (c) 2020 Volkswagen Group Electronic Research
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
+#include <linux/wait.h>
+#include <linux/uio.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/if_arp.h>
+#include <linux/skbuff.h>
+#include <linux/can.h>
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/can/isotp.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+
+MODULE_DESCRIPTION("PF_CAN isotp 15765-2:2016 protocol");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>");
+MODULE_ALIAS("can-proto-6");
+
+#define SINGLE_MASK(id) (((id) & CAN_EFF_FLAG) ? \
+ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
+ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
+
+/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can
+ * take full 32 bit values (4 Gbyte). We would need some good concept to handle
+ * this between user space and kernel space. For now increase the static buffer
+ * to something about 8 kbyte to be able to test this new functionality.
+ */
+#define MAX_MSG_LENGTH 8200
+
+/* N_PCI type values in bits 7-4 of N_PCI bytes */
+#define N_PCI_SF 0x00 /* single frame */
+#define N_PCI_FF 0x10 /* first frame */
+#define N_PCI_CF 0x20 /* consecutive frame */
+#define N_PCI_FC 0x30 /* flow control */
+
+#define N_PCI_SZ 1 /* size of the PCI byte #1 */
+#define SF_PCI_SZ4 1 /* size of SingleFrame PCI including 4 bit SF_DL */
+#define SF_PCI_SZ8 2 /* size of SingleFrame PCI including 8 bit SF_DL */
+#define FF_PCI_SZ12 2 /* size of FirstFrame PCI including 12 bit FF_DL */
+#define FF_PCI_SZ32 6 /* size of FirstFrame PCI including 32 bit FF_DL */
+#define FC_CONTENT_SZ 3 /* flow control content size in byte (FS/BS/STmin) */
+
+#define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA)
+
+/* Flow Status given in FC frame */
+#define ISOTP_FC_CTS 0 /* clear to send */
+#define ISOTP_FC_WT 1 /* wait */
+#define ISOTP_FC_OVFLW 2 /* overflow */
+
+enum {
+ ISOTP_IDLE = 0,
+ ISOTP_WAIT_FIRST_FC,
+ ISOTP_WAIT_FC,
+ ISOTP_WAIT_DATA,
+ ISOTP_SENDING
+};
+
+struct tpcon {
+ int idx;
+ int len;
+ u8 state;
+ u8 bs;
+ u8 sn;
+ u8 ll_dl;
+ u8 buf[MAX_MSG_LENGTH + 1];
+};
+
+struct isotp_sock {
+ struct sock sk;
+ int bound;
+ int ifindex;
+ canid_t txid;
+ canid_t rxid;
+ ktime_t tx_gap;
+ ktime_t lastrxcf_tstamp;
+ struct hrtimer rxtimer, txtimer;
+ struct can_isotp_options opt;
+ struct can_isotp_fc_options rxfc, txfc;
+ struct can_isotp_ll_options ll;
+ u32 force_tx_stmin;
+ u32 force_rx_stmin;
+ struct tpcon rx, tx;
+ struct notifier_block notifier;
+ wait_queue_head_t wait;
+};
+
+static inline struct isotp_sock *isotp_sk(const struct sock *sk)
+{
+ return (struct isotp_sock *)sk;
+}
+
+static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ rxtimer);
+ struct sock *sk = &so->sk;
+
+ if (so->rx.state == ISOTP_WAIT_DATA) {
+ /* we did not get new data frames in time */
+
+ /* report 'connection timed out' */
+ sk->sk_err = ETIMEDOUT;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+
+ /* reset rx state */
+ so->rx.state = ISOTP_IDLE;
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
+{
+ struct net_device *dev;
+ struct sk_buff *nskb;
+ struct canfd_frame *ncf;
+ struct isotp_sock *so = isotp_sk(sk);
+ int can_send_ret;
+
+ nskb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv), gfp_any());
+ if (!nskb)
+ return 1;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev) {
+ kfree_skb(nskb);
+ return 1;
+ }
+
+ can_skb_reserve(nskb);
+ can_skb_prv(nskb)->ifindex = dev->ifindex;
+ can_skb_prv(nskb)->skbcnt = 0;
+
+ nskb->dev = dev;
+ can_skb_set_owner(nskb, sk);
+ ncf = (struct canfd_frame *)nskb->data;
+ skb_put(nskb, so->ll.mtu);
+
+ /* create & send flow control reply */
+ ncf->can_id = so->txid;
+
+ if (so->opt.flags & CAN_ISOTP_TX_PADDING) {
+ memset(ncf->data, so->opt.txpad_content, CAN_MAX_DLEN);
+ ncf->len = CAN_MAX_DLEN;
+ } else {
+ ncf->len = ae + FC_CONTENT_SZ;
+ }
+
+ ncf->data[ae] = N_PCI_FC | flowstatus;
+ ncf->data[ae + 1] = so->rxfc.bs;
+ ncf->data[ae + 2] = so->rxfc.stmin;
+
+ if (ae)
+ ncf->data[0] = so->opt.ext_address;
+
+ if (so->ll.mtu == CANFD_MTU)
+ ncf->flags = so->ll.tx_flags;
+
+ can_send_ret = can_send(nskb, 1);
+ if (can_send_ret)
+ pr_notice_once("can-isotp: %s: can_send_ret %d\n",
+ __func__, can_send_ret);
+
+ dev_put(dev);
+
+ /* reset blocksize counter */
+ so->rx.bs = 0;
+
+ /* reset last CF frame rx timestamp for rx stmin enforcement */
+ so->lastrxcf_tstamp = ktime_set(0, 0);
+
+ /* start rx timeout watchdog */
+ hrtimer_start(&so->rxtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ return 0;
+}
+
+static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
+
+ memset(addr, 0, sizeof(*addr));
+ addr->can_family = AF_CAN;
+ addr->can_ifindex = skb->dev->ifindex;
+
+ if (sock_queue_rcv_skb(sk, skb) < 0)
+ kfree_skb(skb);
+}
+
+static u8 padlen(u8 datalen)
+{
+ const u8 plen[] = {8, 8, 8, 8, 8, 8, 8, 8, 8, /* 0 - 8 */
+ 12, 12, 12, 12, /* 9 - 12 */
+ 16, 16, 16, 16, /* 13 - 16 */
+ 20, 20, 20, 20, /* 17 - 20 */
+ 24, 24, 24, 24, /* 21 - 24 */
+ 32, 32, 32, 32, 32, 32, 32, 32, /* 25 - 32 */
+ 48, 48, 48, 48, 48, 48, 48, 48, /* 33 - 40 */
+ 48, 48, 48, 48, 48, 48, 48, 48}; /* 41 - 48 */
+
+ if (datalen > 48)
+ return 64;
+
+ return plen[datalen];
+}
+
+/* check for length optimization and return 1/true when the check fails */
+static int check_optimized(struct canfd_frame *cf, int start_index)
+{
+ /* for CAN_DL <= 8 the start_index is equal to the CAN_DL as the
+ * padding would start at this point. E.g. if the padding would
+ * start at cf.data[7] cf->len has to be 7 to be optimal.
+ * Note: The data[] index starts with zero.
+ */
+ if (cf->len <= CAN_MAX_DLEN)
+ return (cf->len != start_index);
+
+ /* This relation is also valid in the non-linear DLC range, where
+ * we need to take care of the minimal next possible CAN_DL.
+ * The correct check would be (padlen(cf->len) != padlen(start_index)).
+ * But as cf->len can only take discrete values from 12, .., 64 at this
+ * point the padlen(cf->len) is always equal to cf->len.
+ */
+ return (cf->len != padlen(start_index));
+}
+
+/* check padding and return 1/true when the check fails */
+static int check_pad(struct isotp_sock *so, struct canfd_frame *cf,
+ int start_index, u8 content)
+{
+ int i;
+
+ /* no RX_PADDING value => check length of optimized frame length */
+ if (!(so->opt.flags & CAN_ISOTP_RX_PADDING)) {
+ if (so->opt.flags & CAN_ISOTP_CHK_PAD_LEN)
+ return check_optimized(cf, start_index);
+
+ /* no valid test against empty value => ignore frame */
+ return 1;
+ }
+
+ /* check datalength of correctly padded CAN frame */
+ if ((so->opt.flags & CAN_ISOTP_CHK_PAD_LEN) &&
+ cf->len != padlen(cf->len))
+ return 1;
+
+ /* check padding content */
+ if (so->opt.flags & CAN_ISOTP_CHK_PAD_DATA) {
+ for (i = start_index; i < cf->len; i++)
+ if (cf->data[i] != content)
+ return 1;
+ }
+ return 0;
+}
+
+static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
+{
+ struct sock *sk = &so->sk;
+
+ if (so->tx.state != ISOTP_WAIT_FC &&
+ so->tx.state != ISOTP_WAIT_FIRST_FC)
+ return 0;
+
+ hrtimer_cancel(&so->txtimer);
+
+ if ((cf->len < ae + FC_CONTENT_SZ) ||
+ ((so->opt.flags & ISOTP_CHECK_PADDING) &&
+ check_pad(so, cf, ae + FC_CONTENT_SZ, so->opt.rxpad_content))) {
+ /* malformed PDU - report 'not a data message' */
+ sk->sk_err = EBADMSG;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+ return 1;
+ }
+
+ /* get communication parameters only from the first FC frame */
+ if (so->tx.state == ISOTP_WAIT_FIRST_FC) {
+ so->txfc.bs = cf->data[ae + 1];
+ so->txfc.stmin = cf->data[ae + 2];
+
+ /* fix wrong STmin values according spec */
+ if (so->txfc.stmin > 0x7F &&
+ (so->txfc.stmin < 0xF1 || so->txfc.stmin > 0xF9))
+ so->txfc.stmin = 0x7F;
+
+ so->tx_gap = ktime_set(0, 0);
+ /* add transmission time for CAN frame N_As */
+ so->tx_gap = ktime_add_ns(so->tx_gap, so->opt.frame_txtime);
+ /* add waiting time for consecutive frames N_Cs */
+ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
+ so->tx_gap = ktime_add_ns(so->tx_gap,
+ so->force_tx_stmin);
+ else if (so->txfc.stmin < 0x80)
+ so->tx_gap = ktime_add_ns(so->tx_gap,
+ so->txfc.stmin * 1000000);
+ else
+ so->tx_gap = ktime_add_ns(so->tx_gap,
+ (so->txfc.stmin - 0xF0)
+ * 100000);
+ so->tx.state = ISOTP_WAIT_FC;
+ }
+
+ switch (cf->data[ae] & 0x0F) {
+ case ISOTP_FC_CTS:
+ so->tx.bs = 0;
+ so->tx.state = ISOTP_SENDING;
+ /* start cyclic timer for sending CF frame */
+ hrtimer_start(&so->txtimer, so->tx_gap,
+ HRTIMER_MODE_REL_SOFT);
+ break;
+
+ case ISOTP_FC_WT:
+ /* start timer to wait for next FC frame */
+ hrtimer_start(&so->txtimer, ktime_set(1, 0),
+ HRTIMER_MODE_REL_SOFT);
+ break;
+
+ case ISOTP_FC_OVFLW:
+ /* overflow on receiver side - report 'message too long' */
+ sk->sk_err = EMSGSIZE;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+ fallthrough;
+
+ default:
+ /* stop this tx job */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+ }
+ return 0;
+}
+
+static int isotp_rcv_sf(struct sock *sk, struct canfd_frame *cf, int pcilen,
+ struct sk_buff *skb, int len)
+{
+ struct isotp_sock *so = isotp_sk(sk);
+ struct sk_buff *nskb;
+
+ hrtimer_cancel(&so->rxtimer);
+ so->rx.state = ISOTP_IDLE;
+
+ if (!len || len > cf->len - pcilen)
+ return 1;
+
+ if ((so->opt.flags & ISOTP_CHECK_PADDING) &&
+ check_pad(so, cf, pcilen + len, so->opt.rxpad_content)) {
+ /* malformed PDU - report 'not a data message' */
+ sk->sk_err = EBADMSG;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+ return 1;
+ }
+
+ nskb = alloc_skb(len, gfp_any());
+ if (!nskb)
+ return 1;
+
+ memcpy(skb_put(nskb, len), &cf->data[pcilen], len);
+
+ nskb->tstamp = skb->tstamp;
+ nskb->dev = skb->dev;
+ isotp_rcv_skb(nskb, sk);
+ return 0;
+}
+
+static int isotp_rcv_ff(struct sock *sk, struct canfd_frame *cf, int ae)
+{
+ struct isotp_sock *so = isotp_sk(sk);
+ int i;
+ int off;
+ int ff_pci_sz;
+
+ hrtimer_cancel(&so->rxtimer);
+ so->rx.state = ISOTP_IDLE;
+
+ /* get the used sender LL_DL from the (first) CAN frame data length */
+ so->rx.ll_dl = padlen(cf->len);
+
+ /* the first frame has to use the entire frame up to LL_DL length */
+ if (cf->len != so->rx.ll_dl)
+ return 1;
+
+ /* get the FF_DL */
+ so->rx.len = (cf->data[ae] & 0x0F) << 8;
+ so->rx.len += cf->data[ae + 1];
+
+ /* Check for FF_DL escape sequence supporting 32 bit PDU length */
+ if (so->rx.len) {
+ ff_pci_sz = FF_PCI_SZ12;
+ } else {
+ /* FF_DL = 0 => get real length from next 4 bytes */
+ so->rx.len = cf->data[ae + 2] << 24;
+ so->rx.len += cf->data[ae + 3] << 16;
+ so->rx.len += cf->data[ae + 4] << 8;
+ so->rx.len += cf->data[ae + 5];
+ ff_pci_sz = FF_PCI_SZ32;
+ }
+
+ /* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+ off = (so->rx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
+
+ if (so->rx.len + ae + off + ff_pci_sz < so->rx.ll_dl)
+ return 1;
+
+ if (so->rx.len > MAX_MSG_LENGTH) {
+ /* send FC frame with overflow status */
+ isotp_send_fc(sk, ae, ISOTP_FC_OVFLW);
+ return 1;
+ }
+
+ /* copy the first received data bytes */
+ so->rx.idx = 0;
+ for (i = ae + ff_pci_sz; i < so->rx.ll_dl; i++)
+ so->rx.buf[so->rx.idx++] = cf->data[i];
+
+ /* initial setup for this pdu reception */
+ so->rx.sn = 1;
+ so->rx.state = ISOTP_WAIT_DATA;
+
+ /* no creation of flow control frames */
+ if (so->opt.flags & CAN_ISOTP_LISTEN_MODE)
+ return 0;
+
+ /* send our first FC frame */
+ isotp_send_fc(sk, ae, ISOTP_FC_CTS);
+ return 0;
+}
+
+static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
+ struct sk_buff *skb)
+{
+ struct isotp_sock *so = isotp_sk(sk);
+ struct sk_buff *nskb;
+ int i;
+
+ if (so->rx.state != ISOTP_WAIT_DATA)
+ return 0;
+
+ /* drop if timestamp gap is less than force_rx_stmin nano secs */
+ if (so->opt.flags & CAN_ISOTP_FORCE_RXSTMIN) {
+ if (ktime_to_ns(ktime_sub(skb->tstamp, so->lastrxcf_tstamp)) <
+ so->force_rx_stmin)
+ return 0;
+
+ so->lastrxcf_tstamp = skb->tstamp;
+ }
+
+ hrtimer_cancel(&so->rxtimer);
+
+ /* CFs are never longer than the FF */
+ if (cf->len > so->rx.ll_dl)
+ return 1;
+
+ /* CFs have usually the LL_DL length */
+ if (cf->len < so->rx.ll_dl) {
+ /* this is only allowed for the last CF */
+ if (so->rx.len - so->rx.idx > so->rx.ll_dl - ae - N_PCI_SZ)
+ return 1;
+ }
+
+ if ((cf->data[ae] & 0x0F) != so->rx.sn) {
+ /* wrong sn detected - report 'illegal byte sequence' */
+ sk->sk_err = EILSEQ;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+
+ /* reset rx state */
+ so->rx.state = ISOTP_IDLE;
+ return 1;
+ }
+ so->rx.sn++;
+ so->rx.sn %= 16;
+
+ for (i = ae + N_PCI_SZ; i < cf->len; i++) {
+ so->rx.buf[so->rx.idx++] = cf->data[i];
+ if (so->rx.idx >= so->rx.len)
+ break;
+ }
+
+ if (so->rx.idx >= so->rx.len) {
+ /* we are done */
+ so->rx.state = ISOTP_IDLE;
+
+ if ((so->opt.flags & ISOTP_CHECK_PADDING) &&
+ check_pad(so, cf, i + 1, so->opt.rxpad_content)) {
+ /* malformed PDU - report 'not a data message' */
+ sk->sk_err = EBADMSG;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+ return 1;
+ }
+
+ nskb = alloc_skb(so->rx.len, gfp_any());
+ if (!nskb)
+ return 1;
+
+ memcpy(skb_put(nskb, so->rx.len), so->rx.buf,
+ so->rx.len);
+
+ nskb->tstamp = skb->tstamp;
+ nskb->dev = skb->dev;
+ isotp_rcv_skb(nskb, sk);
+ return 0;
+ }
+
+ /* no creation of flow control frames */
+ if (so->opt.flags & CAN_ISOTP_LISTEN_MODE)
+ return 0;
+
+ /* perform blocksize handling, if enabled */
+ if (!so->rxfc.bs || ++so->rx.bs < so->rxfc.bs) {
+ /* start rx timeout watchdog */
+ hrtimer_start(&so->rxtimer, ktime_set(1, 0),
+ HRTIMER_MODE_REL_SOFT);
+ return 0;
+ }
+
+ /* we reached the specified blocksize so->rxfc.bs */
+ isotp_send_fc(sk, ae, ISOTP_FC_CTS);
+ return 0;
+}
+
+static void isotp_rcv(struct sk_buff *skb, void *data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct isotp_sock *so = isotp_sk(sk);
+ struct canfd_frame *cf;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+ u8 n_pci_type, sf_dl;
+
+ /* Strictly receive only frames with the configured MTU size
+ * => clear separation of CAN2.0 / CAN FD transport channels
+ */
+ if (skb->len != so->ll.mtu)
+ return;
+
+ cf = (struct canfd_frame *)skb->data;
+
+ /* if enabled: check reception of my configured extended address */
+ if (ae && cf->data[0] != so->opt.rx_ext_address)
+ return;
+
+ n_pci_type = cf->data[ae] & 0xF0;
+
+ if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) {
+ /* check rx/tx path half duplex expectations */
+ if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) ||
+ (so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC))
+ return;
+ }
+
+ switch (n_pci_type) {
+ case N_PCI_FC:
+ /* tx path: flow control frame containing the FC parameters */
+ isotp_rcv_fc(so, cf, ae);
+ break;
+
+ case N_PCI_SF:
+ /* rx path: single frame
+ *
+ * As we do not have a rx.ll_dl configuration, we can only test
+ * if the CAN frames payload length matches the LL_DL == 8
+ * requirements - no matter if it's CAN 2.0 or CAN FD
+ */
+
+ /* get the SF_DL from the N_PCI byte */
+ sf_dl = cf->data[ae] & 0x0F;
+
+ if (cf->len <= CAN_MAX_DLEN) {
+ isotp_rcv_sf(sk, cf, SF_PCI_SZ4 + ae, skb, sf_dl);
+ } else {
+ if (skb->len == CANFD_MTU) {
+ /* We have a CAN FD frame and CAN_DL is greater than 8:
+ * Only frames with the SF_DL == 0 ESC value are valid.
+ *
+ * If so take care of the increased SF PCI size
+ * (SF_PCI_SZ8) to point to the message content behind
+ * the extended SF PCI info and get the real SF_DL
+ * length value from the formerly first data byte.
+ */
+ if (sf_dl == 0)
+ isotp_rcv_sf(sk, cf, SF_PCI_SZ8 + ae, skb,
+ cf->data[SF_PCI_SZ4 + ae]);
+ }
+ }
+ break;
+
+ case N_PCI_FF:
+ /* rx path: first frame */
+ isotp_rcv_ff(sk, cf, ae);
+ break;
+
+ case N_PCI_CF:
+ /* rx path: consecutive frame */
+ isotp_rcv_cf(sk, cf, ae, skb);
+ break;
+ }
+}
+
+static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so,
+ int ae, int off)
+{
+ int pcilen = N_PCI_SZ + ae + off;
+ int space = so->tx.ll_dl - pcilen;
+ int num = min_t(int, so->tx.len - so->tx.idx, space);
+ int i;
+
+ cf->can_id = so->txid;
+ cf->len = num + pcilen;
+
+ if (num < space) {
+ if (so->opt.flags & CAN_ISOTP_TX_PADDING) {
+ /* user requested padding */
+ cf->len = padlen(cf->len);
+ memset(cf->data, so->opt.txpad_content, cf->len);
+ } else if (cf->len > CAN_MAX_DLEN) {
+ /* mandatory padding for CAN FD frames */
+ cf->len = padlen(cf->len);
+ memset(cf->data, CAN_ISOTP_DEFAULT_PAD_CONTENT,
+ cf->len);
+ }
+ }
+
+ for (i = 0; i < num; i++)
+ cf->data[pcilen + i] = so->tx.buf[so->tx.idx++];
+
+ if (ae)
+ cf->data[0] = so->opt.ext_address;
+}
+
+static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
+ int ae)
+{
+ int i;
+ int ff_pci_sz;
+
+ cf->can_id = so->txid;
+ cf->len = so->tx.ll_dl;
+ if (ae)
+ cf->data[0] = so->opt.ext_address;
+
+ /* create N_PCI bytes with 12/32 bit FF_DL data length */
+ if (so->tx.len > 4095) {
+ /* use 32 bit FF_DL notation */
+ cf->data[ae] = N_PCI_FF;
+ cf->data[ae + 1] = 0;
+ cf->data[ae + 2] = (u8)(so->tx.len >> 24) & 0xFFU;
+ cf->data[ae + 3] = (u8)(so->tx.len >> 16) & 0xFFU;
+ cf->data[ae + 4] = (u8)(so->tx.len >> 8) & 0xFFU;
+ cf->data[ae + 5] = (u8)so->tx.len & 0xFFU;
+ ff_pci_sz = FF_PCI_SZ32;
+ } else {
+ /* use 12 bit FF_DL notation */
+ cf->data[ae] = (u8)(so->tx.len >> 8) | N_PCI_FF;
+ cf->data[ae + 1] = (u8)so->tx.len & 0xFFU;
+ ff_pci_sz = FF_PCI_SZ12;
+ }
+
+ /* add first data bytes depending on ae */
+ for (i = ae + ff_pci_sz; i < so->tx.ll_dl; i++)
+ cf->data[i] = so->tx.buf[so->tx.idx++];
+
+ so->tx.sn = 1;
+ so->tx.state = ISOTP_WAIT_FIRST_FC;
+}
+
+static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ txtimer);
+ struct sock *sk = &so->sk;
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf;
+ enum hrtimer_restart restart = HRTIMER_NORESTART;
+ int can_send_ret;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+
+ switch (so->tx.state) {
+ case ISOTP_WAIT_FC:
+ case ISOTP_WAIT_FIRST_FC:
+
+ /* we did not get any flow control frame in time */
+
+ /* report 'communication error on send' */
+ sk->sk_err = ECOMM;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+
+ /* reset tx state */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+ break;
+
+ case ISOTP_SENDING:
+
+ /* push out the next segmented pdu */
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev)
+ break;
+
+isotp_tx_burst:
+ skb = alloc_skb(so->ll.mtu + sizeof(struct can_skb_priv),
+ GFP_ATOMIC);
+ if (!skb) {
+ dev_put(dev);
+ break;
+ }
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+ cf = (struct canfd_frame *)skb->data;
+ skb_put(skb, so->ll.mtu);
+
+ /* create consecutive frame */
+ isotp_fill_dataframe(cf, so, ae, 0);
+
+ /* place consecutive frame N_PCI in appropriate index */
+ cf->data[ae] = N_PCI_CF | so->tx.sn++;
+ so->tx.sn %= 16;
+ so->tx.bs++;
+
+ if (so->ll.mtu == CANFD_MTU)
+ cf->flags = so->ll.tx_flags;
+
+ skb->dev = dev;
+ can_skb_set_owner(skb, sk);
+
+ can_send_ret = can_send(skb, 1);
+ if (can_send_ret)
+ pr_notice_once("can-isotp: %s: can_send_ret %d\n",
+ __func__, can_send_ret);
+
+ if (so->tx.idx >= so->tx.len) {
+ /* we are done */
+ so->tx.state = ISOTP_IDLE;
+ dev_put(dev);
+ wake_up_interruptible(&so->wait);
+ break;
+ }
+
+ if (so->txfc.bs && so->tx.bs >= so->txfc.bs) {
+ /* stop and wait for FC */
+ so->tx.state = ISOTP_WAIT_FC;
+ dev_put(dev);
+ hrtimer_set_expires(&so->txtimer,
+ ktime_add(ktime_get(),
+ ktime_set(1, 0)));
+ restart = HRTIMER_RESTART;
+ break;
+ }
+
+ /* no gap between data frames needed => use burst mode */
+ if (!so->tx_gap)
+ goto isotp_tx_burst;
+
+ /* start timer to send next data frame with correct delay */
+ dev_put(dev);
+ hrtimer_set_expires(&so->txtimer,
+ ktime_add(ktime_get(), so->tx_gap));
+ restart = HRTIMER_RESTART;
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ return restart;
+}
+
+static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+{
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
+ struct sk_buff *skb;
+ struct net_device *dev;
+ struct canfd_frame *cf;
+ int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0;
+ int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0;
+ int off;
+ int err;
+
+ if (!so->bound)
+ return -EADDRNOTAVAIL;
+
+ /* we do not support multiple buffers - for now */
+ if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) {
+ if (msg->msg_flags & MSG_DONTWAIT)
+ return -EAGAIN;
+
+ /* wait for complete transmission of current pdu */
+ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ }
+
+ if (!size || size > MAX_MSG_LENGTH)
+ return -EINVAL;
+
+ err = memcpy_from_msg(so->tx.buf, msg, size);
+ if (err < 0)
+ return err;
+
+ dev = dev_get_by_index(sock_net(sk), so->ifindex);
+ if (!dev)
+ return -ENXIO;
+
+ skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
+ msg->msg_flags & MSG_DONTWAIT, &err);
+ if (!skb) {
+ dev_put(dev);
+ return err;
+ }
+
+ can_skb_reserve(skb);
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+ so->tx.state = ISOTP_SENDING;
+ so->tx.len = size;
+ so->tx.idx = 0;
+
+ cf = (struct canfd_frame *)skb->data;
+ skb_put(skb, so->ll.mtu);
+
+ /* take care of a potential SF_DL ESC offset for TX_DL > 8 */
+ off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
+
+ /* check for single frame transmission depending on TX_DL */
+ if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) {
+ /* The message size generally fits into a SingleFrame - good.
+ *
+ * SF_DL ESC offset optimization:
+ *
+ * When TX_DL is greater 8 but the message would still fit
+ * into a 8 byte CAN frame, we can omit the offset.
+ * This prevents a protocol caused length extension from
+ * CAN_DL = 8 to CAN_DL = 12 due to the SF_SL ESC handling.
+ */
+ if (size <= CAN_MAX_DLEN - SF_PCI_SZ4 - ae)
+ off = 0;
+
+ isotp_fill_dataframe(cf, so, ae, off);
+
+ /* place single frame N_PCI w/o length in appropriate index */
+ cf->data[ae] = N_PCI_SF;
+
+ /* place SF_DL size value depending on the SF_DL ESC offset */
+ if (off)
+ cf->data[SF_PCI_SZ4 + ae] = size;
+ else
+ cf->data[ae] |= size;
+
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
+
+ /* don't enable wait queue for a single frame transmission */
+ wait_tx_done = 0;
+ } else {
+ /* send first frame and wait for FC */
+
+ isotp_create_fframe(cf, so, ae);
+
+ /* start timeout for FC */
+ hrtimer_start(&so->txtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
+ }
+
+ /* send the first or only CAN frame */
+ if (so->ll.mtu == CANFD_MTU)
+ cf->flags = so->ll.tx_flags;
+
+ skb->dev = dev;
+ skb->sk = sk;
+ err = can_send(skb, 1);
+ dev_put(dev);
+ if (err) {
+ pr_notice_once("can-isotp: %s: can_send_ret %d\n",
+ __func__, err);
+ return err;
+ }
+
+ if (wait_tx_done) {
+ /* wait for complete transmission of current pdu */
+ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ }
+
+ return size;
+}
+
+static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ int err = 0;
+ int noblock;
+
+ noblock = flags & MSG_DONTWAIT;
+ flags &= ~MSG_DONTWAIT;
+
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ return err;
+
+ if (size < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ size = skb->len;
+
+ err = memcpy_to_msg(msg, skb->data, size);
+ if (err < 0) {
+ skb_free_datagram(sk, skb);
+ return err;
+ }
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ if (msg->msg_name) {
+ msg->msg_namelen = sizeof(struct sockaddr_can);
+ memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+ }
+
+ skb_free_datagram(sk, skb);
+
+ return size;
+}
+
+static int isotp_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so;
+ struct net *net;
+
+ if (!sk)
+ return 0;
+
+ so = isotp_sk(sk);
+ net = sock_net(sk);
+
+ /* wait for complete transmission of current pdu */
+ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+
+ unregister_netdevice_notifier(&so->notifier);
+
+ lock_sock(sk);
+
+ hrtimer_cancel(&so->txtimer);
+ hrtimer_cancel(&so->rxtimer);
+
+ /* remove current filters & unregister */
+ if (so->bound) {
+ if (so->ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, so->ifindex);
+ if (dev) {
+ can_rx_unregister(net, dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+ dev_put(dev);
+ }
+ }
+ }
+
+ so->ifindex = 0;
+ so->bound = 0;
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+
+ release_sock(sk);
+ sock_put(sk);
+
+ return 0;
+}
+
+static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
+ struct net *net = sock_net(sk);
+ int ifindex;
+ struct net_device *dev;
+ int err = 0;
+ int notify_enetdown = 0;
+
+ if (len < CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.tp))
+ return -EINVAL;
+
+ if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
+ return -EADDRNOTAVAIL;
+
+ if ((addr->can_addr.tp.rx_id | addr->can_addr.tp.tx_id) &
+ (CAN_ERR_FLAG | CAN_RTR_FLAG))
+ return -EADDRNOTAVAIL;
+
+ if (!addr->can_ifindex)
+ return -ENODEV;
+
+ lock_sock(sk);
+
+ if (so->bound && addr->can_ifindex == so->ifindex &&
+ addr->can_addr.tp.rx_id == so->rxid &&
+ addr->can_addr.tp.tx_id == so->txid)
+ goto out;
+
+ dev = dev_get_by_index(net, addr->can_ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out;
+ }
+ if (dev->type != ARPHRD_CAN) {
+ dev_put(dev);
+ err = -ENODEV;
+ goto out;
+ }
+ if (dev->mtu < so->ll.mtu) {
+ dev_put(dev);
+ err = -EINVAL;
+ goto out;
+ }
+ if (!(dev->flags & IFF_UP))
+ notify_enetdown = 1;
+
+ ifindex = dev->ifindex;
+
+ can_rx_register(net, dev, addr->can_addr.tp.rx_id,
+ SINGLE_MASK(addr->can_addr.tp.rx_id), isotp_rcv, sk,
+ "isotp", sk);
+
+ dev_put(dev);
+
+ if (so->bound) {
+ /* unregister old filter */
+ if (so->ifindex) {
+ dev = dev_get_by_index(net, so->ifindex);
+ if (dev) {
+ can_rx_unregister(net, dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+ dev_put(dev);
+ }
+ }
+ }
+
+ /* switch to new settings */
+ so->ifindex = ifindex;
+ so->rxid = addr->can_addr.tp.rx_id;
+ so->txid = addr->can_addr.tp.tx_id;
+ so->bound = 1;
+
+out:
+ release_sock(sk);
+
+ if (notify_enetdown) {
+ sk->sk_err = ENETDOWN;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+ }
+
+ return err;
+}
+
+static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
+{
+ struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
+
+ if (peer)
+ return -EOPNOTSUPP;
+
+ addr->can_family = AF_CAN;
+ addr->can_ifindex = so->ifindex;
+ addr->can_addr.tp.rx_id = so->rxid;
+ addr->can_addr.tp.tx_id = so->txid;
+
+ return sizeof(*addr);
+}
+
+static int isotp_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
+ int ret = 0;
+
+ if (level != SOL_CAN_ISOTP)
+ return -EINVAL;
+
+ switch (optname) {
+ case CAN_ISOTP_OPTS:
+ if (optlen != sizeof(struct can_isotp_options))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&so->opt, optval, optlen))
+ return -EFAULT;
+
+ /* no separate rx_ext_address is given => use ext_address */
+ if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
+ so->opt.rx_ext_address = so->opt.ext_address;
+ break;
+
+ case CAN_ISOTP_RECV_FC:
+ if (optlen != sizeof(struct can_isotp_fc_options))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&so->rxfc, optval, optlen))
+ return -EFAULT;
+ break;
+
+ case CAN_ISOTP_TX_STMIN:
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&so->force_tx_stmin, optval, optlen))
+ return -EFAULT;
+ break;
+
+ case CAN_ISOTP_RX_STMIN:
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&so->force_rx_stmin, optval, optlen))
+ return -EFAULT;
+ break;
+
+ case CAN_ISOTP_LL_OPTS:
+ if (optlen == sizeof(struct can_isotp_ll_options)) {
+ struct can_isotp_ll_options ll;
+
+ if (copy_from_sockptr(&ll, optval, optlen))
+ return -EFAULT;
+
+ /* check for correct ISO 11898-1 DLC data length */
+ if (ll.tx_dl != padlen(ll.tx_dl))
+ return -EINVAL;
+
+ if (ll.mtu != CAN_MTU && ll.mtu != CANFD_MTU)
+ return -EINVAL;
+
+ if (ll.mtu == CAN_MTU && ll.tx_dl > CAN_MAX_DLEN)
+ return -EINVAL;
+
+ memcpy(&so->ll, &ll, sizeof(ll));
+
+ /* set ll_dl for tx path to similar place as for rx */
+ so->tx.ll_dl = ll.tx_dl;
+ } else {
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ ret = -ENOPROTOOPT;
+ }
+
+ return ret;
+}
+
+static int isotp_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct sock *sk = sock->sk;
+ struct isotp_sock *so = isotp_sk(sk);
+ int len;
+ void *val;
+
+ if (level != SOL_CAN_ISOTP)
+ return -EINVAL;
+ if (get_user(len, optlen))
+ return -EFAULT;
+ if (len < 0)
+ return -EINVAL;
+
+ switch (optname) {
+ case CAN_ISOTP_OPTS:
+ len = min_t(int, len, sizeof(struct can_isotp_options));
+ val = &so->opt;
+ break;
+
+ case CAN_ISOTP_RECV_FC:
+ len = min_t(int, len, sizeof(struct can_isotp_fc_options));
+ val = &so->rxfc;
+ break;
+
+ case CAN_ISOTP_TX_STMIN:
+ len = min_t(int, len, sizeof(u32));
+ val = &so->force_tx_stmin;
+ break;
+
+ case CAN_ISOTP_RX_STMIN:
+ len = min_t(int, len, sizeof(u32));
+ val = &so->force_rx_stmin;
+ break;
+
+ case CAN_ISOTP_LL_OPTS:
+ len = min_t(int, len, sizeof(struct can_isotp_ll_options));
+ val = &so->ll;
+ break;
+
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, val, len))
+ return -EFAULT;
+ return 0;
+}
+
+static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier);
+ struct sock *sk = &so->sk;
+
+ if (!net_eq(dev_net(dev), sock_net(sk)))
+ return NOTIFY_DONE;
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+
+ if (so->ifindex != dev->ifindex)
+ return NOTIFY_DONE;
+
+ switch (msg) {
+ case NETDEV_UNREGISTER:
+ lock_sock(sk);
+ /* remove current filters & unregister */
+ if (so->bound)
+ can_rx_unregister(dev_net(dev), dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
+ so->ifindex = 0;
+ so->bound = 0;
+ release_sock(sk);
+
+ sk->sk_err = ENODEV;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+ break;
+
+ case NETDEV_DOWN:
+ sk->sk_err = ENETDOWN;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_error_report(sk);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int isotp_init(struct sock *sk)
+{
+ struct isotp_sock *so = isotp_sk(sk);
+
+ so->ifindex = 0;
+ so->bound = 0;
+
+ so->opt.flags = CAN_ISOTP_DEFAULT_FLAGS;
+ so->opt.ext_address = CAN_ISOTP_DEFAULT_EXT_ADDRESS;
+ so->opt.rx_ext_address = CAN_ISOTP_DEFAULT_EXT_ADDRESS;
+ so->opt.rxpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
+ so->opt.txpad_content = CAN_ISOTP_DEFAULT_PAD_CONTENT;
+ so->opt.frame_txtime = CAN_ISOTP_DEFAULT_FRAME_TXTIME;
+ so->rxfc.bs = CAN_ISOTP_DEFAULT_RECV_BS;
+ so->rxfc.stmin = CAN_ISOTP_DEFAULT_RECV_STMIN;
+ so->rxfc.wftmax = CAN_ISOTP_DEFAULT_RECV_WFTMAX;
+ so->ll.mtu = CAN_ISOTP_DEFAULT_LL_MTU;
+ so->ll.tx_dl = CAN_ISOTP_DEFAULT_LL_TX_DL;
+ so->ll.tx_flags = CAN_ISOTP_DEFAULT_LL_TX_FLAGS;
+
+ /* set ll_dl for tx path to similar place as for rx */
+ so->tx.ll_dl = so->ll.tx_dl;
+
+ so->rx.state = ISOTP_IDLE;
+ so->tx.state = ISOTP_IDLE;
+
+ hrtimer_init(&so->rxtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->rxtimer.function = isotp_rx_timer_handler;
+ hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->txtimer.function = isotp_tx_timer_handler;
+
+ init_waitqueue_head(&so->wait);
+
+ so->notifier.notifier_call = isotp_notifier;
+ register_netdevice_notifier(&so->notifier);
+
+ return 0;
+}
+
+static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ /* no ioctls for socket layer -> hand it down to NIC layer */
+ return -ENOIOCTLCMD;
+}
+
+static const struct proto_ops isotp_ops = {
+ .family = PF_CAN,
+ .release = isotp_release,
+ .bind = isotp_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = isotp_getname,
+ .poll = datagram_poll,
+ .ioctl = isotp_sock_no_ioctlcmd,
+ .gettstamp = sock_gettstamp,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = isotp_setsockopt,
+ .getsockopt = isotp_getsockopt,
+ .sendmsg = isotp_sendmsg,
+ .recvmsg = isotp_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static struct proto isotp_proto __read_mostly = {
+ .name = "CAN_ISOTP",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct isotp_sock),
+ .init = isotp_init,
+};
+
+static const struct can_proto isotp_can_proto = {
+ .type = SOCK_DGRAM,
+ .protocol = CAN_ISOTP,
+ .ops = &isotp_ops,
+ .prot = &isotp_proto,
+};
+
+static __init int isotp_module_init(void)
+{
+ int err;
+
+ pr_info("can: isotp protocol\n");
+
+ err = can_proto_register(&isotp_can_proto);
+ if (err < 0)
+ pr_err("can: registration of isotp protocol failed\n");
+
+ return err;
+}
+
+static __exit void isotp_module_exit(void)
+{
+ can_proto_unregister(&isotp_can_proto);
+}
+
+module_init(isotp_module_init);
+module_exit(isotp_module_exit);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 0cec4152f979..e09d087ba240 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -580,6 +580,7 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
/* reserve CAN header */
skb_reserve(skb, offsetof(struct can_frame, data));
@@ -1487,6 +1488,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
skcb = j1939_skb_to_cb(skb);
memcpy(skcb, rel_skcb, sizeof(*skcb));
diff --git a/net/can/proc.c b/net/can/proc.c
index e6881bfc3ed1..550928b8b8a2 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* proc.c - procfs support for Protocol family CAN core module
*
@@ -54,7 +54,6 @@
* proc filenames for the PF_CAN core
*/
-#define CAN_PROC_VERSION "version"
#define CAN_PROC_STATS "stats"
#define CAN_PROC_RESET_STATS "reset_stats"
#define CAN_PROC_RCVLIST_ALL "rcvlist_all"
@@ -293,12 +292,6 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
return 0;
}
-static int can_version_proc_show(struct seq_file *m, void *v)
-{
- seq_printf(m, "%s\n", CAN_VERSION_STRING);
- return 0;
-}
-
static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
struct net_device *dev,
struct can_dev_rcv_lists *dev_rcv_lists)
@@ -441,8 +434,6 @@ void can_init_proc(struct net *net)
}
/* own procfs entries from the AF_CAN core */
- net->can.pde_version = proc_create_net_single(CAN_PROC_VERSION, 0644,
- net->can.proc_dir, can_version_proc_show, NULL);
net->can.pde_stats = proc_create_net_single(CAN_PROC_STATS, 0644,
net->can.proc_dir, can_stats_proc_show, NULL);
net->can.pde_reset_stats = proc_create_net_single(CAN_PROC_RESET_STATS,
@@ -471,9 +462,6 @@ void can_init_proc(struct net *net)
*/
void can_remove_proc(struct net *net)
{
- if (net->can.pde_version)
- remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir);
-
if (net->can.pde_stats)
remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir);
diff --git a/net/can/raw.c b/net/can/raw.c
index 94a9405658dc..6ec8aa1d0da4 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/* raw.c - Raw sockets for protocol family CAN
*
* Copyright (c) 2002-2007 Volkswagen Group Electronic Research
@@ -55,8 +55,6 @@
#include <net/sock.h>
#include <net/net_namespace.h>
-#define CAN_RAW_VERSION CAN_VERSION
-
MODULE_DESCRIPTION("PF_CAN raw protocol");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Urs Thuermann <urs.thuermann@volkswagen.de>");
@@ -154,16 +152,16 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
if (!skb)
return;
- /* Put the datagram to the queue so that raw_recvmsg() can
- * get it from there. We need to pass the interface index to
- * raw_recvmsg(). We pass a whole struct sockaddr_can in skb->cb
- * containing the interface index.
+ /* Put the datagram to the queue so that raw_recvmsg() can get
+ * it from there. We need to pass the interface index to
+ * raw_recvmsg(). We pass a whole struct sockaddr_can in
+ * skb->cb containing the interface index.
*/
sock_skb_cb_check_size(sizeof(struct sockaddr_can));
addr = (struct sockaddr_can *)skb->cb;
memset(addr, 0, sizeof(*addr));
- addr->can_family = AF_CAN;
+ addr->can_family = AF_CAN;
addr->can_ifindex = skb->dev->ifindex;
/* add CAN specific message flags for raw_recvmsg() */
@@ -290,8 +288,8 @@ static int raw_notifier(struct notifier_block *nb,
kfree(ro->filter);
ro->ifindex = 0;
- ro->bound = 0;
- ro->count = 0;
+ ro->bound = 0;
+ ro->count = 0;
release_sock(sk);
sk->sk_err = ENODEV;
@@ -374,8 +372,8 @@ static int raw_release(struct socket *sock)
kfree(ro->filter);
ro->ifindex = 0;
- ro->bound = 0;
- ro->count = 0;
+ ro->bound = 0;
+ ro->count = 0;
free_percpu(ro->uniq);
sock_orphan(sk);
@@ -773,7 +771,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb_setup_tx_timestamp(skb, sk->sk_tsflags);
skb->dev = dev;
- skb->sk = sk;
+ skb->sk = sk;
skb->priority = sk->sk_priority;
err = can_send(skb, ro->loopback);
@@ -801,8 +799,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int err = 0;
int noblock;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
+ noblock = flags & MSG_DONTWAIT;
+ flags &= ~MSG_DONTWAIT;
+
+ if (flags & MSG_ERRQUEUE)
+ return sock_recv_errqueue(sk, msg, size,
+ SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -881,7 +883,7 @@ static __init int raw_module_init(void)
{
int err;
- pr_info("can: raw protocol (rev " CAN_RAW_VERSION ")\n");
+ pr_info("can: raw protocol\n");
err = can_proto_register(&raw_can_proto);
if (err < 0)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index bdfd66ba3843..d4d7a0e52491 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -575,7 +575,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
* coalescing neighboring slab objects into a single frag which
* triggers one of hardened usercopy checks.
*/
- if (page_count(page) >= 1 && !PageSlab(page))
+ if (sendpage_ok(page))
sendpage = sock->ops->sendpage;
else
sendpage = sock_no_sendpage;
diff --git a/net/compat.c b/net/compat.c
index 95ce707a30a3..ddd15af3a283 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -98,8 +98,8 @@ int get_compat_msghdr(struct msghdr *kmsg,
if (err)
return err;
- err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
- len, UIO_FASTIOV, iov, &kmsg->msg_iter);
+ err = import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr), len,
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
return err < 0 ? err : 0;
}
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index b988f48153a4..c907f0dc7f87 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -7,97 +7,13 @@
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
+#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
-#define SK_STORAGE_CREATE_FLAG_MASK \
- (BPF_F_NO_PREALLOC | BPF_F_CLONE)
-
-struct bucket {
- struct hlist_head list;
- raw_spinlock_t lock;
-};
-
-/* Thp map is not the primary owner of a bpf_sk_storage_elem.
- * Instead, the sk->sk_bpf_storage is.
- *
- * The map (bpf_sk_storage_map) is for two purposes
- * 1. Define the size of the "sk local storage". It is
- * the map's value_size.
- *
- * 2. Maintain a list to keep track of all elems such
- * that they can be cleaned up during the map destruction.
- *
- * When a bpf local storage is being looked up for a
- * particular sk, the "bpf_map" pointer is actually used
- * as the "key" to search in the list of elem in
- * sk->sk_bpf_storage.
- *
- * Hence, consider sk->sk_bpf_storage is the mini-map
- * with the "bpf_map" pointer as the searching key.
- */
-struct bpf_sk_storage_map {
- struct bpf_map map;
- /* Lookup elem does not require accessing the map.
- *
- * Updating/Deleting requires a bucket lock to
- * link/unlink the elem from the map. Having
- * multiple buckets to improve contention.
- */
- struct bucket *buckets;
- u32 bucket_log;
- u16 elem_size;
- u16 cache_idx;
-};
-
-struct bpf_sk_storage_data {
- /* smap is used as the searching key when looking up
- * from sk->sk_bpf_storage.
- *
- * Put it in the same cacheline as the data to minimize
- * the number of cachelines access during the cache hit case.
- */
- struct bpf_sk_storage_map __rcu *smap;
- u8 data[] __aligned(8);
-};
-
-/* Linked to bpf_sk_storage and bpf_sk_storage_map */
-struct bpf_sk_storage_elem {
- struct hlist_node map_node; /* Linked to bpf_sk_storage_map */
- struct hlist_node snode; /* Linked to bpf_sk_storage */
- struct bpf_sk_storage __rcu *sk_storage;
- struct rcu_head rcu;
- /* 8 bytes hole */
- /* The data is stored in aother cacheline to minimize
- * the number of cachelines access during a cache hit.
- */
- struct bpf_sk_storage_data sdata ____cacheline_aligned;
-};
-
-#define SELEM(_SDATA) container_of((_SDATA), struct bpf_sk_storage_elem, sdata)
-#define SDATA(_SELEM) (&(_SELEM)->sdata)
-#define BPF_SK_STORAGE_CACHE_SIZE 16
-
-static DEFINE_SPINLOCK(cache_idx_lock);
-static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
-
-struct bpf_sk_storage {
- struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
- struct hlist_head list; /* List of bpf_sk_storage_elem */
- struct sock *sk; /* The sk that owns the the above "list" of
- * bpf_sk_storage_elem.
- */
- struct rcu_head rcu;
- raw_spinlock_t lock; /* Protect adding/removing from the "list" */
-};
-
-static struct bucket *select_bucket(struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *selem)
-{
- return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
-}
+DEFINE_BPF_STORAGE_CACHE(sk_cache);
static int omem_charge(struct sock *sk, unsigned int size)
{
@@ -111,445 +27,38 @@ static int omem_charge(struct sock *sk, unsigned int size)
return -ENOMEM;
}
-static bool selem_linked_to_sk(const struct bpf_sk_storage_elem *selem)
-{
- return !hlist_unhashed(&selem->snode);
-}
-
-static bool selem_linked_to_map(const struct bpf_sk_storage_elem *selem)
-{
- return !hlist_unhashed(&selem->map_node);
-}
-
-static struct bpf_sk_storage_elem *selem_alloc(struct bpf_sk_storage_map *smap,
- struct sock *sk, void *value,
- bool charge_omem)
-{
- struct bpf_sk_storage_elem *selem;
-
- if (charge_omem && omem_charge(sk, smap->elem_size))
- return NULL;
-
- selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
- if (selem) {
- if (value)
- memcpy(SDATA(selem)->data, value, smap->map.value_size);
- return selem;
- }
-
- if (charge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
-
- return NULL;
-}
-
-/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
- * The caller must ensure selem->smap is still valid to be
- * dereferenced for its smap->elem_size and smap->cache_idx.
- */
-static bool __selem_unlink_sk(struct bpf_sk_storage *sk_storage,
- struct bpf_sk_storage_elem *selem,
- bool uncharge_omem)
-{
- struct bpf_sk_storage_map *smap;
- bool free_sk_storage;
- struct sock *sk;
-
- smap = rcu_dereference(SDATA(selem)->smap);
- sk = sk_storage->sk;
-
- /* All uncharging on sk->sk_omem_alloc must be done first.
- * sk may be freed once the last selem is unlinked from sk_storage.
- */
- if (uncharge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
-
- free_sk_storage = hlist_is_singular_node(&selem->snode,
- &sk_storage->list);
- if (free_sk_storage) {
- atomic_sub(sizeof(struct bpf_sk_storage), &sk->sk_omem_alloc);
- sk_storage->sk = NULL;
- /* After this RCU_INIT, sk may be freed and cannot be used */
- RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
-
- /* sk_storage is not freed now. sk_storage->lock is
- * still held and raw_spin_unlock_bh(&sk_storage->lock)
- * will be done by the caller.
- *
- * Although the unlock will be done under
- * rcu_read_lock(), it is more intutivie to
- * read if kfree_rcu(sk_storage, rcu) is done
- * after the raw_spin_unlock_bh(&sk_storage->lock).
- *
- * Hence, a "bool free_sk_storage" is returned
- * to the caller which then calls the kfree_rcu()
- * after unlock.
- */
- }
- hlist_del_init_rcu(&selem->snode);
- if (rcu_access_pointer(sk_storage->cache[smap->cache_idx]) ==
- SDATA(selem))
- RCU_INIT_POINTER(sk_storage->cache[smap->cache_idx], NULL);
-
- kfree_rcu(selem, rcu);
-
- return free_sk_storage;
-}
-
-static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
-{
- struct bpf_sk_storage *sk_storage;
- bool free_sk_storage = false;
-
- if (unlikely(!selem_linked_to_sk(selem)))
- /* selem has already been unlinked from sk */
- return;
-
- sk_storage = rcu_dereference(selem->sk_storage);
- raw_spin_lock_bh(&sk_storage->lock);
- if (likely(selem_linked_to_sk(selem)))
- free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
- raw_spin_unlock_bh(&sk_storage->lock);
-
- if (free_sk_storage)
- kfree_rcu(sk_storage, rcu);
-}
-
-static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
- struct bpf_sk_storage_elem *selem)
-{
- RCU_INIT_POINTER(selem->sk_storage, sk_storage);
- hlist_add_head(&selem->snode, &sk_storage->list);
-}
-
-static void selem_unlink_map(struct bpf_sk_storage_elem *selem)
-{
- struct bpf_sk_storage_map *smap;
- struct bucket *b;
-
- if (unlikely(!selem_linked_to_map(selem)))
- /* selem has already be unlinked from smap */
- return;
-
- smap = rcu_dereference(SDATA(selem)->smap);
- b = select_bucket(smap, selem);
- raw_spin_lock_bh(&b->lock);
- if (likely(selem_linked_to_map(selem)))
- hlist_del_init_rcu(&selem->map_node);
- raw_spin_unlock_bh(&b->lock);
-}
-
-static void selem_link_map(struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *selem)
-{
- struct bucket *b = select_bucket(smap, selem);
-
- raw_spin_lock_bh(&b->lock);
- RCU_INIT_POINTER(SDATA(selem)->smap, smap);
- hlist_add_head_rcu(&selem->map_node, &b->list);
- raw_spin_unlock_bh(&b->lock);
-}
-
-static void selem_unlink(struct bpf_sk_storage_elem *selem)
-{
- /* Always unlink from map before unlinking from sk_storage
- * because selem will be freed after successfully unlinked from
- * the sk_storage.
- */
- selem_unlink_map(selem);
- selem_unlink_sk(selem);
-}
-
-static struct bpf_sk_storage_data *
-__sk_storage_lookup(struct bpf_sk_storage *sk_storage,
- struct bpf_sk_storage_map *smap,
- bool cacheit_lockit)
-{
- struct bpf_sk_storage_data *sdata;
- struct bpf_sk_storage_elem *selem;
-
- /* Fast path (cache hit) */
- sdata = rcu_dereference(sk_storage->cache[smap->cache_idx]);
- if (sdata && rcu_access_pointer(sdata->smap) == smap)
- return sdata;
-
- /* Slow path (cache miss) */
- hlist_for_each_entry_rcu(selem, &sk_storage->list, snode)
- if (rcu_access_pointer(SDATA(selem)->smap) == smap)
- break;
-
- if (!selem)
- return NULL;
-
- sdata = SDATA(selem);
- if (cacheit_lockit) {
- /* spinlock is needed to avoid racing with the
- * parallel delete. Otherwise, publishing an already
- * deleted sdata to the cache will become a use-after-free
- * problem in the next __sk_storage_lookup().
- */
- raw_spin_lock_bh(&sk_storage->lock);
- if (selem_linked_to_sk(selem))
- rcu_assign_pointer(sk_storage->cache[smap->cache_idx],
- sdata);
- raw_spin_unlock_bh(&sk_storage->lock);
- }
-
- return sdata;
-}
-
-static struct bpf_sk_storage_data *
+static struct bpf_local_storage_data *
sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_map *smap;
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage)
return NULL;
- smap = (struct bpf_sk_storage_map *)map;
- return __sk_storage_lookup(sk_storage, smap, cacheit_lockit);
-}
-
-static int check_flags(const struct bpf_sk_storage_data *old_sdata,
- u64 map_flags)
-{
- if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
- /* elem already exists */
- return -EEXIST;
-
- if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
- /* elem doesn't exist, cannot update it */
- return -ENOENT;
-
- return 0;
-}
-
-static int sk_storage_alloc(struct sock *sk,
- struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *first_selem)
-{
- struct bpf_sk_storage *prev_sk_storage, *sk_storage;
- int err;
-
- err = omem_charge(sk, sizeof(*sk_storage));
- if (err)
- return err;
-
- sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
- if (!sk_storage) {
- err = -ENOMEM;
- goto uncharge;
- }
- INIT_HLIST_HEAD(&sk_storage->list);
- raw_spin_lock_init(&sk_storage->lock);
- sk_storage->sk = sk;
-
- __selem_link_sk(sk_storage, first_selem);
- selem_link_map(smap, first_selem);
- /* Publish sk_storage to sk. sk->sk_lock cannot be acquired.
- * Hence, atomic ops is used to set sk->sk_bpf_storage
- * from NULL to the newly allocated sk_storage ptr.
- *
- * From now on, the sk->sk_bpf_storage pointer is protected
- * by the sk_storage->lock. Hence, when freeing
- * the sk->sk_bpf_storage, the sk_storage->lock must
- * be held before setting sk->sk_bpf_storage to NULL.
- */
- prev_sk_storage = cmpxchg((struct bpf_sk_storage **)&sk->sk_bpf_storage,
- NULL, sk_storage);
- if (unlikely(prev_sk_storage)) {
- selem_unlink_map(first_selem);
- err = -EAGAIN;
- goto uncharge;
-
- /* Note that even first_selem was linked to smap's
- * bucket->list, first_selem can be freed immediately
- * (instead of kfree_rcu) because
- * bpf_sk_storage_map_free() does a
- * synchronize_rcu() before walking the bucket->list.
- * Hence, no one is accessing selem from the
- * bucket->list under rcu_read_lock().
- */
- }
-
- return 0;
-
-uncharge:
- kfree(sk_storage);
- atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
- return err;
-}
-
-/* sk cannot be going away because it is linking new elem
- * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
- * Otherwise, it will become a leak (and other memory issues
- * during map destruction).
- */
-static struct bpf_sk_storage_data *sk_storage_update(struct sock *sk,
- struct bpf_map *map,
- void *value,
- u64 map_flags)
-{
- struct bpf_sk_storage_data *old_sdata = NULL;
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_map *smap;
- int err;
-
- /* BPF_EXIST and BPF_NOEXIST cannot be both set */
- if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
- /* BPF_F_LOCK can only be used in a value with spin_lock */
- unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
- return ERR_PTR(-EINVAL);
-
- smap = (struct bpf_sk_storage_map *)map;
- sk_storage = rcu_dereference(sk->sk_bpf_storage);
- if (!sk_storage || hlist_empty(&sk_storage->list)) {
- /* Very first elem for this sk */
- err = check_flags(NULL, map_flags);
- if (err)
- return ERR_PTR(err);
-
- selem = selem_alloc(smap, sk, value, true);
- if (!selem)
- return ERR_PTR(-ENOMEM);
-
- err = sk_storage_alloc(sk, smap, selem);
- if (err) {
- kfree(selem);
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
- return ERR_PTR(err);
- }
-
- return SDATA(selem);
- }
-
- if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
- /* Hoping to find an old_sdata to do inline update
- * such that it can avoid taking the sk_storage->lock
- * and changing the lists.
- */
- old_sdata = __sk_storage_lookup(sk_storage, smap, false);
- err = check_flags(old_sdata, map_flags);
- if (err)
- return ERR_PTR(err);
- if (old_sdata && selem_linked_to_sk(SELEM(old_sdata))) {
- copy_map_value_locked(map, old_sdata->data,
- value, false);
- return old_sdata;
- }
- }
-
- raw_spin_lock_bh(&sk_storage->lock);
-
- /* Recheck sk_storage->list under sk_storage->lock */
- if (unlikely(hlist_empty(&sk_storage->list))) {
- /* A parallel del is happening and sk_storage is going
- * away. It has just been checked before, so very
- * unlikely. Return instead of retry to keep things
- * simple.
- */
- err = -EAGAIN;
- goto unlock_err;
- }
-
- old_sdata = __sk_storage_lookup(sk_storage, smap, false);
- err = check_flags(old_sdata, map_flags);
- if (err)
- goto unlock_err;
-
- if (old_sdata && (map_flags & BPF_F_LOCK)) {
- copy_map_value_locked(map, old_sdata->data, value, false);
- selem = SELEM(old_sdata);
- goto unlock;
- }
-
- /* sk_storage->lock is held. Hence, we are sure
- * we can unlink and uncharge the old_sdata successfully
- * later. Hence, instead of charging the new selem now
- * and then uncharge the old selem later (which may cause
- * a potential but unnecessary charge failure), avoid taking
- * a charge at all here (the "!old_sdata" check) and the
- * old_sdata will not be uncharged later during __selem_unlink_sk().
- */
- selem = selem_alloc(smap, sk, value, !old_sdata);
- if (!selem) {
- err = -ENOMEM;
- goto unlock_err;
- }
-
- /* First, link the new selem to the map */
- selem_link_map(smap, selem);
-
- /* Second, link (and publish) the new selem to sk_storage */
- __selem_link_sk(sk_storage, selem);
-
- /* Third, remove old selem, SELEM(old_sdata) */
- if (old_sdata) {
- selem_unlink_map(SELEM(old_sdata));
- __selem_unlink_sk(sk_storage, SELEM(old_sdata), false);
- }
-
-unlock:
- raw_spin_unlock_bh(&sk_storage->lock);
- return SDATA(selem);
-
-unlock_err:
- raw_spin_unlock_bh(&sk_storage->lock);
- return ERR_PTR(err);
+ smap = (struct bpf_local_storage_map *)map;
+ return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}
static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
sdata = sk_storage_lookup(sk, map, false);
if (!sdata)
return -ENOENT;
- selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata));
return 0;
}
-static u16 cache_idx_get(void)
-{
- u64 min_usage = U64_MAX;
- u16 i, res = 0;
-
- spin_lock(&cache_idx_lock);
-
- for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
- if (cache_idx_usage_counts[i] < min_usage) {
- min_usage = cache_idx_usage_counts[i];
- res = i;
-
- /* Found a free cache_idx */
- if (!min_usage)
- break;
- }
- }
- cache_idx_usage_counts[res]++;
-
- spin_unlock(&cache_idx_lock);
-
- return res;
-}
-
-static void cache_idx_free(u16 idx)
-{
- spin_lock(&cache_idx_lock);
- cache_idx_usage_counts[idx]--;
- spin_unlock(&cache_idx_lock);
-}
-
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *sk_storage;
bool free_sk_storage = false;
struct hlist_node *n;
@@ -565,7 +74,7 @@ void bpf_sk_storage_free(struct sock *sk)
* Thus, no elem can be added-to or deleted-from the
* sk_storage->list by the bpf_prog or by the bpf-map's syscall.
*
- * It is racing with bpf_sk_storage_map_free() alone
+ * It is racing with bpf_local_storage_map_free() alone
* when unlinking elem from the sk_storage->list and
* the map's bucket->list.
*/
@@ -574,8 +83,9 @@ void bpf_sk_storage_free(struct sock *sk)
/* Always unlink from map before unlinking from
* sk_storage.
*/
- selem_unlink_map(selem);
- free_sk_storage = __selem_unlink_sk(sk_storage, selem, true);
+ bpf_selem_unlink_map(selem);
+ free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
+ selem, true);
}
raw_spin_unlock_bh(&sk_storage->lock);
rcu_read_unlock();
@@ -584,132 +94,24 @@ void bpf_sk_storage_free(struct sock *sk)
kfree_rcu(sk_storage, rcu);
}
-static void bpf_sk_storage_map_free(struct bpf_map *map)
-{
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage_map *smap;
- struct bucket *b;
- unsigned int i;
-
- smap = (struct bpf_sk_storage_map *)map;
-
- cache_idx_free(smap->cache_idx);
-
- /* Note that this map might be concurrently cloned from
- * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
- * RCU read section to finish before proceeding. New RCU
- * read sections should be prevented via bpf_map_inc_not_zero.
- */
- synchronize_rcu();
-
- /* bpf prog and the userspace can no longer access this map
- * now. No new selem (of this map) can be added
- * to the sk->sk_bpf_storage or to the map bucket's list.
- *
- * The elem of this map can be cleaned up here
- * or
- * by bpf_sk_storage_free() during __sk_destruct().
- */
- for (i = 0; i < (1U << smap->bucket_log); i++) {
- b = &smap->buckets[i];
-
- rcu_read_lock();
- /* No one is adding to b->list now */
- while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)),
- struct bpf_sk_storage_elem,
- map_node))) {
- selem_unlink(selem);
- cond_resched_rcu();
- }
- rcu_read_unlock();
- }
-
- /* bpf_sk_storage_free() may still need to access the map.
- * e.g. bpf_sk_storage_free() has unlinked selem from the map
- * which then made the above while((selem = ...)) loop
- * exited immediately.
- *
- * However, the bpf_sk_storage_free() still needs to access
- * the smap->elem_size to do the uncharging in
- * __selem_unlink_sk().
- *
- * Hence, wait another rcu grace period for the
- * bpf_sk_storage_free() to finish.
- */
- synchronize_rcu();
-
- kvfree(smap->buckets);
- kfree(map);
-}
-
-/* U16_MAX is much more than enough for sk local storage
- * considering a tcp_sock is ~2k.
- */
-#define MAX_VALUE_SIZE \
- min_t(u32, \
- (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)), \
- (U16_MAX - sizeof(struct bpf_sk_storage_elem)))
-
-static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
+static void sk_storage_map_free(struct bpf_map *map)
{
- if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
- !(attr->map_flags & BPF_F_NO_PREALLOC) ||
- attr->max_entries ||
- attr->key_size != sizeof(int) || !attr->value_size ||
- /* Enforce BTF for userspace sk dumping */
- !attr->btf_key_type_id || !attr->btf_value_type_id)
- return -EINVAL;
-
- if (!bpf_capable())
- return -EPERM;
-
- if (attr->value_size > MAX_VALUE_SIZE)
- return -E2BIG;
+ struct bpf_local_storage_map *smap;
- return 0;
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
}
-static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
+static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
{
- struct bpf_sk_storage_map *smap;
- unsigned int i;
- u32 nbuckets;
- u64 cost;
- int ret;
-
- smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
- if (!smap)
- return ERR_PTR(-ENOMEM);
- bpf_map_init_from_attr(&smap->map, attr);
-
- nbuckets = roundup_pow_of_two(num_possible_cpus());
- /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
- nbuckets = max_t(u32, 2, nbuckets);
- smap->bucket_log = ilog2(nbuckets);
- cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
-
- ret = bpf_map_charge_init(&smap->map.memory, cost);
- if (ret < 0) {
- kfree(smap);
- return ERR_PTR(ret);
- }
+ struct bpf_local_storage_map *smap;
- smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN);
- if (!smap->buckets) {
- bpf_map_charge_finish(&smap->map.memory);
- kfree(smap);
- return ERR_PTR(-ENOMEM);
- }
-
- for (i = 0; i < nbuckets; i++) {
- INIT_HLIST_HEAD(&smap->buckets[i].list);
- raw_spin_lock_init(&smap->buckets[i].lock);
- }
-
- smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
- smap->cache_idx = cache_idx_get();
+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
return &smap->map;
}
@@ -719,26 +121,9 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
return -ENOTSUPP;
}
-static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
- const struct btf *btf,
- const struct btf_type *key_type,
- const struct btf_type *value_type)
-{
- u32 int_data;
-
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
- return -EINVAL;
-
- return 0;
-}
-
static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
struct socket *sock;
int fd, err;
@@ -756,14 +141,16 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
struct socket *sock;
int fd, err;
fd = *(int *)key;
sock = sockfd_lookup(fd, &err);
if (sock) {
- sdata = sk_storage_update(sock->sk, map, value, map_flags);
+ sdata = bpf_local_storage_update(
+ sock->sk, (struct bpf_local_storage_map *)map, value,
+ map_flags);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -787,14 +174,14 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
return err;
}
-static struct bpf_sk_storage_elem *
+static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
- struct bpf_sk_storage_map *smap,
- struct bpf_sk_storage_elem *selem)
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
{
- struct bpf_sk_storage_elem *copy_selem;
+ struct bpf_local_storage_elem *copy_selem;
- copy_selem = selem_alloc(smap, newsk, NULL, true);
+ copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
if (!copy_selem)
return NULL;
@@ -810,9 +197,9 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
- struct bpf_sk_storage *new_sk_storage = NULL;
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_elem *selem;
+ struct bpf_local_storage *new_sk_storage = NULL;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
int ret = 0;
RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
@@ -824,8 +211,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
goto out;
hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
- struct bpf_sk_storage_elem *copy_selem;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage_elem *copy_selem;
+ struct bpf_local_storage_map *smap;
struct bpf_map *map;
smap = rcu_dereference(SDATA(selem)->smap);
@@ -833,7 +220,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
continue;
/* Note that for lockless listeners adding new element
- * here can race with cleanup in bpf_sk_storage_map_free.
+ * here can race with cleanup in bpf_local_storage_map_free.
* Try to grab map refcnt to make sure that it's still
* alive and prevent concurrent removal.
*/
@@ -849,10 +236,10 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
}
if (new_sk_storage) {
- selem_link_map(smap, copy_selem);
- __selem_link_sk(new_sk_storage, copy_selem);
+ bpf_selem_link_map(smap, copy_selem);
+ bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
- ret = sk_storage_alloc(newsk, smap, copy_selem);
+ ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
@@ -861,7 +248,8 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
goto out;
}
- new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+ new_sk_storage =
+ rcu_dereference(copy_selem->local_storage);
}
bpf_map_put(map);
}
@@ -879,9 +267,9 @@ out:
BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage_data *sdata;
- if (flags > BPF_SK_STORAGE_GET_F_CREATE)
+ if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
return (unsigned long)NULL;
sdata = sk_storage_lookup(sk, map, true);
@@ -895,7 +283,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
* destruction).
*/
refcount_inc_not_zero(&sk->sk_refcnt)) {
- sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
+ sdata = bpf_local_storage_update(
+ sk, (struct bpf_local_storage_map *)map, value,
+ BPF_NOEXIST);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -909,6 +299,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
+ if (!sk || !sk_fullsock(sk))
+ return -EINVAL;
+
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
int err;
@@ -920,18 +313,44 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
return -ENOENT;
}
+static int sk_storage_charge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+{
+ return omem_charge(owner, size);
+}
+
+static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+{
+ struct sock *sk = owner;
+
+ atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+static struct bpf_local_storage __rcu **
+sk_storage_ptr(void *owner)
+{
+ struct sock *sk = owner;
+
+ return &sk->sk_bpf_storage;
+}
+
static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
- .map_alloc_check = bpf_sk_storage_map_alloc_check,
- .map_alloc = bpf_sk_storage_map_alloc,
- .map_free = bpf_sk_storage_map_free,
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = sk_storage_map_alloc,
+ .map_free = sk_storage_map_free,
.map_get_next_key = notsupp_get_next_key,
.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
- .map_check_btf = bpf_sk_storage_map_check_btf,
- .map_btf_name = "bpf_sk_storage_map",
+ .map_check_btf = bpf_local_storage_map_check_btf,
+ .map_btf_name = "bpf_local_storage_map",
.map_btf_id = &sk_storage_map_btf_id,
+ .map_local_storage_charge = sk_storage_charge,
+ .map_local_storage_uncharge = sk_storage_uncharge,
+ .map_owner_storage_ptr = sk_storage_ptr,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
@@ -939,7 +358,7 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_SOCKET,
+ .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
@@ -959,7 +378,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_SOCKET,
+ .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
struct bpf_sk_storage_diag {
@@ -1022,7 +441,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
u32 nr_maps = 0;
int rem, err;
- /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
+ /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
* the map_alloc_check() side also does.
*/
if (!bpf_capable())
@@ -1072,13 +491,13 @@ err_free:
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
-static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb)
+static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
struct nlattr *nla_stg, *nla_value;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage_map *smap;
/* It cannot exceed max nlattr's payload */
- BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE);
+ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);
nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
if (!nla_stg)
@@ -1114,9 +533,9 @@ static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
{
/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
unsigned int diag_size = nla_total_size(0);
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_elem *selem;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *smap;
struct nlattr *nla_stgs;
unsigned int saved_len;
int err = 0;
@@ -1169,8 +588,8 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
{
/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
unsigned int diag_size = nla_total_size(0);
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_data *sdata;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_data *sdata;
struct nlattr *nla_stgs;
unsigned int saved_len;
int err = 0;
@@ -1197,8 +616,8 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
saved_len = skb->len;
for (i = 0; i < diag->nr_maps; i++) {
- sdata = __sk_storage_lookup(sk_storage,
- (struct bpf_sk_storage_map *)diag->maps[i],
+ sdata = bpf_local_storage_lookup(sk_storage,
+ (struct bpf_local_storage_map *)diag->maps[i],
false);
if (!sdata)
@@ -1235,19 +654,20 @@ struct bpf_iter_seq_sk_storage_map_info {
unsigned skip_elems;
};
-static struct bpf_sk_storage_elem *
+static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
- struct bpf_sk_storage_elem *prev_selem)
+ struct bpf_local_storage_elem *prev_selem)
+ __acquires(RCU) __releases(RCU)
{
- struct bpf_sk_storage *sk_storage;
- struct bpf_sk_storage_elem *selem;
+ struct bpf_local_storage *sk_storage;
+ struct bpf_local_storage_elem *selem;
u32 skip_elems = info->skip_elems;
- struct bpf_sk_storage_map *smap;
+ struct bpf_local_storage_map *smap;
u32 bucket_id = info->bucket_id;
u32 i, count, n_buckets;
- struct bucket *b;
+ struct bpf_local_storage_map_bucket *b;
- smap = (struct bpf_sk_storage_map *)info->map;
+ smap = (struct bpf_local_storage_map *)info->map;
n_buckets = 1U << smap->bucket_log;
if (bucket_id >= n_buckets)
return NULL;
@@ -1256,16 +676,16 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
selem = prev_selem;
count = 0;
while (selem) {
- selem = hlist_entry_safe(selem->map_node.next,
- struct bpf_sk_storage_elem, map_node);
+ selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
+ struct bpf_local_storage_elem, map_node);
if (!selem) {
/* not found, unlock and go to the next bucket */
b = &smap->buckets[bucket_id++];
- raw_spin_unlock_bh(&b->lock);
+ rcu_read_unlock();
skip_elems = 0;
break;
}
- sk_storage = rcu_dereference_raw(selem->sk_storage);
+ sk_storage = rcu_dereference(selem->local_storage);
if (sk_storage) {
info->skip_elems = skip_elems + count;
return selem;
@@ -1275,10 +695,10 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
b = &smap->buckets[i];
- raw_spin_lock_bh(&b->lock);
+ rcu_read_lock();
count = 0;
- hlist_for_each_entry(selem, &b->list, map_node) {
- sk_storage = rcu_dereference_raw(selem->sk_storage);
+ hlist_for_each_entry_rcu(selem, &b->list, map_node) {
+ sk_storage = rcu_dereference(selem->local_storage);
if (sk_storage && count >= skip_elems) {
info->bucket_id = i;
info->skip_elems = count;
@@ -1286,7 +706,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
}
count++;
}
- raw_spin_unlock_bh(&b->lock);
+ rcu_read_unlock();
skip_elems = 0;
}
@@ -1297,7 +717,7 @@ bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct bpf_sk_storage_elem *selem;
+ struct bpf_local_storage_elem *selem;
selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
if (!selem)
@@ -1330,11 +750,11 @@ DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
void *value)
static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
- struct bpf_sk_storage_elem *selem)
+ struct bpf_local_storage_elem *selem)
{
struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
struct bpf_iter__bpf_sk_storage_map ctx = {};
- struct bpf_sk_storage *sk_storage;
+ struct bpf_local_storage *sk_storage;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int ret = 0;
@@ -1345,8 +765,8 @@ static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
ctx.meta = &meta;
ctx.map = info->map;
if (selem) {
- sk_storage = rcu_dereference_raw(selem->sk_storage);
- ctx.sk = sk_storage->sk;
+ sk_storage = rcu_dereference(selem->local_storage);
+ ctx.sk = sk_storage->owner;
ctx.value = SDATA(selem)->data;
}
ret = bpf_iter_run_prog(prog, &ctx);
@@ -1361,18 +781,12 @@ static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
}
static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
{
- struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
- struct bpf_sk_storage_map *smap;
- struct bucket *b;
-
- if (!v) {
+ if (!v)
(void)__bpf_sk_storage_map_seq_show(seq, v);
- } else {
- smap = (struct bpf_sk_storage_map *)info->map;
- b = &smap->buckets[info->bucket_id];
- raw_spin_unlock_bh(&b->lock);
- }
+ else
+ rcu_read_unlock();
}
static int bpf_iter_init_sk_storage_map(void *priv_data,
@@ -1437,6 +851,8 @@ static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
.target = "bpf_sk_storage_map",
.attach_target = bpf_iter_attach_map,
.detach_target = bpf_iter_detach_map,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 639745d4f3b9..9fcaa544f11a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -623,10 +623,11 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
while (length && iov_iter_count(from)) {
struct page *pages[MAX_SKB_FRAGS];
+ struct page *last_head = NULL;
size_t start;
ssize_t copied;
unsigned long truesize;
- int n = 0;
+ int refs, n = 0;
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
@@ -649,13 +650,37 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
}
- while (copied) {
+ for (refs = 0; copied != 0; start = 0) {
int size = min_t(int, copied, PAGE_SIZE - start);
- skb_fill_page_desc(skb, frag++, pages[n], start, size);
- start = 0;
+ struct page *head = compound_head(pages[n]);
+
+ start += (pages[n] - head) << PAGE_SHIFT;
copied -= size;
n++;
+ if (frag) {
+ skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
+
+ if (head == skb_frag_page(last) &&
+ start == skb_frag_off(last) + skb_frag_size(last)) {
+ skb_frag_size_add(last, size);
+ /* We combined this page, we need to release
+ * a reference. Since compound pages refcount
+ * is shared among many pages, batch the refcount
+ * adjustments to limit false sharing.
+ */
+ last_head = head;
+ refs++;
+ continue;
+ }
+ }
+ if (refs) {
+ page_ref_sub(last_head, refs);
+ refs = 0;
+ }
+ skb_fill_page_desc(skb, frag++, head, start, size);
}
+ if (refs)
+ page_ref_sub(last_head, refs);
}
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 4086d335978c..751e5264fd49 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -98,6 +98,7 @@
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
+#include <net/dsa.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
@@ -1130,7 +1131,7 @@ EXPORT_SYMBOL(__dev_get_by_flags);
* @name: name string
*
* Network device names need to be valid file names to
- * to allow sysfs to work. We also disallow any kind of
+ * allow sysfs to work. We also disallow any kind of
* whitespace.
*/
bool dev_valid_name(const char *name)
@@ -4840,6 +4841,21 @@ int netif_rx_ni(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_rx_ni);
+int netif_rx_any_context(struct sk_buff *skb)
+{
+ /*
+ * If invoked from contexts which do not invoke bottom half
+ * processing either at return from interrupt or when softrqs are
+ * reenabled, use netif_rx_ni() which invokes bottomhalf processing
+ * directly.
+ */
+ if (in_interrupt())
+ return netif_rx(skb);
+ else
+ return netif_rx_ni(skb);
+}
+EXPORT_SYMBOL(netif_rx_any_context);
+
static __latent_entropy void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4914,7 +4930,7 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
static inline struct sk_buff *
sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev)
+ struct net_device *orig_dev, bool *another)
{
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
@@ -4958,7 +4974,11 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* redirecting to another netdev
*/
__skb_push(skb, skb->mac_len);
- skb_do_redirect(skb);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
return NULL;
case TC_ACT_CONSUMED:
return NULL;
@@ -5147,7 +5167,12 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_INGRESS
if (static_branch_unlikely(&ingress_needed_key)) {
- skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
+ bool another = false;
+
+ skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
+ &another);
+ if (another)
+ goto another_round;
if (!skb)
goto out;
@@ -5192,7 +5217,7 @@ skip_classify:
}
}
- if (unlikely(skb_vlan_tag_present(skb))) {
+ if (unlikely(skb_vlan_tag_present(skb)) && !netdev_uses_dsa(skb->dev)) {
check_vlan_id:
if (skb_vlan_tag_get_id(skb)) {
/* Vlan id is non 0 and vlan_do_receive() above couldn't
@@ -5441,15 +5466,20 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
if (new) {
u32 i;
+ mutex_lock(&new->aux->used_maps_mutex);
+
/* generic XDP does not work with DEVMAPs that can
* have a bpf_prog installed on an entry
*/
for (i = 0; i < new->aux->used_map_cnt; i++) {
- if (dev_map_can_have_prog(new->aux->used_maps[i]))
- return -EINVAL;
- if (cpu_map_prog_allowed(new->aux->used_maps[i]))
+ if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
+ cpu_map_prog_allowed(new->aux->used_maps[i])) {
+ mutex_unlock(&new->aux->used_maps_mutex);
return -EINVAL;
+ }
}
+
+ mutex_unlock(&new->aux->used_maps_mutex);
}
switch (xdp->command) {
@@ -5621,17 +5651,60 @@ static void flush_backlog(struct work_struct *work)
local_bh_enable();
}
+static bool flush_required(int cpu)
+{
+#if IS_ENABLED(CONFIG_RPS)
+ struct softnet_data *sd = &per_cpu(softnet_data, cpu);
+ bool do_flush;
+
+ local_irq_disable();
+ rps_lock(sd);
+
+ /* as insertion into process_queue happens with the rps lock held,
+ * process_queue access may race only with dequeue
+ */
+ do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
+ !skb_queue_empty_lockless(&sd->process_queue);
+ rps_unlock(sd);
+ local_irq_enable();
+
+ return do_flush;
+#endif
+ /* without RPS we can't safely check input_pkt_queue: during a
+ * concurrent remote skb_queue_splice() we can detect as empty both
+ * input_pkt_queue and process_queue even if the latter could end-up
+ * containing a lot of packets.
+ */
+ return true;
+}
+
static void flush_all_backlogs(void)
{
+ static cpumask_t flush_cpus;
unsigned int cpu;
+ /* since we are under rtnl lock protection we can use static data
+ * for the cpumask and avoid allocating on stack the possibly
+ * large mask
+ */
+ ASSERT_RTNL();
+
get_online_cpus();
- for_each_online_cpu(cpu)
- queue_work_on(cpu, system_highpri_wq,
- per_cpu_ptr(&flush_works, cpu));
+ cpumask_clear(&flush_cpus);
+ for_each_online_cpu(cpu) {
+ if (flush_required(cpu)) {
+ queue_work_on(cpu, system_highpri_wq,
+ per_cpu_ptr(&flush_works, cpu));
+ cpumask_set_cpu(cpu, &flush_cpus);
+ }
+ }
- for_each_online_cpu(cpu)
+ /* we can have in flight packet[s] on the cpus we are not flushing,
+ * synchronize_net() in rollback_registered_many() will take care of
+ * them
+ */
+ for_each_cpu(cpu, &flush_cpus)
flush_work(per_cpu_ptr(&flush_works, cpu));
put_online_cpus();
@@ -6293,7 +6366,7 @@ EXPORT_SYMBOL(__napi_schedule);
* @n: napi context
*
* Test if NAPI routine is already running, and if not mark
- * it as running. This is used as a condition variable
+ * it as running. This is used as a condition variable to
* insure only one NAPI poll instance runs. We also make
* sure there is no pending NAPI disable.
*/
@@ -6533,8 +6606,7 @@ EXPORT_SYMBOL(napi_busy_loop);
static void napi_hash_add(struct napi_struct *napi)
{
- if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
- test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+ if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state))
return;
spin_lock(&napi_hash_lock);
@@ -6555,20 +6627,14 @@ static void napi_hash_add(struct napi_struct *napi)
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
*/
-bool napi_hash_del(struct napi_struct *napi)
+static void napi_hash_del(struct napi_struct *napi)
{
- bool rcu_sync_needed = false;
-
spin_lock(&napi_hash_lock);
- if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
- rcu_sync_needed = true;
- hlist_del_rcu(&napi->napi_hash_node);
- }
+ hlist_del_init_rcu(&napi->napi_hash_node);
+
spin_unlock(&napi_hash_lock);
- return rcu_sync_needed;
}
-EXPORT_SYMBOL_GPL(napi_hash_del);
static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
{
@@ -6600,7 +6666,11 @@ static void init_gro_hash(struct napi_struct *napi)
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
+ if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
+ return;
+
INIT_LIST_HEAD(&napi->poll_list);
+ INIT_HLIST_NODE(&napi->napi_hash_node);
hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
napi->timer.function = napi_watchdog;
init_gro_hash(napi);
@@ -6653,18 +6723,19 @@ static void flush_gro_hash(struct napi_struct *napi)
}
/* Must be called in process context */
-void netif_napi_del(struct napi_struct *napi)
+void __netif_napi_del(struct napi_struct *napi)
{
- might_sleep();
- if (napi_hash_del(napi))
- synchronize_net();
- list_del_init(&napi->dev_list);
+ if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
+ return;
+
+ napi_hash_del(napi);
+ list_del_rcu(&napi->dev_list);
napi_free_frags(napi);
flush_gro_hash(napi);
napi->gro_bitmask = 0;
}
-EXPORT_SYMBOL(netif_napi_del);
+EXPORT_SYMBOL(__netif_napi_del);
static int napi_poll(struct napi_struct *n, struct list_head *repoll)
{
@@ -6812,9 +6883,10 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
return NULL;
}
-static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+static int ____netdev_has_upper_dev(struct net_device *upper_dev,
+ struct netdev_nested_priv *priv)
{
- struct net_device *dev = data;
+ struct net_device *dev = (struct net_device *)priv->data;
return upper_dev == dev;
}
@@ -6831,10 +6903,14 @@ static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
bool netdev_has_upper_dev(struct net_device *dev,
struct net_device *upper_dev)
{
+ struct netdev_nested_priv priv = {
+ .data = (void *)upper_dev,
+ };
+
ASSERT_RTNL();
return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
- upper_dev);
+ &priv);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -6851,8 +6927,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
struct net_device *upper_dev)
{
+ struct netdev_nested_priv priv = {
+ .data = (void *)upper_dev,
+ };
+
return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
- upper_dev);
+ &priv);
}
EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
@@ -6997,8 +7077,8 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
static int __netdev_walk_all_upper_dev(struct net_device *dev,
int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+ struct netdev_nested_priv *priv),
+ struct netdev_nested_priv *priv)
{
struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7010,7 +7090,7 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev,
while (1) {
if (now != dev) {
- ret = fn(now, data);
+ ret = fn(now, priv);
if (ret)
return ret;
}
@@ -7046,8 +7126,8 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev,
int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+ struct netdev_nested_priv *priv),
+ struct netdev_nested_priv *priv)
{
struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7058,7 +7138,7 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
while (1) {
if (now != dev) {
- ret = fn(now, data);
+ ret = fn(now, priv);
if (ret)
return ret;
}
@@ -7094,10 +7174,15 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
static bool __netdev_has_upper_dev(struct net_device *dev,
struct net_device *upper_dev)
{
+ struct netdev_nested_priv priv = {
+ .flags = 0,
+ .data = (void *)upper_dev,
+ };
+
ASSERT_RTNL();
return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
- upper_dev);
+ &priv);
}
/**
@@ -7215,8 +7300,8 @@ static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
int netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+ struct netdev_nested_priv *priv),
+ struct netdev_nested_priv *priv)
{
struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7227,7 +7312,7 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
while (1) {
if (now != dev) {
- ret = fn(now, data);
+ ret = fn(now, priv);
if (ret)
return ret;
}
@@ -7262,8 +7347,8 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
static int __netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+ struct netdev_nested_priv *priv),
+ struct netdev_nested_priv *priv)
{
struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7275,7 +7360,7 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
while (1) {
if (now != dev) {
- ret = fn(now, data);
+ ret = fn(now, priv);
if (ret)
return ret;
}
@@ -7364,22 +7449,34 @@ static u8 __netdev_lower_depth(struct net_device *dev)
return max_depth;
}
-static int __netdev_update_upper_level(struct net_device *dev, void *data)
+static int __netdev_update_upper_level(struct net_device *dev,
+ struct netdev_nested_priv *__unused)
{
dev->upper_level = __netdev_upper_depth(dev) + 1;
return 0;
}
-static int __netdev_update_lower_level(struct net_device *dev, void *data)
+static int __netdev_update_lower_level(struct net_device *dev,
+ struct netdev_nested_priv *priv)
{
dev->lower_level = __netdev_lower_depth(dev) + 1;
+
+#ifdef CONFIG_LOCKDEP
+ if (!priv)
+ return 0;
+
+ if (priv->flags & NESTED_SYNC_IMM)
+ dev->nested_level = dev->lower_level - 1;
+ if (priv->flags & NESTED_SYNC_TODO)
+ net_unlink_todo(dev);
+#endif
return 0;
}
int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+ struct netdev_nested_priv *priv),
+ struct netdev_nested_priv *priv)
{
struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7390,7 +7487,7 @@ int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
while (1) {
if (now != dev) {
- ret = fn(now, data);
+ ret = fn(now, priv);
if (ret)
return ret;
}
@@ -7650,6 +7747,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
void *upper_priv, void *upper_info,
+ struct netdev_nested_priv *priv,
struct netlink_ext_ack *extack)
{
struct netdev_notifier_changeupper_info changeupper_info = {
@@ -7706,9 +7804,9 @@ static int __netdev_upper_dev_link(struct net_device *dev,
__netdev_update_upper_level(dev, NULL);
__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
- __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_update_lower_level(upper_dev, priv);
__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
- NULL);
+ priv);
return 0;
@@ -7733,8 +7831,13 @@ int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
struct netlink_ext_ack *extack)
{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
return __netdev_upper_dev_link(dev, upper_dev, false,
- NULL, NULL, extack);
+ NULL, NULL, &priv, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -7757,21 +7860,19 @@ int netdev_master_upper_dev_link(struct net_device *dev,
void *upper_priv, void *upper_info,
struct netlink_ext_ack *extack)
{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info, extack);
+ upper_priv, upper_info, &priv, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
-/**
- * netdev_upper_dev_unlink - Removes a link to upper device
- * @dev: device
- * @upper_dev: new upper device
- *
- * Removes a link to device which is upper to this one. The caller must hold
- * the RTNL lock.
- */
-void netdev_upper_dev_unlink(struct net_device *dev,
- struct net_device *upper_dev)
+static void __netdev_upper_dev_unlink(struct net_device *dev,
+ struct net_device *upper_dev,
+ struct netdev_nested_priv *priv)
{
struct netdev_notifier_changeupper_info changeupper_info = {
.info = {
@@ -7796,9 +7897,28 @@ void netdev_upper_dev_unlink(struct net_device *dev,
__netdev_update_upper_level(dev, NULL);
__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
- __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_update_lower_level(upper_dev, priv);
__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
- NULL);
+ priv);
+}
+
+/**
+ * netdev_upper_dev_unlink - Removes a link to upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Removes a link to device which is upper to this one. The caller must hold
+ * the RTNL lock.
+ */
+void netdev_upper_dev_unlink(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
+ __netdev_upper_dev_unlink(dev, upper_dev, &priv);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -7834,6 +7954,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev,
struct net_device *dev,
struct netlink_ext_ack *extack)
{
+ struct netdev_nested_priv priv = {
+ .flags = 0,
+ .data = NULL,
+ };
int err;
if (!new_dev)
@@ -7841,8 +7965,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev,
if (old_dev && new_dev != old_dev)
netdev_adjacent_dev_disable(dev, old_dev);
-
- err = netdev_upper_dev_link(new_dev, dev, extack);
+ err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
+ extack);
if (err) {
if (old_dev && new_dev != old_dev)
netdev_adjacent_dev_enable(dev, old_dev);
@@ -7857,6 +7981,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev,
struct net_device *new_dev,
struct net_device *dev)
{
+ struct netdev_nested_priv priv = {
+ .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+ .data = NULL,
+ };
+
if (!new_dev || !old_dev)
return;
@@ -7864,7 +7993,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev,
return;
netdev_adjacent_dev_enable(dev, old_dev);
- netdev_upper_dev_unlink(old_dev, dev);
+ __netdev_upper_dev_unlink(old_dev, dev, &priv);
}
EXPORT_SYMBOL(netdev_adjacent_change_commit);
@@ -7872,13 +8001,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev,
struct net_device *new_dev,
struct net_device *dev)
{
+ struct netdev_nested_priv priv = {
+ .flags = 0,
+ .data = NULL,
+ };
+
if (!new_dev)
return;
if (old_dev && new_dev != old_dev)
netdev_adjacent_dev_enable(dev, old_dev);
- netdev_upper_dev_unlink(new_dev, dev);
+ __netdev_upper_dev_unlink(new_dev, dev, &priv);
}
EXPORT_SYMBOL(netdev_adjacent_change_abort);
@@ -8647,7 +8781,7 @@ int dev_get_port_parent_id(struct net_device *dev,
if (!first.id_len)
first = *ppid;
else if (memcmp(&first, ppid, sizeof(*ppid)))
- return -ENODATA;
+ return -EOPNOTSUPP;
}
return err;
@@ -9470,7 +9604,7 @@ int __netdev_update_features(struct net_device *dev)
/* driver might be less strict about feature dependencies */
features = netdev_fix_features(dev, features);
- /* some features can't be enabled if they're off an an upper device */
+ /* some features can't be enabled if they're off on an upper device */
netdev_for_each_upper_dev_rcu(dev, upper, iter)
features = netdev_sync_upper_features(dev, upper, features);
@@ -9974,6 +10108,8 @@ int netdev_refcnt_read(const struct net_device *dev)
}
EXPORT_SYMBOL(netdev_refcnt_read);
+#define WAIT_REFS_MIN_MSECS 1
+#define WAIT_REFS_MAX_MSECS 250
/**
* netdev_wait_allrefs - wait until all references are gone.
* @dev: target net_device
@@ -9989,7 +10125,7 @@ EXPORT_SYMBOL(netdev_refcnt_read);
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
- int refcnt;
+ int wait = 0, refcnt;
linkwatch_forget_dev(dev);
@@ -10023,7 +10159,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
rebroadcast_time = jiffies;
}
- msleep(250);
+ if (!wait) {
+ rcu_barrier();
+ wait = WAIT_REFS_MIN_MSECS;
+ } else {
+ msleep(wait);
+ wait = min(wait << 1, WAIT_REFS_MAX_MSECS);
+ }
refcnt = netdev_refcnt_read(dev);
@@ -10062,6 +10204,19 @@ static void netdev_wait_allrefs(struct net_device *dev)
void netdev_run_todo(void)
{
struct list_head list;
+#ifdef CONFIG_LOCKDEP
+ struct list_head unlink_list;
+
+ list_replace_init(&net_unlink_list, &unlink_list);
+
+ while (!list_empty(&unlink_list)) {
+ struct net_device *dev = list_first_entry(&unlink_list,
+ struct net_device,
+ unlink_list);
+ list_del(&dev->unlink_list);
+ dev->nested_level = dev->lower_level - 1;
+ }
+#endif
/* Snapshot list, allow later requests */
list_replace_init(&net_todo_list, &list);
@@ -10173,6 +10328,40 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
}
EXPORT_SYMBOL(dev_get_stats);
+/**
+ * dev_fetch_sw_netstats - get per-cpu network device statistics
+ * @s: place to store stats
+ * @netstats: per-cpu network stats to read from
+ *
+ * Read per-cpu network statistics and populate the related fields in @s.
+ */
+void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
+ const struct pcpu_sw_netstats __percpu *netstats)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ const struct pcpu_sw_netstats *stats;
+ struct pcpu_sw_netstats tmp;
+ unsigned int start;
+
+ stats = per_cpu_ptr(netstats, cpu);
+ do {
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ tmp.rx_packets = stats->rx_packets;
+ tmp.rx_bytes = stats->rx_bytes;
+ tmp.tx_packets = stats->tx_packets;
+ tmp.tx_bytes = stats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+ s->rx_packets += tmp.rx_packets;
+ s->rx_bytes += tmp.rx_bytes;
+ s->tx_packets += tmp.tx_packets;
+ s->tx_bytes += tmp.tx_bytes;
+ }
+}
+EXPORT_SYMBOL_GPL(dev_fetch_sw_netstats);
+
struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
struct netdev_queue *queue = dev_ingress_queue(dev);
@@ -10274,6 +10463,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
+#ifdef CONFIG_LOCKDEP
+ dev->nested_level = 0;
+ INIT_LIST_HEAD(&dev->unlink_list);
+#endif
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 54cd568e7c2f..fa1c37ec40c9 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -700,7 +700,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
* larger.
*/
netif_addr_lock_bh(from);
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
@@ -867,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -897,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -922,7 +922,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
/* See the above comments inside dev_uc_unsync(). */
netif_addr_lock_bh(from);
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 80ec1cd81c64..a578634052a3 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -27,7 +27,6 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/devlink.h>
-#include <net/drop_monitor.h>
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
@@ -84,6 +83,7 @@ EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
@@ -347,8 +347,12 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct devlink_region {
struct devlink *devlink;
+ struct devlink_port *port;
struct list_head list;
- const struct devlink_region_ops *ops;
+ union {
+ const struct devlink_region_ops *ops;
+ const struct devlink_port_region_ops *port_ops;
+ };
struct list_head snapshot_list;
u32 max_snapshots;
u32 cur_snapshots;
@@ -374,6 +378,19 @@ devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
return NULL;
}
+static struct devlink_region *
+devlink_port_region_get_by_name(struct devlink_port *port,
+ const char *region_name)
+{
+ struct devlink_region *region;
+
+ list_for_each_entry(region, &port->region_list, list)
+ if (!strcmp(region->ops->name, region_name))
+ return region;
+
+ return NULL;
+}
+
static struct devlink_snapshot *
devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
{
@@ -462,10 +479,115 @@ static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
return 0;
}
+struct devlink_reload_combination {
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+};
+
+static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
+ {
+ /* can't reinitialize driver with no down time */
+ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
+ },
+};
+
+static bool
+devlink_reload_combination_is_invalid(enum devlink_reload_action action,
+ enum devlink_reload_limit limit)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
+ if (devlink_reload_invalid_combinations[i].action == action &&
+ devlink_reload_invalid_combinations[i].limit == limit)
+ return true;
+ return false;
+}
+
+static bool
+devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
+{
+ return test_bit(action, &devlink->ops->reload_actions);
+}
+
+static bool
+devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
+{
+ return test_bit(limit, &devlink->ops->reload_limits);
+}
+
+static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action,
+ enum devlink_reload_limit limit, u32 value)
+{
+ struct nlattr *reload_stats_entry;
+
+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+ if (!reload_stats_entry)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) ||
+ nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(msg, reload_stats_entry);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_entry);
+ return -EMSGSIZE;
+}
+
+static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
+{
+ struct nlattr *reload_stats_attr;
+ int i, j, stat_idx;
+ u32 value;
+
+ if (!is_remote)
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+ else
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
+
+ if (!reload_stats_attr)
+ return -EMSGSIZE;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+ /* Remote stats are shown even if not locally supported. Stats
+ * of actions with unspecified limit are shown though drivers
+ * don't need to register unspecified limit.
+ */
+ if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j))
+ continue;
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC ||
+ devlink_reload_combination_is_invalid(i, j))
+ continue;
+
+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+ if (!is_remote)
+ value = devlink->stats.reload_stats[stat_idx];
+ else
+ value = devlink->stats.remote_reload_stats[stat_idx];
+ if (devlink_reload_stat_put(msg, i, j, value))
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, reload_stats_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_attr);
+ return -EMSGSIZE;
+}
+
static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags)
{
+ struct nlattr *dev_stats;
void *hdr;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -477,9 +599,21 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
goto nla_put_failure;
+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+ if (!dev_stats)
+ goto nla_put_failure;
+
+ if (devlink_reload_stats_put(msg, devlink, false))
+ goto dev_stats_nest_cancel;
+ if (devlink_reload_stats_put(msg, devlink, true))
+ goto dev_stats_nest_cancel;
+
+ nla_nest_end(msg, dev_stats);
genlmsg_end(msg, hdr);
return 0;
+dev_stats_nest_cancel:
+ nla_nest_cancel(msg, dev_stats);
nla_put_failure:
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -523,15 +657,20 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return -EMSGSIZE;
switch (devlink_port->attrs.flavour) {
case DEVLINK_PORT_FLAVOUR_PCI_PF:
- if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
- attrs->pci_pf.pf))
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_pf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_pf.external))
return -EMSGSIZE;
break;
case DEVLINK_PORT_FLAVOUR_PCI_VF:
- if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER,
- attrs->pci_vf.pf) ||
- nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER,
- attrs->pci_vf.vf))
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER,
+ attrs->pci_vf.controller) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) ||
+ nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external))
return -EMSGSIZE;
break;
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
@@ -806,8 +945,6 @@ static int devlink_port_type_set(struct devlink *devlink,
int err;
if (devlink->ops->port_type_set) {
- if (port_type == DEVLINK_PORT_TYPE_NOTSET)
- return -EINVAL;
if (port_type == devlink_port->type)
return 0;
err = devlink->ops->port_type_set(devlink_port, port_type);
@@ -2943,9 +3080,9 @@ static void devlink_reload_netns_change(struct devlink *devlink,
DEVLINK_CMD_PARAM_NEW);
}
-static bool devlink_reload_supported(const struct devlink *devlink)
+static bool devlink_reload_supported(const struct devlink_ops *ops)
{
- return devlink->ops->reload_down && devlink->ops->reload_up;
+ return ops->reload_down && ops->reload_up;
}
static void devlink_reload_failed_set(struct devlink *devlink,
@@ -2963,33 +3100,132 @@ bool devlink_is_reload_failed(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+ enum devlink_reload_limit limit, u32 actions_performed)
+{
+ unsigned long actions = actions_performed;
+ int stat_idx;
+ int action;
+
+ for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+ reload_stats[stat_idx]++;
+ }
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
+ actions_performed);
+}
+
+/**
+ * devlink_remote_reload_actions_performed - Update devlink on reload actions
+ * performed which are not a direct result of devlink reload call.
+ *
+ * This should be called by a driver after performing reload actions in case it was not
+ * a result of devlink reload call. For example fw_activate was performed as a result
+ * of devlink reload triggered fw_activate on another host.
+ * The motivation for this function is to keep data on reload actions performed on this
+ * function whether it was done due to direct devlink reload call or not.
+ *
+ * @devlink: devlink
+ * @limit: reload limit
+ * @actions_performed: bitmask of actions performed
+ */
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ if (WARN_ON(!actions_performed ||
+ actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
+ limit > DEVLINK_RELOAD_LIMIT_MAX))
+ return;
+
+ __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
+ actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
+
static int devlink_reload(struct devlink *devlink, struct net *dest_net,
- struct netlink_ext_ack *extack)
+ enum devlink_reload_action action, enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack)
{
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
int err;
if (!devlink->reload_enabled)
return -EOPNOTSUPP;
- err = devlink->ops->reload_down(devlink, !!dest_net, extack);
+ memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats));
+ err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
if (err)
return err;
if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
devlink_reload_netns_change(devlink, dest_net);
- err = devlink->ops->reload_up(devlink, extack);
+ err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
- return err;
+ if (err)
+ return err;
+
+ WARN_ON(!(*actions_performed & BIT(action)));
+ /* Catch driver on updating the remote action within devlink reload */
+ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats)));
+ devlink_reload_stats_update(devlink, limit, *actions_performed);
+ return 0;
+}
+
+static int
+devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
+ enum devlink_command cmd, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
+ actions_performed))
+ goto nla_put_failure;
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
}
static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
struct net *dest_net = NULL;
+ u32 actions_performed;
int err;
- if (!devlink_reload_supported(devlink))
+ if (!devlink_reload_supported(devlink->ops))
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -3006,20 +3242,67 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
return PTR_ERR(dest_net);
}
- err = devlink_reload(devlink, dest_net, info->extack);
+ if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+ action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+ else
+ action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+
+ if (!devlink_reload_action_is_supported(devlink, action)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested reload action is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+
+ limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
+ if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
+ struct nla_bitfield32 limits;
+ u32 limits_selected;
+
+ limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
+ limits_selected = limits.value & limits.selector;
+ if (!limits_selected) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
+ return -EINVAL;
+ }
+ for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
+ if (limits_selected & BIT(limit))
+ break;
+ /* UAPI enables multiselection, but currently it is not used */
+ if (limits_selected != BIT(limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Multiselection of limit is not supported");
+ return -EOPNOTSUPP;
+ }
+ if (!devlink_reload_limit_is_supported(devlink, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_reload_combination_is_invalid(action, limit)) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Requested limit is invalid for this action");
+ return -EINVAL;
+ }
+ }
+ err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
if (dest_net)
put_net(dest_net);
- return err;
+ if (err)
+ return err;
+ /* For backward compatibility generate reply only if attributes used by user */
+ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
+ return 0;
+
+ return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
+ DEVLINK_CMD_RELOAD, info);
}
static int devlink_nl_flash_update_fill(struct sk_buff *msg,
struct devlink *devlink,
enum devlink_command cmd,
- const char *status_msg,
- const char *component,
- unsigned long done, unsigned long total)
+ struct devlink_flash_notify *params)
{
void *hdr;
@@ -3033,19 +3316,22 @@ static int devlink_nl_flash_update_fill(struct sk_buff *msg,
if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
goto out;
- if (status_msg &&
+ if (params->status_msg &&
nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
- status_msg))
+ params->status_msg))
goto nla_put_failure;
- if (component &&
+ if (params->component &&
nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
- component))
+ params->component))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- done, DEVLINK_ATTR_PAD))
+ params->done, DEVLINK_ATTR_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- total, DEVLINK_ATTR_PAD))
+ params->total, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout, DEVLINK_ATTR_PAD))
goto nla_put_failure;
out:
@@ -3059,10 +3345,7 @@ nla_put_failure:
static void __devlink_flash_update_notify(struct devlink *devlink,
enum devlink_command cmd,
- const char *status_msg,
- const char *component,
- unsigned long done,
- unsigned long total)
+ struct devlink_flash_notify *params)
{
struct sk_buff *msg;
int err;
@@ -3075,8 +3358,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
if (!msg)
return;
- err = devlink_nl_flash_update_fill(msg, devlink, cmd, status_msg,
- component, done, total);
+ err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
if (err)
goto out_free_msg;
@@ -3090,17 +3372,21 @@ out_free_msg:
void devlink_flash_update_begin_notify(struct devlink *devlink)
{
+ struct devlink_flash_notify params = { 0 };
+
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE,
- NULL, NULL, 0, 0);
+ &params);
}
EXPORT_SYMBOL_GPL(devlink_flash_update_begin_notify);
void devlink_flash_update_end_notify(struct devlink *devlink)
{
+ struct devlink_flash_notify params = { 0 };
+
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE_END,
- NULL, NULL, 0, 0);
+ &params);
}
EXPORT_SYMBOL_GPL(devlink_flash_update_end_notify);
@@ -3110,31 +3396,78 @@ void devlink_flash_update_status_notify(struct devlink *devlink,
unsigned long done,
unsigned long total)
{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .done = done,
+ .total = total,
+ };
+
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE_STATUS,
- status_msg, component, done, total);
+ &params);
}
EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long timeout)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .timeout = timeout,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+
static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
struct genl_info *info)
{
+ struct nlattr *nla_component, *nla_overwrite_mask;
+ struct devlink_flash_update_params params = {};
struct devlink *devlink = info->user_ptr[0];
- const char *file_name, *component;
- struct nlattr *nla_component;
+ u32 supported_params;
if (!devlink->ops->flash_update)
return -EOPNOTSUPP;
if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
return -EINVAL;
- file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
+
+ supported_params = devlink->ops->supported_flash_update_params;
+
+ params.file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
- component = nla_component ? nla_data(nla_component) : NULL;
+ if (nla_component) {
+ if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_COMPONENT)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+ params.component = nla_data(nla_component);
+ }
+
+ nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
+ if (nla_overwrite_mask) {
+ struct nla_bitfield32 sections;
+
+ if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
+ "overwrite settings are not supported by this device");
+ return -EOPNOTSUPP;
+ }
+ sections = nla_get_bitfield32(nla_overwrite_mask);
+ params.overwrite_mask = sections.value & sections.selector;
+ }
- return devlink->ops->flash_update(devlink, file_name, component,
- info->extack);
+ return devlink->ops->flash_update(devlink, &params, info->extack);
}
static const struct devlink_param devlink_param_generic[] = {
@@ -3188,6 +3521,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
.type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -3875,6 +4213,11 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
+ if (region->port)
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index))
+ goto nla_put_failure;
+
err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->ops->name);
if (err)
goto nla_put_failure;
@@ -3922,6 +4265,11 @@ devlink_nl_region_notify_build(struct devlink_region *region,
if (err)
goto out_cancel_msg;
+ if (region->port)
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index))
+ goto out_cancel_msg;
+
err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
region->ops->name);
if (err)
@@ -4168,16 +4516,30 @@ static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
+ struct devlink_port *port = NULL;
struct devlink_region *region;
const char *region_name;
struct sk_buff *msg;
+ unsigned int index;
int err;
if (!info->attrs[DEVLINK_ATTR_REGION_NAME])
return -EINVAL;
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
- region = devlink_region_get_by_name(devlink, region_name);
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
if (!region)
return -EINVAL;
@@ -4196,10 +4558,75 @@ static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
+static int devlink_nl_cmd_region_get_port_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct devlink_port *port,
+ int *idx,
+ int start)
+{
+ struct devlink_region *region;
+ int err = 0;
+
+ list_for_each_entry(region, &port->region_list, list) {
+ if (*idx < start) {
+ (*idx)++;
+ continue;
+ }
+ err = devlink_nl_region_fill(msg, port->devlink,
+ DEVLINK_CMD_REGION_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, region);
+ if (err)
+ goto out;
+ (*idx)++;
+ }
+
+out:
+ return err;
+}
+
+static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ struct devlink *devlink,
+ int *idx,
+ int start)
+{
+ struct devlink_region *region;
+ struct devlink_port *port;
+ int err = 0;
+
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(region, &devlink->region_list, list) {
+ if (*idx < start) {
+ (*idx)++;
+ continue;
+ }
+ err = devlink_nl_region_fill(msg, devlink,
+ DEVLINK_CMD_REGION_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, region);
+ if (err)
+ goto out;
+ (*idx)++;
+ }
+
+ list_for_each_entry(port, &devlink->port_list, list) {
+ err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, idx,
+ start);
+ if (err)
+ goto out;
+ }
+
+out:
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+
static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
- struct devlink_region *region;
struct devlink *devlink;
int start = cb->args[0];
int idx = 0;
@@ -4209,25 +4636,10 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
-
- mutex_lock(&devlink->lock);
- list_for_each_entry(region, &devlink->region_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_region_fill(msg, devlink,
- DEVLINK_CMD_REGION_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, region);
- if (err) {
- mutex_unlock(&devlink->lock);
- goto out;
- }
- idx++;
- }
- mutex_unlock(&devlink->lock);
+ err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
+ &idx, start);
+ if (err)
+ goto out;
}
out:
mutex_unlock(&devlink_mutex);
@@ -4240,8 +4652,10 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_snapshot *snapshot;
+ struct devlink_port *port = NULL;
struct devlink_region *region;
const char *region_name;
+ unsigned int index;
u32 snapshot_id;
if (!info->attrs[DEVLINK_ATTR_REGION_NAME] ||
@@ -4251,7 +4665,19 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
- region = devlink_region_get_by_name(devlink, region_name);
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
if (!region)
return -EINVAL;
@@ -4268,9 +4694,11 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
{
struct devlink *devlink = info->user_ptr[0];
struct devlink_snapshot *snapshot;
+ struct devlink_port *port = NULL;
struct nlattr *snapshot_id_attr;
struct devlink_region *region;
const char *region_name;
+ unsigned int index;
u32 snapshot_id;
u8 *data;
int err;
@@ -4281,7 +4709,20 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
}
region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
- region = devlink_region_get_by_name(devlink, region_name);
+
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
if (!region) {
NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not exist");
return -EINVAL;
@@ -4317,7 +4758,12 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
}
}
- err = region->ops->snapshot(devlink, info->extack, &data);
+ if (port)
+ err = region->port_ops->snapshot(port, region->port_ops,
+ info->extack, &data);
+ else
+ err = region->ops->snapshot(devlink, region->ops,
+ info->extack, &data);
if (err)
goto err_snapshot_capture;
@@ -4439,10 +4885,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
u64 ret_offset, start_offset, end_offset = U64_MAX;
struct nlattr **attrs = info->attrs;
+ struct devlink_port *port = NULL;
struct devlink_region *region;
struct nlattr *chunks_attr;
const char *region_name;
struct devlink *devlink;
+ unsigned int index;
void *hdr;
int err;
@@ -4463,8 +4911,21 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto out_unlock;
}
+ if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
+
+ port = devlink_port_get_by_index(devlink, index);
+ if (!port)
+ return -ENODEV;
+ }
+
region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
- region = devlink_region_get_by_name(devlink, region_name);
+
+ if (port)
+ region = devlink_port_region_get_by_name(port, region_name);
+ else
+ region = devlink_region_get_by_name(devlink, region_name);
+
if (!region) {
err = -EINVAL;
goto out_unlock;
@@ -4501,6 +4962,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
if (err)
goto nla_put_failure;
+ if (region->port)
+ if (nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX,
+ region->port->index))
+ goto nla_put_failure;
+
err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
if (err)
goto nla_put_failure;
@@ -5895,6 +6361,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
list_for_each_entry(devlink, &devlink_list, list) {
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
continue;
+ mutex_lock(&devlink->lock);
list_for_each_entry(port, &devlink->port_list, list) {
mutex_lock(&port->reporters_lock);
list_for_each_entry(reporter, &port->reporter_list, list) {
@@ -5909,12 +6376,14 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&port->reporters_lock);
+ mutex_unlock(&devlink->lock);
goto out;
}
idx++;
}
mutex_unlock(&port->reporters_lock);
}
+ mutex_unlock(&devlink->lock);
}
out:
mutex_unlock(&devlink_mutex);
@@ -6088,6 +6557,28 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
return 0;
}
+static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->test) {
+ devlink_health_reporter_put(reporter);
+ return -EOPNOTSUPP;
+ }
+
+ err = reporter->ops->test(reporter, info->extack);
+
+ devlink_health_reporter_put(reporter);
+ return err;
+}
+
struct devlink_stats {
u64 rx_bytes;
u64 rx_packets;
@@ -6644,6 +7135,24 @@ __devlink_trap_group_action_set(struct devlink *devlink,
struct devlink_trap_item *trap_item;
int err;
+ if (devlink->ops->trap_group_action_set) {
+ err = devlink->ops->trap_group_action_set(devlink, group_item->group,
+ trap_action, extack);
+ if (err)
+ return err;
+
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (strcmp(trap_item->group_item->group->name, group_name))
+ continue;
+ if (trap_item->action != trap_action &&
+ trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP)
+ continue;
+ trap_item->action = trap_action;
+ }
+
+ return 0;
+ }
+
list_for_each_entry(trap_item, &devlink->trap_list, list) {
if (strcmp(trap_item->group_item->group->name, group_name))
continue;
@@ -7000,7 +7509,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_TYPE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO,
+ DEVLINK_PORT_TYPE_IB),
[DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
[DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
[DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
@@ -7009,7 +7519,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
[DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
[DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY,
+ DEVLINK_ESWITCH_MODE_SWITCHDEV),
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
@@ -7028,6 +7539,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
+ NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
@@ -7039,9 +7552,12 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
[DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
[DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
+ [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_ACTION_MAX),
+ [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
};
-static const struct genl_ops devlink_nl_ops[] = {
+static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -7309,6 +7825,14 @@ static const struct genl_ops devlink_nl_ops[] = {
DEVLINK_NL_FLAG_NO_LOCK,
},
{
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = devlink_nl_cmd_health_reporter_test_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT |
+ DEVLINK_NL_FLAG_NO_LOCK,
+ },
+ {
.cmd = DEVLINK_CMD_FLASH_UPDATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_flash_update,
@@ -7358,12 +7882,41 @@ static struct genl_family devlink_nl_family __ro_after_init = {
.pre_doit = devlink_nl_pre_doit,
.post_doit = devlink_nl_post_doit,
.module = THIS_MODULE,
- .ops = devlink_nl_ops,
- .n_ops = ARRAY_SIZE(devlink_nl_ops),
+ .small_ops = devlink_nl_ops,
+ .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
};
+static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
+{
+ const struct devlink_reload_combination *comb;
+ int i;
+
+ if (!devlink_reload_supported(ops)) {
+ if (WARN_ON(ops->reload_actions))
+ return false;
+ return true;
+ }
+
+ if (WARN_ON(!ops->reload_actions ||
+ ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
+ return false;
+
+ if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
+ ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
+ comb = &devlink_reload_invalid_combinations[i];
+ if (ops->reload_actions == BIT(comb->action) &&
+ ops->reload_limits == BIT(comb->limit))
+ return false;
+ }
+ return true;
+}
+
/**
* devlink_alloc - Allocate new devlink instance resources
*
@@ -7380,6 +7933,9 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
if (WARN_ON(!ops))
return NULL;
+ if (!devlink_reload_actions_valid(ops))
+ return NULL;
+
devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
if (!devlink)
return NULL;
@@ -7428,7 +7984,7 @@ EXPORT_SYMBOL_GPL(devlink_register);
void devlink_unregister(struct devlink *devlink)
{
mutex_lock(&devlink_mutex);
- WARN_ON(devlink_reload_supported(devlink) &&
+ WARN_ON(devlink_reload_supported(devlink->ops) &&
devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
list_del(&devlink->list);
@@ -7506,7 +8062,8 @@ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
{
/* Ignore CPU and DSA flavours. */
return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
- devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA;
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA &&
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_UNUSED;
}
#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600)
@@ -7555,11 +8112,12 @@ int devlink_port_register(struct devlink *devlink,
devlink_port->index = port_index;
devlink_port->registered = true;
spin_lock_init(&devlink_port->type_lock);
+ INIT_LIST_HEAD(&devlink_port->reporter_list);
+ mutex_init(&devlink_port->reporters_lock);
list_add_tail(&devlink_port->list, &devlink->port_list);
INIT_LIST_HEAD(&devlink_port->param_list);
+ INIT_LIST_HEAD(&devlink_port->region_list);
mutex_unlock(&devlink->lock);
- INIT_LIST_HEAD(&devlink_port->reporter_list);
- mutex_init(&devlink_port->reporters_lock);
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -7576,13 +8134,14 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
- WARN_ON(!list_empty(&devlink_port->reporter_list));
- mutex_destroy(&devlink_port->reporters_lock);
devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
mutex_lock(&devlink->lock);
list_del(&devlink_port->list);
mutex_unlock(&devlink->lock);
+ WARN_ON(!list_empty(&devlink_port->reporter_list));
+ WARN_ON(!list_empty(&devlink_port->region_list));
+ mutex_destroy(&devlink_port->reporters_lock);
}
EXPORT_SYMBOL_GPL(devlink_port_unregister);
@@ -7600,14 +8159,8 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
}
-/**
- * devlink_port_type_eth_set - Set port type to Ethernet
- *
- * @devlink_port: devlink port
- * @netdev: related netdevice
- */
-void devlink_port_type_eth_set(struct devlink_port *devlink_port,
- struct net_device *netdev)
+static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port,
+ struct net_device *netdev)
{
const struct net_device_ops *ops = netdev->netdev_ops;
@@ -7641,6 +8194,24 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port,
err = ops->ndo_get_port_parent_id(netdev, &ppid);
WARN_ON(err != -EOPNOTSUPP);
}
+}
+
+/**
+ * devlink_port_type_eth_set - Set port type to Ethernet
+ *
+ * @devlink_port: devlink port
+ * @netdev: related netdevice
+ */
+void devlink_port_type_eth_set(struct devlink_port *devlink_port,
+ struct net_device *netdev)
+{
+ if (netdev)
+ devlink_port_type_netdev_checks(devlink_port, netdev);
+ else
+ dev_warn(devlink_port->devlink->dev,
+ "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n",
+ devlink_port->index);
+
__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev);
}
EXPORT_SYMBOL_GPL(devlink_port_type_eth_set);
@@ -7712,9 +8283,12 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set);
* devlink_port_attrs_pci_pf_set - Set PCI PF port attributes
*
* @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
* @pf: associated PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
*/
-void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf)
+void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
@@ -7723,8 +8297,9 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u16 pf)
DEVLINK_PORT_FLAVOUR_PCI_PF);
if (ret)
return;
-
+ attrs->pci_pf.controller = controller;
attrs->pci_pf.pf = pf;
+ attrs->pci_pf.external = external;
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
@@ -7732,11 +8307,13 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set);
* devlink_port_attrs_pci_vf_set - Set PCI VF port attributes
*
* @devlink_port: devlink port
+ * @controller: associated controller number for the devlink port instance
* @pf: associated PF for the devlink port instance
* @vf: associated VF of a PF for the devlink port instance
+ * @external: indicates if the port is for an external controller
*/
-void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
- u16 pf, u16 vf)
+void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
+ u16 pf, u16 vf, bool external)
{
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
@@ -7745,8 +8322,10 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_VF);
if (ret)
return;
+ attrs->pci_vf.controller = controller;
attrs->pci_vf.pf = pf;
attrs->pci_vf.vf = vf;
+ attrs->pci_vf.external = external;
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
@@ -7771,15 +8350,30 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
break;
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
+ case DEVLINK_PORT_FLAVOUR_UNUSED:
/* As CPU and DSA ports do not have a netdevice associated
* case should not ever happen.
*/
WARN_ON(1);
return -EINVAL;
case DEVLINK_PORT_FLAVOUR_PCI_PF:
+ if (attrs->pci_pf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_pf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
n = snprintf(name, len, "pf%u", attrs->pci_pf.pf);
break;
case DEVLINK_PORT_FLAVOUR_PCI_VF:
+ if (attrs->pci_vf.external) {
+ n = snprintf(name, len, "c%u", attrs->pci_vf.controller);
+ if (n >= len)
+ return -EINVAL;
+ len -= n;
+ name += n;
+ }
n = snprintf(name, len, "pf%uvf%u",
attrs->pci_vf.pf, attrs->pci_vf.vf);
break;
@@ -8431,7 +9025,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink,
int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
union devlink_param_value *init_val)
{
- if (!devlink_reload_supported(devlink))
+ if (!devlink_reload_supported(devlink->ops))
return -EOPNOTSUPP;
return __devlink_param_driverinit_value_get(&devlink->param_list,
@@ -8478,7 +9072,7 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
{
struct devlink *devlink = devlink_port->devlink;
- if (!devlink_reload_supported(devlink))
+ if (!devlink_reload_supported(devlink->ops))
return -EOPNOTSUPP;
return __devlink_param_driverinit_value_get(&devlink_port->param_list,
@@ -8627,6 +9221,57 @@ unlock:
EXPORT_SYMBOL_GPL(devlink_region_create);
/**
+ * devlink_port_region_create - create a new address region for a port
+ *
+ * @port: devlink port
+ * @ops: region operations and name
+ * @region_max_snapshots: Maximum supported number of snapshots for region
+ * @region_size: size of region
+ */
+struct devlink_region *
+devlink_port_region_create(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct devlink *devlink = port->devlink;
+ struct devlink_region *region;
+ int err = 0;
+
+ if (WARN_ON(!ops) || WARN_ON(!ops->destructor))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&devlink->lock);
+
+ if (devlink_port_region_get_by_name(port, ops->name)) {
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ region = kzalloc(sizeof(*region), GFP_KERNEL);
+ if (!region) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ region->devlink = devlink;
+ region->port = port;
+ region->max_snapshots = region_max_snapshots;
+ region->port_ops = ops;
+ region->size = region_size;
+ INIT_LIST_HEAD(&region->snapshot_list);
+ list_add_tail(&region->list, &port->region_list);
+ devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+ mutex_unlock(&devlink->lock);
+ return region;
+
+unlock:
+ mutex_unlock(&devlink->lock);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_port_region_create);
+
+/**
* devlink_region_destroy - destroy address region
*
* @region: devlink region to destroy
@@ -8803,6 +9448,22 @@ static const struct devlink_trap devlink_trap_generic[] = {
DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
DEVLINK_TRAP(EARLY_DROP, DROP),
+ DEVLINK_TRAP(VXLAN_PARSING, DROP),
+ DEVLINK_TRAP(LLC_SNAP_PARSING, DROP),
+ DEVLINK_TRAP(VLAN_PARSING, DROP),
+ DEVLINK_TRAP(PPPOE_PPP_PARSING, DROP),
+ DEVLINK_TRAP(MPLS_PARSING, DROP),
+ DEVLINK_TRAP(ARP_PARSING, DROP),
+ DEVLINK_TRAP(IP_1_PARSING, DROP),
+ DEVLINK_TRAP(IP_N_PARSING, DROP),
+ DEVLINK_TRAP(GRE_PARSING, DROP),
+ DEVLINK_TRAP(UDP_PARSING, DROP),
+ DEVLINK_TRAP(TCP_PARSING, DROP),
+ DEVLINK_TRAP(IPSEC_PARSING, DROP),
+ DEVLINK_TRAP(SCTP_PARSING, DROP),
+ DEVLINK_TRAP(DCCP_PARSING, DROP),
+ DEVLINK_TRAP(GTP_PARSING, DROP),
+ DEVLINK_TRAP(ESP_PARSING, DROP),
};
#define DEVLINK_TRAP_GROUP(_id) \
@@ -8837,6 +9498,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
DEVLINK_TRAP_GROUP(PTP_GENERAL),
DEVLINK_TRAP_GROUP(ACL_SAMPLE),
DEVLINK_TRAP_GROUP(ACL_TRAP),
+ DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS),
};
static int devlink_trap_generic_verify(const struct devlink_trap *trap)
@@ -9139,20 +9801,19 @@ devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
}
static void
-devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
- const struct devlink_trap_item *trap_item,
- struct devlink_port *in_devlink_port,
- const struct flow_action_cookie *fa_cookie)
+devlink_trap_report_metadata_set(struct devlink_trap_metadata *metadata,
+ const struct devlink_trap_item *trap_item,
+ struct devlink_port *in_devlink_port,
+ const struct flow_action_cookie *fa_cookie)
{
- struct devlink_trap_group_item *group_item = trap_item->group_item;
-
- hw_metadata->trap_group_name = group_item->group->name;
- hw_metadata->trap_name = trap_item->trap->name;
- hw_metadata->fa_cookie = fa_cookie;
+ metadata->trap_name = trap_item->trap->name;
+ metadata->trap_group_name = trap_item->group_item->group->name;
+ metadata->fa_cookie = fa_cookie;
+ metadata->trap_type = trap_item->trap->type;
spin_lock(&in_devlink_port->type_lock);
if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
- hw_metadata->input_dev = in_devlink_port->type_dev;
+ metadata->input_dev = in_devlink_port->type_dev;
spin_unlock(&in_devlink_port->type_lock);
}
@@ -9170,21 +9831,17 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
{
struct devlink_trap_item *trap_item = trap_ctx;
- struct net_dm_hw_metadata hw_metadata = {};
devlink_trap_stats_update(trap_item->stats, skb->len);
devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
- /* Control packets were not dropped by the device or encountered an
- * exception during forwarding and therefore should not be reported to
- * the kernel's drop monitor.
- */
- if (trap_item->trap->type == DEVLINK_TRAP_TYPE_CONTROL)
- return;
+ if (trace_devlink_trap_report_enabled()) {
+ struct devlink_trap_metadata metadata = {};
- devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
- in_devlink_port, fa_cookie);
- net_dm_hw_report(skb, &hw_metadata);
+ devlink_trap_report_metadata_set(&metadata, trap_item,
+ in_devlink_port, fa_cookie);
+ trace_devlink_trap_report(devlink, skb, &metadata);
+ }
}
EXPORT_SYMBOL_GPL(devlink_trap_report);
@@ -9543,6 +10200,7 @@ out:
int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
{
+ struct devlink_flash_update_params params = {};
struct devlink *devlink;
int ret;
@@ -9555,8 +10213,10 @@ int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
goto out;
}
+ params.file_name = file_name;
+
mutex_lock(&devlink->lock);
- ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL);
+ ret = devlink->ops->flash_update(devlink, &params, NULL);
mutex_unlock(&devlink->lock);
out:
@@ -9605,6 +10265,7 @@ int devlink_compat_switch_id_get(struct net_device *dev,
static void __net_exit devlink_pernet_pre_exit(struct net *net)
{
struct devlink *devlink;
+ u32 actions_performed;
int err;
/* In case network namespace is getting destroyed, reload
@@ -9613,9 +10274,12 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
mutex_lock(&devlink_mutex);
list_for_each_entry(devlink, &devlink_list, list) {
if (net_eq(devlink_net(devlink), net)) {
- if (WARN_ON(!devlink_reload_supported(devlink)))
+ if (WARN_ON(!devlink_reload_supported(devlink->ops)))
continue;
- err = devlink_reload(devlink, &init_net, NULL);
+ err = devlink_reload(devlink, &init_net,
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_LIMIT_UNSPEC,
+ &actions_performed, NULL);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
}
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9704522b0872..571f191c06d9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -26,13 +26,14 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <net/drop_monitor.h>
#include <net/genetlink.h>
#include <net/netevent.h>
#include <net/flow_offload.h>
+#include <net/devlink.h>
#include <trace/events/skb.h>
#include <trace/events/napi.h>
+#include <trace/events/devlink.h>
#include <asm/unaligned.h>
@@ -114,13 +115,14 @@ struct net_dm_alert_ops {
int work, int budget);
void (*work_item_func)(struct work_struct *work);
void (*hw_work_item_func)(struct work_struct *work);
- void (*hw_probe)(struct sk_buff *skb,
- const struct net_dm_hw_metadata *hw_metadata);
+ void (*hw_trap_probe)(void *ignore, const struct devlink *devlink,
+ struct sk_buff *skb,
+ const struct devlink_trap_metadata *metadata);
};
struct net_dm_skb_cb {
union {
- struct net_dm_hw_metadata *hw_metadata;
+ struct devlink_trap_metadata *hw_metadata;
void *pc;
};
};
@@ -432,8 +434,9 @@ out:
}
static void
-net_dm_hw_summary_probe(struct sk_buff *skb,
- const struct net_dm_hw_metadata *hw_metadata)
+net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
+ struct sk_buff *skb,
+ const struct devlink_trap_metadata *metadata)
{
struct net_dm_hw_entries *hw_entries;
struct net_dm_hw_entry *hw_entry;
@@ -441,6 +444,9 @@ net_dm_hw_summary_probe(struct sk_buff *skb,
unsigned long flags;
int i;
+ if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL)
+ return;
+
hw_data = this_cpu_ptr(&dm_hw_cpu_data);
spin_lock_irqsave(&hw_data->lock, flags);
hw_entries = hw_data->hw_entries;
@@ -450,7 +456,7 @@ net_dm_hw_summary_probe(struct sk_buff *skb,
for (i = 0; i < hw_entries->num_entries; i++) {
hw_entry = &hw_entries->entries[i];
- if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+ if (!strncmp(hw_entry->trap_name, metadata->trap_name,
NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
hw_entry->count++;
goto out;
@@ -460,7 +466,7 @@ net_dm_hw_summary_probe(struct sk_buff *skb,
goto out;
hw_entry = &hw_entries->entries[hw_entries->num_entries];
- strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+ strlcpy(hw_entry->trap_name, metadata->trap_name,
NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
hw_entry->count = 1;
hw_entries->num_entries++;
@@ -479,7 +485,7 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
.napi_poll_probe = trace_napi_poll_hit,
.work_item_func = send_dm_alert,
.hw_work_item_func = net_dm_hw_summary_work,
- .hw_probe = net_dm_hw_summary_probe,
+ .hw_trap_probe = net_dm_hw_trap_summary_probe,
};
static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
@@ -705,7 +711,7 @@ static void net_dm_packet_work(struct work_struct *work)
}
static size_t
-net_dm_flow_action_cookie_size(const struct net_dm_hw_metadata *hw_metadata)
+net_dm_flow_action_cookie_size(const struct devlink_trap_metadata *hw_metadata)
{
return hw_metadata->fa_cookie ?
nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0;
@@ -713,7 +719,7 @@ net_dm_flow_action_cookie_size(const struct net_dm_hw_metadata *hw_metadata)
static size_t
net_dm_hw_packet_report_size(size_t payload_len,
- const struct net_dm_hw_metadata *hw_metadata)
+ const struct devlink_trap_metadata *hw_metadata)
{
size_t size;
@@ -743,7 +749,7 @@ net_dm_hw_packet_report_size(size_t payload_len,
static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
struct sk_buff *skb, size_t payload_len)
{
- struct net_dm_hw_metadata *hw_metadata;
+ struct devlink_trap_metadata *hw_metadata;
struct nlattr *attr;
void *hdr;
@@ -810,56 +816,56 @@ nla_put_failure:
return -EMSGSIZE;
}
-static struct net_dm_hw_metadata *
-net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+static struct devlink_trap_metadata *
+net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
{
const struct flow_action_cookie *fa_cookie;
- struct net_dm_hw_metadata *n_hw_metadata;
+ struct devlink_trap_metadata *hw_metadata;
const char *trap_group_name;
const char *trap_name;
- n_hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC);
- if (!n_hw_metadata)
+ hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+ if (!hw_metadata)
return NULL;
- trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC);
+ trap_group_name = kstrdup(metadata->trap_group_name, GFP_ATOMIC);
if (!trap_group_name)
goto free_hw_metadata;
- n_hw_metadata->trap_group_name = trap_group_name;
+ hw_metadata->trap_group_name = trap_group_name;
- trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC);
+ trap_name = kstrdup(metadata->trap_name, GFP_ATOMIC);
if (!trap_name)
goto free_trap_group;
- n_hw_metadata->trap_name = trap_name;
+ hw_metadata->trap_name = trap_name;
- if (hw_metadata->fa_cookie) {
+ if (metadata->fa_cookie) {
size_t cookie_size = sizeof(*fa_cookie) +
- hw_metadata->fa_cookie->cookie_len;
+ metadata->fa_cookie->cookie_len;
- fa_cookie = kmemdup(hw_metadata->fa_cookie, cookie_size,
+ fa_cookie = kmemdup(metadata->fa_cookie, cookie_size,
GFP_ATOMIC);
if (!fa_cookie)
goto free_trap_name;
- n_hw_metadata->fa_cookie = fa_cookie;
+ hw_metadata->fa_cookie = fa_cookie;
}
- n_hw_metadata->input_dev = hw_metadata->input_dev;
- if (n_hw_metadata->input_dev)
- dev_hold(n_hw_metadata->input_dev);
+ hw_metadata->input_dev = metadata->input_dev;
+ if (hw_metadata->input_dev)
+ dev_hold(hw_metadata->input_dev);
- return n_hw_metadata;
+ return hw_metadata;
free_trap_name:
kfree(trap_name);
free_trap_group:
kfree(trap_group_name);
free_hw_metadata:
- kfree(n_hw_metadata);
+ kfree(hw_metadata);
return NULL;
}
static void
-net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
{
if (hw_metadata->input_dev)
dev_put(hw_metadata->input_dev);
@@ -871,7 +877,7 @@ net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
static void net_dm_hw_packet_report(struct sk_buff *skb)
{
- struct net_dm_hw_metadata *hw_metadata;
+ struct devlink_trap_metadata *hw_metadata;
struct sk_buff *msg;
size_t payload_len;
int rc;
@@ -924,15 +930,19 @@ static void net_dm_hw_packet_work(struct work_struct *work)
}
static void
-net_dm_hw_packet_probe(struct sk_buff *skb,
- const struct net_dm_hw_metadata *hw_metadata)
+net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink,
+ struct sk_buff *skb,
+ const struct devlink_trap_metadata *metadata)
{
- struct net_dm_hw_metadata *n_hw_metadata;
+ struct devlink_trap_metadata *n_hw_metadata;
ktime_t tstamp = ktime_get_real();
struct per_cpu_dm_data *hw_data;
struct sk_buff *nskb;
unsigned long flags;
+ if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL)
+ return;
+
if (!skb_mac_header_was_set(skb))
return;
@@ -940,7 +950,7 @@ net_dm_hw_packet_probe(struct sk_buff *skb,
if (!nskb)
return;
- n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+ n_hw_metadata = net_dm_hw_metadata_copy(metadata);
if (!n_hw_metadata)
goto free;
@@ -975,7 +985,7 @@ static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
.napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
.work_item_func = net_dm_packet_work,
.hw_work_item_func = net_dm_hw_packet_work,
- .hw_probe = net_dm_hw_packet_probe,
+ .hw_trap_probe = net_dm_hw_trap_packet_probe,
};
static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
@@ -983,25 +993,32 @@ static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
[NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
};
-void net_dm_hw_report(struct sk_buff *skb,
- const struct net_dm_hw_metadata *hw_metadata)
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops)
{
- rcu_read_lock();
-
- if (!monitor_hw)
- goto out;
+ return register_trace_devlink_trap_report(ops->hw_trap_probe, NULL);
+}
- net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops)
+{
+ unregister_trace_devlink_trap_report(ops->hw_trap_probe, NULL);
+ tracepoint_synchronize_unregister();
+}
+#else
+static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops)
+{
+ return -EOPNOTSUPP;
+}
-out:
- rcu_read_unlock();
+static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops)
+{
}
-EXPORT_SYMBOL_GPL(net_dm_hw_report);
+#endif
static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
{
const struct net_dm_alert_ops *ops;
- int cpu;
+ int cpu, rc;
if (monitor_hw) {
NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
@@ -1025,13 +1042,24 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
kfree(hw_entries);
}
+ rc = net_dm_hw_probe_register(ops);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to devlink_trap_probe() tracepoint");
+ goto err_module_put;
+ }
+
monitor_hw = true;
return 0;
+
+err_module_put:
+ module_put(THIS_MODULE);
+ return rc;
}
static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
{
+ const struct net_dm_alert_ops *ops;
int cpu;
if (!monitor_hw) {
@@ -1039,12 +1067,11 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
return;
}
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
monitor_hw = false;
- /* After this call returns we are guaranteed that no CPU is processing
- * any hardware drops.
- */
- synchronize_rcu();
+ net_dm_hw_probe_unregister(ops);
for_each_possible_cpu(cpu) {
struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
@@ -1053,7 +1080,7 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
del_timer_sync(&hw_data->send_timer);
cancel_work_sync(&hw_data->dm_alert_work);
while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
- struct net_dm_hw_metadata *hw_metadata;
+ struct devlink_trap_metadata *hw_metadata;
hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
net_dm_hw_metadata_free(hw_metadata);
@@ -1548,7 +1575,7 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
[NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG },
};
-static const struct genl_ops dropmon_ops[] = {
+static const struct genl_small_ops dropmon_ops[] = {
{
.cmd = NET_DM_CMD_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
@@ -1598,8 +1625,8 @@ static struct genl_family net_drop_monitor_family __ro_after_init = {
.pre_doit = net_dm_nl_pre_doit,
.post_doit = net_dm_nl_post_doit,
.module = THIS_MODULE,
- .ops = dropmon_ops,
- .n_ops = ARRAY_SIZE(dropmon_ops),
+ .small_ops = dropmon_ops,
+ .n_small_ops = ARRAY_SIZE(dropmon_ops),
.mcgrps = dropmon_mcgrps,
.n_mcgrps = ARRAY_SIZE(dropmon_mcgrps),
};
diff --git a/net/core/dst.c b/net/core/dst.c
index d6b6ced0d451..0c01bd8d9d81 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -144,7 +144,7 @@ static void dst_destroy_rcu(struct rcu_head *head)
/* Operations to mark dst as DEAD and clean up the net device referenced
* by dst:
- * 1. put the dst under loopback interface and discard all tx/rx packets
+ * 1. put the dst under blackhole interface and discard all tx/rx packets
* on this route.
* 2. release the net_device
* This function should be called when removing routes from the fib tree
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 51678a528f85..7bcfb16854cb 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -16,7 +16,7 @@
#include <net/ip_tunnels.h>
#include <linux/indirect_call_wrapper.h>
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+#if defined(CONFIG_IPV6) && defined(CONFIG_IPV6_MULTIPLE_TABLES)
#ifdef CONFIG_IP_MULTIPLE_TABLES
#define INDIRECT_CALL_MT(f, f2, f1, ...) \
INDIRECT_CALL_INET(f, f2, f1, __VA_ARGS__)
diff --git a/net/core/filter.c b/net/core/filter.c
index 1f647ab986b6..c5e2a1c5fd8d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -76,6 +76,10 @@
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
+#include <net/tls.h>
+
+static const struct bpf_func_proto *
+bpf_sk_base_func_proto(enum bpf_func_id func_id);
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
@@ -2160,13 +2164,234 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
return __bpf_redirect_no_mac(skb, dev, flags);
}
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ u32 hh_len = LL_RESERVED_SPACE(dev);
+ const struct in6_addr *nexthop;
+ struct neighbour *neigh;
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ goto out_drop;
+ }
+
+ skb->dev = dev;
+ skb->tstamp = 0;
+
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, hh_len);
+ if (unlikely(!skb2)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ rcu_read_lock_bh();
+ nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+ &ipv6_hdr(skb)->daddr);
+ neigh = ip_neigh_gw6(dev, nexthop);
+ if (likely(!IS_ERR(neigh))) {
+ int ret;
+
+ sock_confirm_neigh(skb, neigh);
+ dev_xmit_recursion_inc();
+ ret = neigh_output(neigh, skb, false);
+ dev_xmit_recursion_dec();
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+out_drop:
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct net *net = dev_net(dev);
+ int err, ret = NET_XMIT_DROP;
+ struct dst_entry *dst;
+ struct flowi6 fl6 = {
+ .flowi6_flags = FLOWI_FLAG_ANYSRC,
+ .flowi6_mark = skb->mark,
+ .flowlabel = ip6_flowinfo(ip6h),
+ .flowi6_oif = dev->ifindex,
+ .flowi6_proto = ip6h->nexthdr,
+ .daddr = ip6h->daddr,
+ .saddr = ip6h->saddr,
+ };
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ goto out_drop;
+
+ skb_dst_set(skb, dst);
+
+ err = bpf_out_neigh_v6(net, skb);
+ if (unlikely(net_xmit_eval(err)))
+ dev->stats.tx_errors++;
+ else
+ ret = NET_XMIT_SUCCESS;
+ goto out_xmit;
+out_drop:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+out_xmit:
+ return ret;
+}
+#else
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+#endif /* CONFIG_IPV6 */
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct rtable *rt = container_of(dst, struct rtable, dst);
+ struct net_device *dev = dst->dev;
+ u32 hh_len = LL_RESERVED_SPACE(dev);
+ struct neighbour *neigh;
+ bool is_v6gw = false;
+
+ if (dev_xmit_recursion()) {
+ net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
+ goto out_drop;
+ }
+
+ skb->dev = dev;
+ skb->tstamp = 0;
+
+ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_realloc_headroom(skb, hh_len);
+ if (unlikely(!skb2)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ rcu_read_lock_bh();
+ neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+ if (likely(!IS_ERR(neigh))) {
+ int ret;
+
+ sock_confirm_neigh(skb, neigh);
+ dev_xmit_recursion_inc();
+ ret = neigh_output(neigh, skb, is_v6gw);
+ dev_xmit_recursion_dec();
+ rcu_read_unlock_bh();
+ return ret;
+ }
+ rcu_read_unlock_bh();
+out_drop:
+ kfree_skb(skb);
+ return -ENETDOWN;
+}
+
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct iphdr *ip4h = ip_hdr(skb);
+ struct net *net = dev_net(dev);
+ int err, ret = NET_XMIT_DROP;
+ struct rtable *rt;
+ struct flowi4 fl4 = {
+ .flowi4_flags = FLOWI_FLAG_ANYSRC,
+ .flowi4_mark = skb->mark,
+ .flowi4_tos = RT_TOS(ip4h->tos),
+ .flowi4_oif = dev->ifindex,
+ .flowi4_proto = ip4h->protocol,
+ .daddr = ip4h->daddr,
+ .saddr = ip4h->saddr,
+ };
+
+ rt = ip_route_output_flow(net, &fl4, NULL);
+ if (IS_ERR(rt))
+ goto out_drop;
+ if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+ ip_rt_put(rt);
+ goto out_drop;
+ }
+
+ skb_dst_set(skb, &rt->dst);
+
+ err = bpf_out_neigh_v4(net, skb);
+ if (unlikely(net_xmit_eval(err)))
+ dev->stats.tx_errors++;
+ else
+ ret = NET_XMIT_SUCCESS;
+ goto out_xmit;
+out_drop:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+out_xmit:
+ return ret;
+}
+#else
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+#endif /* CONFIG_INET */
+
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ethhdr *ethh = eth_hdr(skb);
+
+ if (unlikely(skb->mac_header >= skb->network_header))
+ goto out;
+ bpf_push_mac_rcsum(skb);
+ if (is_multicast_ether_addr(ethh->h_dest))
+ goto out;
+
+ skb_pull(skb, sizeof(*ethh));
+ skb_unset_mac_header(skb);
+ skb_reset_network_header(skb);
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return __bpf_redirect_neigh_v4(skb, dev);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return __bpf_redirect_neigh_v6(skb, dev);
+out:
+ kfree_skb(skb);
+ return -ENOTSUPP;
+}
+
+/* Internal, non-exposed redirect flags. */
+enum {
+ BPF_F_NEIGH = (1ULL << 1),
+ BPF_F_PEER = (1ULL << 2),
+#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER)
+};
+
BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
struct net_device *dev;
struct sk_buff *clone;
int ret;
- if (unlikely(flags & ~(BPF_F_INGRESS)))
+ if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
return -EINVAL;
dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
@@ -2203,11 +2428,45 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
+int skb_do_redirect(struct sk_buff *skb)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct net *net = dev_net(skb->dev);
+ struct net_device *dev;
+ u32 flags = ri->flags;
+
+ dev = dev_get_by_index_rcu(net, ri->tgt_index);
+ ri->tgt_index = 0;
+ ri->flags = 0;
+ if (unlikely(!dev))
+ goto out_drop;
+ if (flags & BPF_F_PEER) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (unlikely(!ops->ndo_get_peer_dev ||
+ !skb_at_tc_ingress(skb)))
+ goto out_drop;
+ dev = ops->ndo_get_peer_dev(dev);
+ if (unlikely(!dev ||
+ !is_skb_forwardable(dev, skb) ||
+ net_eq(net, dev_net(dev))))
+ goto out_drop;
+ skb->dev = dev;
+ return -EAGAIN;
+ }
+ return flags & BPF_F_NEIGH ?
+ __bpf_redirect_neigh(skb, dev) :
+ __bpf_redirect(skb, dev, flags);
+out_drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- if (unlikely(flags & ~(BPF_F_INGRESS)))
+ if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
return TC_ACT_SHOT;
ri->flags = flags;
@@ -2216,29 +2475,56 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
return TC_ACT_REDIRECT;
}
-int skb_do_redirect(struct sk_buff *skb)
+static const struct bpf_func_proto bpf_redirect_proto = {
+ .func = bpf_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- struct net_device *dev;
- dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
- ri->tgt_index = 0;
- if (unlikely(!dev)) {
- kfree_skb(skb);
- return -EINVAL;
- }
+ if (unlikely(flags))
+ return TC_ACT_SHOT;
+
+ ri->flags = BPF_F_PEER;
+ ri->tgt_index = ifindex;
- return __bpf_redirect(skb, dev, ri->flags);
+ return TC_ACT_REDIRECT;
}
-static const struct bpf_func_proto bpf_redirect_proto = {
- .func = bpf_redirect,
+static const struct bpf_func_proto bpf_redirect_peer_proto = {
+ .func = bpf_redirect_peer,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ if (unlikely(flags))
+ return TC_ACT_SHOT;
+
+ ri->flags = BPF_F_NEIGH;
+ ri->tgt_index = ifindex;
+
+ return TC_ACT_REDIRECT;
+}
+
+static const struct bpf_func_proto bpf_redirect_neigh_proto = {
+ .func = bpf_redirect_neigh,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
{
msg->apply_bytes = bytes;
@@ -2704,6 +2990,23 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
};
+
+BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
+{
+ struct sock *sk = skb_to_full_sk(skb);
+
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
+ return sock_cgroup_classid(&sk->sk_cgrp_data);
+}
+
+static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
+ .func = bpf_skb_cgroup_classid,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
#endif
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -3215,6 +3518,48 @@ static u32 __bpf_skb_max_len(const struct sk_buff *skb)
SKB_MAX_ALLOC;
}
+BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+ u32, mode, u64, flags)
+{
+ u32 len_diff_abs = abs(len_diff);
+ bool shrink = len_diff < 0;
+ int ret = 0;
+
+ if (unlikely(flags || mode))
+ return -EINVAL;
+ if (unlikely(len_diff_abs > 0xfffU))
+ return -EFAULT;
+
+ if (!shrink) {
+ ret = skb_cow(skb, len_diff);
+ if (unlikely(ret < 0))
+ return ret;
+ __skb_push(skb, len_diff_abs);
+ memset(skb->data, 0, len_diff_abs);
+ } else {
+ if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
+ return -ENOMEM;
+ __skb_pull(skb, len_diff_abs);
+ }
+ bpf_compute_data_end_sk_skb(skb);
+ if (tls_sw_has_ctx_rx(skb->sk)) {
+ struct strp_msg *rxm = strp_msg(skb);
+
+ rxm->full_len += len_diff;
+ }
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_adjust_room_proto = {
+ .func = sk_skb_adjust_room,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
u32, mode, u64, flags)
{
@@ -3803,19 +4148,18 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-BTF_ID_LIST(bpf_skb_output_btf_ids)
-BTF_ID(struct, sk_buff)
+BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_skb_output_btf_ids[0],
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_skb_output_btf_ids,
};
static unsigned short bpf_tunnel_key_af(u64 flags)
@@ -4086,18 +4430,17 @@ static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
{
struct cgroup *cgrp;
+ sk = sk_to_full_sk(sk);
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
return cgroup_id(cgrp);
}
BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
{
- struct sock *sk = skb_to_full_sk(skb);
-
- if (!sk || !sk_fullsock(sk))
- return 0;
-
- return __bpf_sk_cgroup_id(sk);
+ return __bpf_sk_cgroup_id(skb->sk);
}
static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4113,6 +4456,10 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
struct cgroup *ancestor;
struct cgroup *cgrp;
+ sk = sk_to_full_sk(sk);
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ancestor = cgroup_ancestor(cgrp, ancestor_level);
if (!ancestor)
@@ -4124,12 +4471,7 @@ static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
ancestor_level)
{
- struct sock *sk = skb_to_full_sk(skb);
-
- if (!sk || !sk_fullsock(sk))
- return 0;
-
- return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+ return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
}
static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
@@ -4149,7 +4491,7 @@ static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
.func = bpf_sk_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCKET,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
@@ -4161,7 +4503,7 @@ static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
.func = bpf_sk_ancestor_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCKET,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_ANYTHING,
};
#endif
@@ -4199,24 +4541,23 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-BTF_ID_LIST(bpf_xdp_output_btf_ids)
-BTF_ID(struct, xdp_buff)
+BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
const struct bpf_func_proto bpf_xdp_output_proto = {
.func = bpf_xdp_event_output,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &bpf_xdp_output_btf_ids[0],
.arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_MEM,
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
- .btf_id = bpf_xdp_output_btf_ids,
};
BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
{
- return skb->sk ? sock_gen_cookie(skb->sk) : 0;
+ return skb->sk ? __sock_gen_cookie(skb->sk) : 0;
}
static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
@@ -4228,7 +4569,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
{
- return sock_gen_cookie(ctx->sk);
+ return __sock_gen_cookie(ctx->sk);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
@@ -4240,7 +4581,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
{
- return sock_gen_cookie(ctx);
+ return __sock_gen_cookie(ctx);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
@@ -4252,7 +4593,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
{
- return sock_gen_cookie(ctx->sk);
+ return __sock_gen_cookie(ctx->sk);
}
static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
@@ -4265,7 +4606,7 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
static u64 __bpf_get_netns_cookie(struct sock *sk)
{
#ifdef CONFIG_NET_NS
- return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
+ return __net_gen_cookie(sk ? sk->sk_net.net : &init_net);
#else
return 0;
#endif
@@ -4313,10 +4654,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-#define SOCKOPT_CC_REINIT (1 << 0)
-
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
- char *optval, int optlen, u32 flags)
+ char *optval, int optlen)
{
char devname[IFNAMSIZ];
int val, valbool;
@@ -4449,16 +4788,15 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
char name[TCP_CA_NAME_MAX];
- bool reinit = flags & SOCKOPT_CC_REINIT;
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false,
- reinit, true);
+ ret = tcp_set_congestion_control(sk, name, false, true);
} else {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long timeout;
if (optlen != sizeof(int))
return -EINVAL;
@@ -4480,6 +4818,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
tp->snd_ssthresh = val;
}
break;
+ case TCP_BPF_DELACK_MAX:
+ timeout = usecs_to_jiffies(val);
+ if (timeout > TCP_DELACK_MAX ||
+ timeout < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ inet_csk(sk)->icsk_delack_max = timeout;
+ break;
+ case TCP_BPF_RTO_MIN:
+ timeout = usecs_to_jiffies(val);
+ if (timeout > TCP_RTO_MIN ||
+ timeout < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ inet_csk(sk)->icsk_rto_min = timeout;
+ break;
case TCP_SAVE_SYN:
if (val < 0 || val > 1)
ret = -EINVAL;
@@ -4513,6 +4865,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else
icsk->icsk_user_timeout = val;
break;
+ case TCP_NOTSENT_LOWAT:
+ tp->notsent_lowat = val;
+ sk->sk_write_space(sk);
+ break;
default:
ret = -EINVAL;
}
@@ -4550,9 +4906,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
tp = tcp_sk(sk);
if (optlen <= 0 || !tp->saved_syn ||
- optlen > tp->saved_syn[0])
+ optlen > tcp_saved_syn_len(tp->saved_syn))
goto err_clear;
- memcpy(optval, tp->saved_syn + 1, optlen);
+ memcpy(optval, tp->saved_syn->data, optlen);
break;
default:
goto err_clear;
@@ -4600,9 +4956,7 @@ err_clear:
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
- u32 flags = 0;
- return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
- flags);
+ return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
}
static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
@@ -4636,11 +4990,7 @@ static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
- u32 flags = 0;
- if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
- flags |= SOCKOPT_CC_REINIT;
- return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
- flags);
+ return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
}
static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
@@ -4654,9 +5004,99 @@ static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
+ int optname, const u8 **start)
+{
+ struct sk_buff *syn_skb = bpf_sock->syn_skb;
+ const u8 *hdr_start;
+ int ret;
+
+ if (syn_skb) {
+ /* sk is a request_sock here */
+
+ if (optname == TCP_BPF_SYN) {
+ hdr_start = syn_skb->data;
+ ret = tcp_hdrlen(syn_skb);
+ } else if (optname == TCP_BPF_SYN_IP) {
+ hdr_start = skb_network_header(syn_skb);
+ ret = skb_network_header_len(syn_skb) +
+ tcp_hdrlen(syn_skb);
+ } else {
+ /* optname == TCP_BPF_SYN_MAC */
+ hdr_start = skb_mac_header(syn_skb);
+ ret = skb_mac_header_len(syn_skb) +
+ skb_network_header_len(syn_skb) +
+ tcp_hdrlen(syn_skb);
+ }
+ } else {
+ struct sock *sk = bpf_sock->sk;
+ struct saved_syn *saved_syn;
+
+ if (sk->sk_state == TCP_NEW_SYN_RECV)
+ /* synack retransmit. bpf_sock->syn_skb will
+ * not be available. It has to resort to
+ * saved_syn (if it is saved).
+ */
+ saved_syn = inet_reqsk(sk)->saved_syn;
+ else
+ saved_syn = tcp_sk(sk)->saved_syn;
+
+ if (!saved_syn)
+ return -ENOENT;
+
+ if (optname == TCP_BPF_SYN) {
+ hdr_start = saved_syn->data +
+ saved_syn->mac_hdrlen +
+ saved_syn->network_hdrlen;
+ ret = saved_syn->tcp_hdrlen;
+ } else if (optname == TCP_BPF_SYN_IP) {
+ hdr_start = saved_syn->data +
+ saved_syn->mac_hdrlen;
+ ret = saved_syn->network_hdrlen +
+ saved_syn->tcp_hdrlen;
+ } else {
+ /* optname == TCP_BPF_SYN_MAC */
+
+ /* TCP_SAVE_SYN may not have saved the mac hdr */
+ if (!saved_syn->mac_hdrlen)
+ return -ENOENT;
+
+ hdr_start = saved_syn->data;
+ ret = saved_syn->mac_hdrlen +
+ saved_syn->network_hdrlen +
+ saved_syn->tcp_hdrlen;
+ }
+ }
+
+ *start = hdr_start;
+ return ret;
+}
+
BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
+ if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
+ optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
+ int ret, copy_len = 0;
+ const u8 *start;
+
+ ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
+ if (ret > 0) {
+ copy_len = ret;
+ if (optlen < copy_len) {
+ copy_len = optlen;
+ ret = -ENOSPC;
+ }
+
+ memcpy(optval, start, copy_len);
+ }
+
+ /* Zero out unused buffer at the end */
+ memset(optval + copy_len, 0, optlen - copy_len);
+
+ return ret;
+ }
+
return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
}
@@ -4794,7 +5234,6 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
- params->ifindex = dev->ifindex;
return 0;
}
@@ -4838,6 +5277,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl4.saddr = params->ipv4_src;
fl4.fl4_sport = params->sport;
fl4.fl4_dport = params->dport;
+ fl4.flowi4_multipath_hash = 0;
if (flags & BPF_FIB_LOOKUP_DIRECT) {
u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
@@ -4890,6 +5330,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
dev = nhc->nhc_dev;
params->rt_metric = res.fi->fib_priority;
+ params->ifindex = dev->ifindex;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
@@ -5015,6 +5456,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
dev = res.nh->fib_nh_dev;
params->rt_metric = res.f6i->fib6_metric;
+ params->ifindex = dev->ifindex;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here.
@@ -5600,7 +6042,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
- if (sk_is_refcounted(sk))
+ if (sk && sk_is_refcounted(sk))
sock_gen_put(sk);
return 0;
}
@@ -5609,7 +6051,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5991,7 +6433,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
u32 cookie;
int ret;
- if (unlikely(th_len < sizeof(*th)))
+ if (unlikely(!sk || th_len < sizeof(*th)))
return -EINVAL;
/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
@@ -6044,7 +6486,7 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
.gpl_only = true,
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM,
@@ -6058,7 +6500,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
u32 cookie;
u16 mss;
- if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+ if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
return -EINVAL;
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
@@ -6113,7 +6555,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
.gpl_only = true, /* __cookie_v*_init_sequence() is GPL */
.pkt_access = true,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_PTR_TO_MEM,
@@ -6122,7 +6564,7 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
{
- if (flags != 0)
+ if (!sk || flags != 0)
return -EINVAL;
if (!skb_at_tc_ingress(skb))
return -EOPNOTSUPP;
@@ -6146,7 +6588,233 @@ static const struct bpf_func_proto bpf_sk_assign_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_SOCK_COMMON,
+ .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
+ u8 search_kind, const u8 *magic,
+ u8 magic_len, bool *eol)
+{
+ u8 kind, kind_len;
+
+ *eol = false;
+
+ while (op < opend) {
+ kind = op[0];
+
+ if (kind == TCPOPT_EOL) {
+ *eol = true;
+ return ERR_PTR(-ENOMSG);
+ } else if (kind == TCPOPT_NOP) {
+ op++;
+ continue;
+ }
+
+ if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
+ /* Something is wrong in the received header.
+ * Follow the TCP stack's tcp_parse_options()
+ * and just bail here.
+ */
+ return ERR_PTR(-EFAULT);
+
+ kind_len = op[1];
+ if (search_kind == kind) {
+ if (!magic_len)
+ return op;
+
+ if (magic_len > kind_len - 2)
+ return ERR_PTR(-ENOMSG);
+
+ if (!memcmp(&op[2], magic, magic_len))
+ return op;
+ }
+
+ op += kind_len;
+ }
+
+ return ERR_PTR(-ENOMSG);
+}
+
+BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+ void *, search_res, u32, len, u64, flags)
+{
+ bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
+ const u8 *op, *opend, *magic, *search = search_res;
+ u8 search_kind, search_len, copy_len, magic_len;
+ int ret;
+
+ /* 2 byte is the minimal option len except TCPOPT_NOP and
+ * TCPOPT_EOL which are useless for the bpf prog to learn
+ * and this helper disallow loading them also.
+ */
+ if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
+ return -EINVAL;
+
+ search_kind = search[0];
+ search_len = search[1];
+
+ if (search_len > len || search_kind == TCPOPT_NOP ||
+ search_kind == TCPOPT_EOL)
+ return -EINVAL;
+
+ if (search_kind == TCPOPT_EXP || search_kind == 253) {
+ /* 16 or 32 bit magic. +2 for kind and kind length */
+ if (search_len != 4 && search_len != 6)
+ return -EINVAL;
+ magic = &search[2];
+ magic_len = search_len - 2;
+ } else {
+ if (search_len)
+ return -EINVAL;
+ magic = NULL;
+ magic_len = 0;
+ }
+
+ if (load_syn) {
+ ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
+ if (ret < 0)
+ return ret;
+
+ opend = op + ret;
+ op += sizeof(struct tcphdr);
+ } else {
+ if (!bpf_sock->skb ||
+ bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
+ /* This bpf_sock->op cannot call this helper */
+ return -EPERM;
+
+ opend = bpf_sock->skb_data_end;
+ op = bpf_sock->skb->data + sizeof(struct tcphdr);
+ }
+
+ op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
+ &eol);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+
+ copy_len = op[1];
+ ret = copy_len;
+ if (copy_len > len) {
+ ret = -ENOSPC;
+ copy_len = len;
+ }
+
+ memcpy(search_res, op, copy_len);
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
+ .func = bpf_sock_ops_load_hdr_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+ const void *, from, u32, len, u64, flags)
+{
+ u8 new_kind, new_kind_len, magic_len = 0, *opend;
+ const u8 *op, *new_op, *magic = NULL;
+ struct sk_buff *skb;
+ bool eol;
+
+ if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
+ return -EPERM;
+
+ if (len < 2 || flags)
+ return -EINVAL;
+
+ new_op = from;
+ new_kind = new_op[0];
+ new_kind_len = new_op[1];
+
+ if (new_kind_len > len || new_kind == TCPOPT_NOP ||
+ new_kind == TCPOPT_EOL)
+ return -EINVAL;
+
+ if (new_kind_len > bpf_sock->remaining_opt_len)
+ return -ENOSPC;
+
+ /* 253 is another experimental kind */
+ if (new_kind == TCPOPT_EXP || new_kind == 253) {
+ if (new_kind_len < 4)
+ return -EINVAL;
+ /* Match for the 2 byte magic also.
+ * RFC 6994: the magic could be 2 or 4 bytes.
+ * Hence, matching by 2 byte only is on the
+ * conservative side but it is the right
+ * thing to do for the 'search-for-duplication'
+ * purpose.
+ */
+ magic = &new_op[2];
+ magic_len = 2;
+ }
+
+ /* Check for duplication */
+ skb = bpf_sock->skb;
+ op = skb->data + sizeof(struct tcphdr);
+ opend = bpf_sock->skb_data_end;
+
+ op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
+ &eol);
+ if (!IS_ERR(op))
+ return -EEXIST;
+
+ if (PTR_ERR(op) != -ENOMSG)
+ return PTR_ERR(op);
+
+ if (eol)
+ /* The option has been ended. Treat it as no more
+ * header option can be written.
+ */
+ return -ENOSPC;
+
+ /* No duplication found. Store the header option. */
+ memcpy(opend, from, new_kind_len);
+
+ bpf_sock->remaining_opt_len -= new_kind_len;
+ bpf_sock->skb_data_end += new_kind_len;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
+ .func = bpf_sock_ops_store_hdr_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
+ u32, len, u64, flags)
+{
+ if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
+ return -EPERM;
+
+ if (flags || len < 2)
+ return -EINVAL;
+
+ if (len > bpf_sock->remaining_opt_len)
+ return -ENOSPC;
+
+ bpf_sock->remaining_opt_len -= len;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
+ .func = bpf_sock_ops_reserve_hdr_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
@@ -6163,6 +6831,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_change_tail ||
func == sk_skb_change_tail ||
func == bpf_skb_adjust_room ||
+ func == sk_skb_adjust_room ||
func == bpf_skb_pull_data ||
func == sk_skb_pull_data ||
func == bpf_clone_redirect ||
@@ -6179,6 +6848,9 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_lwt_seg6_adjust_srh ||
func == bpf_lwt_seg6_action ||
#endif
+#ifdef CONFIG_INET
+ func == bpf_sock_ops_store_hdr_opt ||
+#endif
func == bpf_lwt_in_push_encap ||
func == bpf_lwt_xmit_push_encap)
return true;
@@ -6297,7 +6969,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6316,7 +6988,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6418,6 +7090,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
+ case BPF_FUNC_redirect_neigh:
+ return &bpf_redirect_neigh_proto;
+ case BPF_FUNC_redirect_peer:
+ return &bpf_redirect_peer_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc:
@@ -6448,6 +7124,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
#endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ case BPF_FUNC_skb_cgroup_classid:
+ return &bpf_skb_cgroup_classid_proto;
+#endif
#ifdef CONFIG_SOCK_CGROUP_DATA
case BPF_FUNC_skb_cgroup_id:
return &bpf_skb_cgroup_id_proto;
@@ -6477,7 +7157,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_assign_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6518,7 +7198,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_gen_syncookie_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6550,11 +7230,17 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
#ifdef CONFIG_INET
+ case BPF_FUNC_load_hdr_opt:
+ return &bpf_sock_ops_load_hdr_opt_proto;
+ case BPF_FUNC_store_hdr_opt:
+ return &bpf_sock_ops_store_hdr_opt_proto;
+ case BPF_FUNC_reserve_hdr_opt:
+ return &bpf_sock_ops_reserve_hdr_opt_proto;
case BPF_FUNC_tcp_sock:
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6600,7 +7286,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6621,6 +7307,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &sk_skb_change_tail_proto;
case BPF_FUNC_skb_change_head:
return &sk_skb_change_head_proto;
+ case BPF_FUNC_skb_adjust_room:
+ return &sk_skb_adjust_room_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
@@ -6642,7 +7330,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6653,7 +7341,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -6680,7 +7368,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -7065,8 +7753,6 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig,
bool indirect = BPF_MODE(orig->code) == BPF_IND;
struct bpf_insn *insn = insn_buf;
- /* We're guaranteed here that CTX is in R6. */
- *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
if (!indirect) {
*insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
} else {
@@ -7074,6 +7760,8 @@ static int bpf_gen_ld_abs(const struct bpf_insn *orig,
if (orig->imm)
*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
}
+ /* We're guaranteed here that CTX is in R6. */
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
switch (BPF_SIZE(orig->code)) {
case BPF_B:
@@ -7349,6 +8037,20 @@ static bool sock_ops_is_valid_access(int off, int size,
return false;
info->reg_type = PTR_TO_SOCKET_OR_NULL;
break;
+ case offsetof(struct bpf_sock_ops, skb_data):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_PACKET;
+ break;
+ case offsetof(struct bpf_sock_ops, skb_data_end):
+ if (size != sizeof(__u64))
+ return false;
+ info->reg_type = PTR_TO_PACKET_END;
+ break;
+ case offsetof(struct bpf_sock_ops, skb_tcp_flags):
+ bpf_ctx_record_field_size(info, size_default);
+ return bpf_ctx_narrow_access_ok(off, size,
+ size_default);
default:
if (size != size_default)
return false;
@@ -8450,17 +9152,22 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
switch (si->off) {
- case offsetof(struct bpf_sock_ops, op) ...
+ case offsetof(struct bpf_sock_ops, op):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ op),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, op));
+ break;
+
+ case offsetof(struct bpf_sock_ops, replylong[0]) ...
offsetof(struct bpf_sock_ops, replylong[3]):
- BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, op) !=
- sizeof_field(struct bpf_sock_ops_kern, op));
BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
sizeof_field(struct bpf_sock_ops_kern, reply));
BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
sizeof_field(struct bpf_sock_ops_kern, replylong));
off = si->off;
- off -= offsetof(struct bpf_sock_ops, op);
- off += offsetof(struct bpf_sock_ops_kern, op);
+ off -= offsetof(struct bpf_sock_ops, replylong[0]);
+ off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
off);
@@ -8681,6 +9388,49 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, sk):
SOCK_OPS_GET_SK();
break;
+ case offsetof(struct bpf_sock_ops, skb_data_end):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb_data_end),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb_data_end));
+ break;
+ case offsetof(struct bpf_sock_ops, skb_data):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct sk_buff, data));
+ break;
+ case offsetof(struct bpf_sock_ops, skb_len):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct sk_buff, len));
+ break;
+ case offsetof(struct bpf_sock_ops, skb_tcp_flags):
+ off = offsetof(struct sk_buff, cb);
+ off += offsetof(struct tcp_skb_cb, tcp_flags);
+ *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
+ skb),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern,
+ skb));
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
+ tcp_flags),
+ si->dst_reg, si->dst_reg, off);
+ break;
}
return insn - insn_buf;
}
@@ -9355,7 +10105,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id);
}
}
@@ -9505,24 +10255,13 @@ BTF_SOCK_TYPE_xxx
u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
#endif
-static bool check_arg_btf_id(u32 btf_id, u32 arg)
-{
- int i;
-
- /* only one argument, no need to check arg */
- for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
- if (btf_sock_ids[i] == btf_id)
- return true;
- return false;
-}
-
BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
{
/* tcp6_sock type is not generated in dwarf and hence btf,
* trigger an explicit type generation here.
*/
BTF_TYPE_EMIT(struct tcp6_sock);
- if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
sk->sk_family == AF_INET6)
return (unsigned long)sk;
@@ -9533,14 +10272,13 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
.func = bpf_skc_to_tcp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
};
BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
{
- if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
return (unsigned long)sk;
return (unsigned long)NULL;
@@ -9550,20 +10288,25 @@ const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
.func = bpf_skc_to_tcp_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
};
BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
{
+ /* BTF types for tcp_timewait_sock and inet_timewait_sock are not
+ * generated if CONFIG_INET=n. Trigger an explicit generation here.
+ */
+ BTF_TYPE_EMIT(struct inet_timewait_sock);
+ BTF_TYPE_EMIT(struct tcp_timewait_sock);
+
#ifdef CONFIG_INET
- if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
+ if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
return (unsigned long)sk;
#endif
#if IS_BUILTIN(CONFIG_IPV6)
- if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
+ if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
return (unsigned long)sk;
#endif
@@ -9574,20 +10317,19 @@ const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
.func = bpf_skc_to_tcp_timewait_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
};
BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
{
#ifdef CONFIG_INET
- if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
return (unsigned long)sk;
#endif
#if IS_BUILTIN(CONFIG_IPV6)
- if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
return (unsigned long)sk;
#endif
@@ -9598,8 +10340,7 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
.func = bpf_skc_to_tcp_request_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
};
@@ -9609,7 +10350,7 @@ BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
* trigger an explicit type generation here.
*/
BTF_TYPE_EMIT(struct udp6_sock);
- if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
return (unsigned long)sk;
@@ -9620,7 +10361,37 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
.func = bpf_skc_to_udp6_sock,
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
- .arg1_type = ARG_PTR_TO_BTF_ID,
- .check_btf_id = check_arg_btf_id,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
};
+
+static const struct bpf_func_proto *
+bpf_sk_base_func_proto(enum bpf_func_id func_id)
+{
+ const struct bpf_func_proto *func;
+
+ switch (func_id) {
+ case BPF_FUNC_skc_to_tcp6_sock:
+ func = &bpf_skc_to_tcp6_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_tcp_sock:
+ func = &bpf_skc_to_tcp_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_tcp_timewait_sock:
+ func = &bpf_skc_to_tcp_timewait_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_tcp_request_sock:
+ func = &bpf_skc_to_tcp_request_sock_proto;
+ break;
+ case BPF_FUNC_skc_to_udp6_sock:
+ func = &bpf_skc_to_udp6_sock_proto;
+ break;
+ default:
+ return bpf_base_func_proto(func_id);
+ }
+
+ if (!perfmon_capable())
+ return NULL;
+
+ return func;
+}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 29806eb765cf..e21950a2c897 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -932,8 +932,14 @@ bool __skb_flow_dissect(const struct net *net,
int offset = 0;
ops = skb->dev->dsa_ptr->tag_ops;
- if (ops->flow_dissect &&
- !ops->flow_dissect(skb, &proto, &offset)) {
+ /* Tail taggers don't break flow dissection */
+ if (!ops->tail_tag) {
+ if (ops->flow_dissect)
+ ops->flow_dissect(skb, &proto, &offset);
+ else
+ dsa_tag_generic_flow_dissect(skb,
+ &proto,
+ &offset);
hlen -= offset;
nhoff += offset;
}
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 6bbd06f7dc7d..c714e6a9dad4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -116,6 +116,12 @@ static int dev_seq_show(struct seq_file *seq, void *v)
return 0;
}
+static u32 softnet_backlog_len(struct softnet_data *sd)
+{
+ return skb_queue_len_lockless(&sd->input_pkt_queue) +
+ skb_queue_len_lockless(&sd->process_queue);
+}
+
static struct softnet_data *softnet_get_online(loff_t *pos)
{
struct softnet_data *sd = NULL;
@@ -159,12 +165,17 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
rcu_read_unlock();
#endif
+ /* the index is the CPU id owing this sd. Since offline CPUs are not
+ * displayed, it would be othrwise not trivial for the user-space
+ * mapping the data a specific CPU
+ */
seq_printf(seq,
- "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
sd->processed, sd->dropped, sd->time_squeeze, 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
- sd->received_rps, flow_limit_count);
+ sd->received_rps, flow_limit_count,
+ softnet_backlog_len(sd), (int)seq->index);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index efec66fa78b7..94fff0700bdd 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1158,8 +1158,8 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
* belongs to the root device it will be reported with just the
* traffic class, so just "0" for TC 0 for example.
*/
- return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
- sprintf(buf, "%u\n", tc);
+ return dev->num_tc < 0 ? sprintf(buf, "%d%d\n", tc, dev->num_tc) :
+ sprintf(buf, "%d\n", tc);
}
#ifdef CONFIG_XPS
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dcd61aca343e..dbc66b896287 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -19,6 +19,7 @@
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>
+#include <linux/cookie.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -69,16 +70,16 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
-static atomic64_t cookie_gen;
+DEFINE_COOKIE(net_cookie);
-u64 net_gen_cookie(struct net *net)
+u64 __net_gen_cookie(struct net *net)
{
while (1) {
u64 res = atomic64_read(&net->net_cookie);
if (res)
return res;
- res = atomic64_inc_return(&cookie_gen);
+ res = gen_cookie_next(&net_cookie);
atomic64_cmpxchg(&net->net_cookie, 0, res);
}
}
@@ -251,10 +252,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
if (refcount_read(&net->count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
- spin_lock(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
id = __peernet2id(net, peer);
if (id >= 0) {
- spin_unlock(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
return id;
}
@@ -264,12 +265,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
* just been idr_remove()'d from there in cleanup_net().
*/
if (!maybe_get_net(peer)) {
- spin_unlock(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
return NETNSA_NSID_NOT_ASSIGNED;
}
id = alloc_netid(net, peer, -1);
- spin_unlock(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
put_net(peer);
if (id < 0)
@@ -534,20 +535,20 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock(&tmp->nsid_lock);
+ spin_lock_bh(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock(&tmp->nsid_lock);
+ spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
GFP_KERNEL);
if (tmp == last)
break;
}
- spin_lock(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -760,9 +761,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
- spin_lock(&net->nsid_lock);
+ spin_lock_bh(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
err = -EEXIST;
NL_SET_BAD_ATTR(extack, nla);
NL_SET_ERR_MSG(extack,
@@ -771,7 +772,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = alloc_netid(net, peer, nsid);
- spin_unlock(&net->nsid_lock);
+ spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
nlh, GFP_KERNEL);
@@ -1101,7 +1102,10 @@ static int __init net_ns_init(void)
panic("Could not allocate generic netns");
rcu_assign_pointer(init_net.gen, ng);
- net_gen_cookie(&init_net);
+
+ preempt_disable();
+ __net_gen_cookie(&init_net);
+ preempt_enable();
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2338753e936b..c310c7c1cef7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -297,7 +297,7 @@ static int netpoll_owner_active(struct net_device *dev)
{
struct napi_struct *napi;
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner == smp_processor_id())
return 1;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 44fdbb9c6e53..105978604ffd 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -922,7 +922,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->min_pkt_size = value;
pkt_dev->cur_pkt_size = value;
}
- sprintf(pg_result, "OK: min_pkt_size=%u",
+ sprintf(pg_result, "OK: min_pkt_size=%d",
pkt_dev->min_pkt_size);
return count;
}
@@ -939,7 +939,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->max_pkt_size = value;
pkt_dev->cur_pkt_size = value;
}
- sprintf(pg_result, "OK: max_pkt_size=%u",
+ sprintf(pg_result, "OK: max_pkt_size=%d",
pkt_dev->max_pkt_size);
return count;
}
@@ -959,7 +959,7 @@ static ssize_t pktgen_if_write(struct file *file,
pkt_dev->max_pkt_size = value;
pkt_dev->cur_pkt_size = value;
}
- sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size);
+ sprintf(pg_result, "OK: pkt_size=%d", pkt_dev->min_pkt_size);
return count;
}
@@ -981,7 +981,7 @@ static ssize_t pktgen_if_write(struct file *file,
i += len;
pkt_dev->nfrags = value;
- sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags);
+ sprintf(pg_result, "OK: frags=%d", pkt_dev->nfrags);
return count;
}
if (!strcmp(name, "delay")) {
@@ -1146,7 +1146,7 @@ static ssize_t pktgen_if_write(struct file *file,
(!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))))
return -ENOTSUPP;
pkt_dev->burst = value < 1 ? 1 : value;
- sprintf(pg_result, "OK: burst=%d", pkt_dev->burst);
+ sprintf(pg_result, "OK: burst=%u", pkt_dev->burst);
return count;
}
if (!strcmp(name, "node")) {
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index d964a5147f22..e33fde06d528 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -107,6 +107,36 @@ unsigned int ptp_classify_raw(const struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ptp_classify_raw);
+struct ptp_header *ptp_parse_header(struct sk_buff *skb, unsigned int type)
+{
+ u8 *ptr = skb_mac_header(skb);
+
+ if (type & PTP_CLASS_VLAN)
+ ptr += VLAN_HLEN;
+
+ switch (type & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ ptr += IPV4_HLEN(ptr) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ ptr += IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ break;
+ default:
+ return NULL;
+ }
+
+ ptr += ETH_HLEN;
+
+ /* Ensure that the entire header is present in this packet. */
+ if (ptr + sizeof(struct ptp_header) > skb->data + skb->len)
+ return NULL;
+
+ return (struct ptp_header *)ptr;
+}
+EXPORT_SYMBOL_GPL(ptp_parse_header);
+
void __init ptp_classifier_init(void)
{
static struct sock_filter ptp_filter[] __initdata = {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6faf73d6a0f7..1ba8f0163744 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -712,11 +712,10 @@ EXPORT_SYMBOL(kfree_skb_list);
*
* Must only be called from net_ratelimit()-ed paths.
*
- * Dumps up to can_dump_full whole packets if full_pkt, headers otherwise.
+ * Dumps whole packets if full_pkt, only headers otherwise.
*/
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
{
- static atomic_t can_dump_full = ATOMIC_INIT(5);
struct skb_shared_info *sh = skb_shinfo(skb);
struct net_device *dev = skb->dev;
struct sock *sk = skb->sk;
@@ -726,9 +725,6 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
int i, len, seg_len;
if (full_pkt)
- full_pkt = atomic_dec_if_positive(&can_dump_full) >= 0;
-
- if (full_pkt)
len = skb->len;
else
len = min_t(int, skb->len, MAX_HEADER + 128);
@@ -895,9 +891,6 @@ void __kfree_skb_defer(struct sk_buff *skb)
void napi_consume_skb(struct sk_buff *skb, int budget)
{
- if (unlikely(!skb))
- return;
-
/* Zero budget indicate non-NAPI context called us, like netpoll */
if (unlikely(!budget)) {
dev_consume_skb_any(skb);
@@ -2725,19 +2718,20 @@ EXPORT_SYMBOL(skb_checksum);
/* Both of above in one bottle. */
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
- u8 *to, int len, __wsum csum)
+ u8 *to, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
int pos = 0;
+ __wsum csum = 0;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
csum = csum_partial_copy_nocheck(skb->data + offset, to,
- copy, csum);
+ copy);
if ((len -= copy) == 0)
return csum;
offset += copy;
@@ -2767,7 +2761,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
vaddr = kmap_atomic(p);
csum2 = csum_partial_copy_nocheck(vaddr + p_off,
to + copied,
- p_len, 0);
+ p_len);
kunmap_atomic(vaddr);
csum = csum_block_add(csum, csum2, pos);
pos += p_len;
@@ -2793,7 +2787,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
copy = len;
csum2 = skb_copy_and_csum_bits(frag_iter,
offset - start,
- to, copy, 0);
+ to, copy);
csum = csum_block_add(csum, csum2, pos);
if ((len -= copy) == 0)
return csum;
@@ -3013,7 +3007,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
csum = 0;
if (csstart != skb->len)
csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
- skb->len - csstart, 0);
+ skb->len - csstart);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
long csstuff = csstart + skb->csum_offset;
@@ -3934,7 +3928,7 @@ normal:
skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb,
len),
- len, 0);
+ len);
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
} else {
@@ -5561,6 +5555,73 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
}
EXPORT_SYMBOL(skb_vlan_push);
+/**
+ * skb_eth_pop() - Drop the Ethernet header at the head of a packet
+ *
+ * @skb: Socket buffer to modify
+ *
+ * Drop the Ethernet header of @skb.
+ *
+ * Expects that skb->data points to the mac header and that no VLAN tags are
+ * present.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_eth_pop(struct sk_buff *skb)
+{
+ if (!pskb_may_pull(skb, ETH_HLEN) || skb_vlan_tagged(skb) ||
+ skb_network_offset(skb) < ETH_HLEN)
+ return -EPROTO;
+
+ skb_pull_rcsum(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL(skb_eth_pop);
+
+/**
+ * skb_eth_push() - Add a new Ethernet header at the head of a packet
+ *
+ * @skb: Socket buffer to modify
+ * @dst: Destination MAC address of the new header
+ * @src: Source MAC address of the new header
+ *
+ * Prepend @skb with a new Ethernet header.
+ *
+ * Expects that skb->data points to the mac header, which must be empty.
+ *
+ * Returns 0 on success, -errno otherwise.
+ */
+int skb_eth_push(struct sk_buff *skb, const unsigned char *dst,
+ const unsigned char *src)
+{
+ struct ethhdr *eth;
+ int err;
+
+ if (skb_network_offset(skb) || skb_vlan_tag_present(skb))
+ return -EPROTO;
+
+ err = skb_cow_head(skb, sizeof(*eth));
+ if (err < 0)
+ return err;
+
+ skb_push(skb, sizeof(*eth));
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+
+ eth = eth_hdr(skb);
+ ether_addr_copy(eth->h_dest, dst);
+ ether_addr_copy(eth->h_source, src);
+ eth->h_proto = skb->protocol;
+
+ skb_postpush_rcsum(skb, eth, sizeof(*eth));
+
+ return 0;
+}
+EXPORT_SYMBOL(skb_eth_push);
+
/* Update the ethertype of hdr and the skb csum value if required. */
static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
__be16 ethertype)
@@ -5622,7 +5683,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
lse->label_stack_entry = mpls_lse;
skb_postpush_rcsum(skb, lse, MPLS_HLEN);
- if (ethernet)
+ if (ethernet && mac_len >= ETH_HLEN)
skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
skb->protocol = mpls_proto;
@@ -5662,7 +5723,7 @@ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
- if (ethernet) {
+ if (ethernet && mac_len >= ETH_HLEN) {
struct ethhdr *hdr;
/* use mpls_hdr() to get ethertype to account for VLANs. */
@@ -5955,8 +6016,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
size = SKB_WITH_OVERHEAD(ksize(data));
memcpy((struct skb_shared_info *)(data + size),
- skb_shinfo(skb), offsetof(struct skb_shared_info,
- frags[skb_shinfo(skb)->nr_frags]));
+ skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
if (skb_orphan_frags(skb, gfp_mask)) {
kfree(data);
return -ENOMEM;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 649583158983..654182ecf87b 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -433,10 +433,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- if (ingress)
- return sk_psock_skb_ingress(psock, skb);
- else
+ if (!ingress) {
+ if (!sock_writeable(psock->sk))
+ return -EAGAIN;
return skb_send_sock_locked(psock->sk, skb, off, len);
+ }
+ return sk_psock_skb_ingress(psock, skb);
}
static void sk_psock_backlog(struct work_struct *work)
@@ -494,14 +496,34 @@ end:
struct sk_psock *sk_psock_init(struct sock *sk, int node)
{
- struct sk_psock *psock = kzalloc_node(sizeof(*psock),
- GFP_ATOMIC | __GFP_NOWARN,
- node);
- if (!psock)
- return NULL;
+ struct sk_psock *psock;
+ struct proto *prot;
+
+ write_lock_bh(&sk->sk_callback_lock);
+
+ if (inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ if (sk->sk_user_data) {
+ psock = ERR_PTR(-EBUSY);
+ goto out;
+ }
+
+ psock = kzalloc_node(sizeof(*psock), GFP_ATOMIC | __GFP_NOWARN, node);
+ if (!psock) {
+ psock = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ prot = READ_ONCE(sk->sk_prot);
psock->sk = sk;
- psock->eval = __SK_NONE;
+ psock->eval = __SK_NONE;
+ psock->sk_proto = prot;
+ psock->saved_unhash = prot->unhash;
+ psock->saved_close = prot->close;
+ psock->saved_write_space = sk->sk_write_space;
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
@@ -516,6 +538,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
rcu_assign_sk_user_data_nocopy(sk, psock);
sock_hold(sk);
+out:
+ write_unlock_bh(&sk->sk_callback_lock);
return psock;
}
EXPORT_SYMBOL_GPL(sk_psock_init);
@@ -603,6 +627,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
rcu_assign_sk_user_data(sk, NULL);
if (psock->progs.skb_parser)
sk_psock_stop_strp(sk, psock);
+ else if (psock->progs.skb_verdict)
+ sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
@@ -660,19 +686,8 @@ EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
struct sk_buff *skb)
{
- int ret;
-
- skb->sk = psock->sk;
bpf_compute_data_end_sk_skb(skb);
- ret = bpf_prog_run_pin_on_cpu(prog, skb);
- /* strparser clones the skb before handing it to a upper layer,
- * meaning skb_orphan has been called. We NULL sk on the way out
- * to ensure we don't trigger a BUG_ON() in skb/sk operations
- * later and because we are not charging the memory of this skb
- * to any socket yet.
- */
- skb->sk = NULL;
- return ret;
+ return bpf_prog_run_pin_on_cpu(prog, skb);
}
static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
@@ -687,38 +702,35 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
- bool ingress;
sk_other = tcp_skb_bpf_redirect_fetch(skb);
+ /* This error is a buggy BPF program, it returned a redirect
+ * return code, but then didn't set a redirect interface.
+ */
if (unlikely(!sk_other)) {
kfree_skb(skb);
return;
}
psock_other = sk_psock(sk_other);
+ /* This error indicates the socket is being torn down or had another
+ * error that caused the pipe to break. We can't send a packet on
+ * a socket that is in this state so we drop the skb.
+ */
if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
kfree_skb(skb);
return;
}
- ingress = tcp_skb_bpf_ingress(skb);
- if ((!ingress && sock_writeable(sk_other)) ||
- (ingress &&
- atomic_read(&sk_other->sk_rmem_alloc) <=
- sk_other->sk_rcvbuf)) {
- if (!ingress)
- skb_set_owner_w(skb, sk_other);
- skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
- } else {
- kfree_skb(skb);
- }
+ skb_queue_tail(&psock_other->ingress_skb, skb);
+ schedule_work(&psock_other->work);
}
-static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
+ skb_set_owner_r(skb, sk);
sk_psock_skb_redirect(skb);
break;
case __SK_PASS:
@@ -736,11 +748,17 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
rcu_read_lock();
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
+ /* We skip full set_owner_r here because if we do a SK_PASS
+ * or SK_DROP we can skip skb memory accounting and use the
+ * TLS context.
+ */
+ skb->sk = psock->sk;
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+ skb->sk = NULL;
}
- sk_psock_tls_verdict_apply(skb, ret);
+ sk_psock_tls_verdict_apply(skb, psock->sk, ret);
rcu_read_unlock();
return ret;
}
@@ -749,7 +767,9 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
static void sk_psock_verdict_apply(struct sk_psock *psock,
struct sk_buff *skb, int verdict)
{
+ struct tcp_skb_cb *tcp;
struct sock *sk_other;
+ int err = -EIO;
switch (verdict) {
case __SK_PASS:
@@ -758,16 +778,24 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
goto out_free;
}
- if (atomic_read(&sk_other->sk_rmem_alloc) <=
- sk_other->sk_rcvbuf) {
- struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
- tcp->bpf.flags |= BPF_F_INGRESS;
+ tcp = TCP_SKB_CB(skb);
+ tcp->bpf.flags |= BPF_F_INGRESS;
+
+ /* If the queue is empty then we can submit directly
+ * into the msg queue. If its not empty we have to
+ * queue work otherwise we may get OOO data. Otherwise,
+ * if sk_psock_skb_ingress errors will be handled by
+ * retrying later from workqueue.
+ */
+ if (skb_queue_empty(&psock->ingress_skb)) {
+ err = sk_psock_skb_ingress(psock, skb);
+ }
+ if (err < 0) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_work(&psock->work);
- break;
}
- goto out_free;
+ break;
case __SK_REDIRECT:
sk_psock_skb_redirect(skb);
break;
@@ -792,9 +820,9 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
kfree_skb(skb);
goto out;
}
+ skb_set_owner_r(skb, sk);
prog = READ_ONCE(psock->progs.skb_verdict);
if (likely(prog)) {
- skb_orphan(skb);
tcp_skb_bpf_redirect_clear(skb);
ret = sk_psock_bpf_run(psock, prog, skb);
ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
@@ -817,8 +845,11 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
rcu_read_lock();
prog = READ_ONCE(psock->progs.skb_parser);
- if (likely(prog))
+ if (likely(prog)) {
+ skb->sk = psock->sk;
ret = sk_psock_bpf_run(psock, prog, skb);
+ skb->sk = NULL;
+ }
rcu_read_unlock();
return ret;
}
@@ -842,6 +873,57 @@ static void sk_psock_strp_data_ready(struct sock *sk)
rcu_read_unlock();
}
+static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
+ unsigned int offset, size_t orig_len)
+{
+ struct sock *sk = (struct sock *)desc->arg.data;
+ struct sk_psock *psock;
+ struct bpf_prog *prog;
+ int ret = __SK_DROP;
+ int len = skb->len;
+
+ /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
+ skb = skb_clone(skb, GFP_ATOMIC);
+ if (!skb) {
+ desc->error = -ENOMEM;
+ return 0;
+ }
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (unlikely(!psock)) {
+ len = 0;
+ kfree_skb(skb);
+ goto out;
+ }
+ skb_set_owner_r(skb, sk);
+ prog = READ_ONCE(psock->progs.skb_verdict);
+ if (likely(prog)) {
+ tcp_skb_bpf_redirect_clear(skb);
+ ret = sk_psock_bpf_run(psock, prog, skb);
+ ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+ }
+ sk_psock_verdict_apply(psock, skb, ret);
+out:
+ rcu_read_unlock();
+ return len;
+}
+
+static void sk_psock_verdict_data_ready(struct sock *sk)
+{
+ struct socket *sock = sk->sk_socket;
+ read_descriptor_t desc;
+
+ if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+ return;
+
+ desc.arg.data = sk;
+ desc.error = 0;
+ desc.count = 1;
+
+ sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+}
+
static void sk_psock_write_space(struct sock *sk)
{
struct sk_psock *psock;
@@ -871,6 +953,19 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
return strp_init(&psock->parser.strp, sk, &cb);
}
+void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
+{
+ struct sk_psock_parser *parser = &psock->parser;
+
+ if (parser->enabled)
+ return;
+
+ parser->saved_data_ready = sk->sk_data_ready;
+ sk->sk_data_ready = sk_psock_verdict_data_ready;
+ sk->sk_write_space = sk_psock_write_space;
+ parser->enabled = true;
+}
+
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
{
struct sk_psock_parser *parser = &psock->parser;
@@ -896,3 +991,15 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
strp_stop(&parser->strp);
parser->enabled = false;
}
+
+void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
+{
+ struct sk_psock_parser *parser = &psock->parser;
+
+ if (!parser->enabled)
+ return;
+
+ sk->sk_data_ready = parser->saved_data_ready;
+ parser->saved_data_ready = NULL;
+ parser->enabled = false;
+}
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c5c6b18eff4..4e8729357122 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -413,18 +413,6 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
return 0;
}
-static void sock_warn_obsolete_bsdism(const char *name)
-{
- static int warned;
- static char warncomm[TASK_COMM_LEN];
- if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
- pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
- warncomm, name);
- warned++;
- }
-}
-
static bool sock_needs_netstamp(const struct sock *sk)
{
switch (sk->sk_family) {
@@ -769,7 +757,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
} else {
sock_reset_flag(sk, SOCK_RCVTSTAMP);
sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
}
}
@@ -984,7 +971,6 @@ set_sndbuf:
break;
case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("setsockopt");
break;
case SO_PASSCRED:
@@ -1007,8 +993,6 @@ set_sndbuf:
__sock_set_timestamps(sk, valbool, true, true);
break;
case SO_TIMESTAMPING_NEW:
- sock_set_flag(sk, SOCK_TSTAMP_NEW);
- fallthrough;
case SO_TIMESTAMPING_OLD:
if (val & ~SOF_TIMESTAMPING_MASK) {
ret = -EINVAL;
@@ -1037,16 +1021,14 @@ set_sndbuf:
}
sk->sk_tsflags = val;
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
- else {
- if (optname == SO_TIMESTAMPING_NEW)
- sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
+ else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
- }
break;
case SO_RCVLOWAT:
@@ -1387,7 +1369,6 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_BSDCOMPAT:
- sock_warn_obsolete_bsdism("getsockopt");
break;
case SO_TIMESTAMP_OLD:
@@ -2961,6 +2942,13 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer)
}
EXPORT_SYMBOL(sk_stop_timer);
+void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
+{
+ if (del_timer_sync(timer))
+ __sock_put(sk);
+}
+EXPORT_SYMBOL(sk_stop_timer_sync);
+
void sock_init_data(struct socket *sock, struct sock *sk)
{
sk_init_common(sk);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index c13ffbd33d8d..c9c45b935f99 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -11,7 +11,7 @@
#include <linux/tcp.h>
#include <linux/workqueue.h>
#include <linux/nospec.h>
-
+#include <linux/cookie.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
@@ -19,16 +19,17 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
-static atomic64_t cookie_gen;
-u64 sock_gen_cookie(struct sock *sk)
+DEFINE_COOKIE(sock_cookie);
+
+u64 __sock_gen_cookie(struct sock *sk)
{
while (1) {
u64 res = atomic64_read(&sk->sk_cookie);
if (res)
return res;
- res = atomic64_inc_return(&cookie_gen);
+ res = gen_cookie_next(&sock_cookie);
atomic64_cmpxchg(&sk->sk_cookie, 0, res);
}
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 119f52a99dc1..ddc899e83313 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/errno.h>
#include <linux/file.h>
@@ -147,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
static void sock_map_del_link(struct sock *sk,
struct sk_psock *psock, void *link_raw)
{
+ bool strp_stop = false, verdict_stop = false;
struct sk_psock_link *link, *tmp;
- bool strp_stop = false;
spin_lock_bh(&psock->link_lock);
list_for_each_entry_safe(link, tmp, &psock->link, list) {
@@ -158,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
map);
if (psock->parser.enabled && stab->progs.skb_parser)
strp_stop = true;
+ if (psock->parser.enabled && stab->progs.skb_verdict)
+ verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
}
}
spin_unlock_bh(&psock->link_lock);
- if (strp_stop) {
+ if (strp_stop || verdict_stop) {
write_lock_bh(&sk->sk_callback_lock);
- sk_psock_stop_strp(sk, psock);
+ if (strp_stop)
+ sk_psock_stop_strp(sk, psock);
+ else
+ sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
}
}
@@ -184,8 +190,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
struct proto *prot;
- sock_owned_by_me(sk);
-
switch (sk->sk_type) {
case SOCK_STREAM:
prot = tcp_bpf_get_proto(sk, psock);
@@ -231,20 +235,21 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
{
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
struct sk_psock *psock;
- bool skb_progs;
int ret;
skb_verdict = READ_ONCE(progs->skb_verdict);
- skb_parser = READ_ONCE(progs->skb_parser);
- skb_progs = skb_parser && skb_verdict;
- if (skb_progs) {
+ if (skb_verdict) {
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
if (IS_ERR(skb_verdict))
return PTR_ERR(skb_verdict);
+ }
+
+ skb_parser = READ_ONCE(progs->skb_parser);
+ if (skb_parser) {
skb_parser = bpf_prog_inc_not_zero(skb_parser);
if (IS_ERR(skb_parser)) {
- bpf_prog_put(skb_verdict);
- return PTR_ERR(skb_parser);
+ ret = PTR_ERR(skb_parser);
+ goto out_put_skb_verdict;
}
}
@@ -253,7 +258,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
msg_parser = bpf_prog_inc_not_zero(msg_parser);
if (IS_ERR(msg_parser)) {
ret = PTR_ERR(msg_parser);
- goto out;
+ goto out_put_skb_parser;
}
}
@@ -265,15 +270,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
if (psock) {
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
- (skb_progs && READ_ONCE(psock->progs.skb_parser))) {
+ (skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
+ (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
sk_psock_put(sk, psock);
ret = -EBUSY;
goto out_progs;
}
} else {
psock = sk_psock_init(sk, map->numa_node);
- if (!psock) {
- ret = -ENOMEM;
+ if (IS_ERR(psock)) {
+ ret = PTR_ERR(psock);
goto out_progs;
}
}
@@ -286,28 +292,32 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
goto out_drop;
write_lock_bh(&sk->sk_callback_lock);
- if (skb_progs && !psock->parser.enabled) {
+ if (skb_parser && skb_verdict && !psock->parser.enabled) {
ret = sk_psock_init_strp(sk, psock);
- if (ret) {
- write_unlock_bh(&sk->sk_callback_lock);
- goto out_drop;
- }
+ if (ret)
+ goto out_unlock_drop;
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
psock_set_prog(&psock->progs.skb_parser, skb_parser);
sk_psock_start_strp(sk, psock);
+ } else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
+ psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ sk_psock_start_verdict(sk,psock);
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
+out_unlock_drop:
+ write_unlock_bh(&sk->sk_callback_lock);
out_drop:
sk_psock_put(sk, psock);
out_progs:
if (msg_parser)
bpf_prog_put(msg_parser);
-out:
- if (skb_progs) {
- bpf_prog_put(skb_verdict);
+out_put_skb_parser:
+ if (skb_parser)
bpf_prog_put(skb_parser);
- }
+out_put_skb_verdict:
+ if (skb_verdict)
+ bpf_prog_put(skb_verdict);
return ret;
}
@@ -322,8 +332,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
if (!psock) {
psock = sk_psock_init(sk, map->numa_node);
- if (!psock)
- return -ENOMEM;
+ if (IS_ERR(psock))
+ return PTR_ERR(psock);
}
ret = sock_map_init_proto(sk, psock);
@@ -384,7 +394,7 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
struct sock *sk;
sk = __sock_map_lookup_elem(map, *(u32 *)key);
- if (!sk || !sk_fullsock(sk))
+ if (!sk)
return NULL;
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
@@ -402,7 +412,7 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
if (!sk)
return ERR_PTR(-ENOENT);
- sock_gen_cookie(sk);
+ __sock_gen_cookie(sk);
return &sk->sk_cookie;
}
@@ -478,8 +488,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
link = sk_psock_init_link();
if (!link)
@@ -563,10 +571,12 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return false;
}
-static int sock_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static int sock_hash_update_common(struct bpf_map *map, void *key,
+ struct sock *sk, u64 flags);
+
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
+ u64 flags)
{
- u32 idx = *(u32 *)key;
struct socket *sock;
struct sock *sk;
int ret;
@@ -595,14 +605,41 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
sock_map_sk_acquire(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
+ else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+ ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
else
- ret = sock_map_update_common(map, idx, sk, flags);
+ ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
return ret;
}
+static int sock_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ struct sock *sk = (struct sock *)value;
+ int ret;
+
+ if (unlikely(!sk || !sk_fullsock(sk)))
+ return -EINVAL;
+
+ if (!sock_map_sk_is_suitable(sk))
+ return -EOPNOTSUPP;
+
+ local_bh_disable();
+ bh_lock_sock(sk);
+ if (!sock_map_sk_state_allowed(sk))
+ ret = -EOPNOTSUPP;
+ else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+ ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
+ else
+ ret = sock_hash_update_common(map, key, sk, flags);
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ return ret;
+}
+
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
struct bpf_map *, map, void *, key, u64, flags)
{
@@ -681,8 +718,116 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_map_seq_info {
+ struct bpf_map *map;
+ struct sock *sk;
+ u32 index;
+};
+
+struct bpf_iter__sockmap {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(void *, key);
+ __bpf_md_ptr(struct sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
+ struct bpf_map *map, void *key,
+ struct sock *sk)
+
+static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
+{
+ if (unlikely(info->index >= info->map->max_entries))
+ return NULL;
+
+ info->sk = __sock_map_lookup_elem(info->map, info->index);
+
+ /* can't return sk directly, since that might be NULL */
+ return info;
+}
+
+static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_map_seq_stop */
+ rcu_read_lock();
+ return sock_map_seq_lookup_elem(info);
+}
+
+static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ __must_hold(rcu)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ ++*pos;
+ ++info->index;
+
+ return sock_map_seq_lookup_elem(info);
+}
+
+static int sock_map_seq_show(struct seq_file *seq, void *v)
+ __must_hold(rcu)
+{
+ struct sock_map_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !v);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (v) {
+ ctx.key = &info->index;
+ ctx.sk = info->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_map_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ if (!v)
+ (void)sock_map_seq_show(seq, NULL);
+
+ /* pairs with sock_map_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_map_seq_ops = {
+ .start = sock_map_seq_start,
+ .next = sock_map_seq_next,
+ .stop = sock_map_seq_stop,
+ .show = sock_map_seq_show,
+};
+
+static int sock_map_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+ .seq_ops = &sock_map_seq_ops,
+ .init_seq_private = sock_map_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_map_seq_info),
+};
+
static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
.map_get_next_key = sock_map_get_next_key,
@@ -694,6 +839,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_stab",
.map_btf_id = &sock_map_btf_id,
+ .iter_seq_info = &sock_map_iter_seq_info,
};
struct bpf_shtab_elem {
@@ -855,8 +1001,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
link = sk_psock_init_link();
if (!link)
@@ -915,45 +1059,6 @@ out_free:
return ret;
}
-static int sock_hash_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
-{
- struct socket *sock;
- struct sock *sk;
- int ret;
- u64 ufd;
-
- if (map->value_size == sizeof(u64))
- ufd = *(u64 *)value;
- else
- ufd = *(u32 *)value;
- if (ufd > S32_MAX)
- return -EINVAL;
-
- sock = sockfd_lookup(ufd, &ret);
- if (!sock)
- return ret;
- sk = sock->sk;
- if (!sk) {
- ret = -EINVAL;
- goto out;
- }
- if (!sock_map_sk_is_suitable(sk)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
- sock_map_sk_acquire(sk);
- if (!sock_map_sk_state_allowed(sk))
- ret = -EOPNOTSUPP;
- else
- ret = sock_hash_update_common(map, key, sk, flags);
- sock_map_sk_release(sk);
-out:
- fput(sock->file);
- return ret;
-}
-
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
@@ -971,7 +1076,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
if (!elem)
goto find_first_elem;
- elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+ elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
@@ -983,7 +1088,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
find_first_elem:
for (; i < htab->buckets_num; i++) {
head = &sock_hash_select_bucket(htab, i)->head;
- elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+ elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
@@ -1119,7 +1224,7 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
if (!sk)
return ERR_PTR(-ENOENT);
- sock_gen_cookie(sk);
+ __sock_gen_cookie(sk);
return &sk->sk_cookie;
}
@@ -1128,7 +1233,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
struct sock *sk;
sk = __sock_hash_lookup_elem(map, key);
- if (!sk || !sk_fullsock(sk))
+ if (!sk)
return NULL;
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
@@ -1217,12 +1322,128 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_hash_seq_info {
+ struct bpf_map *map;
+ struct bpf_shtab *htab;
+ u32 bucket_id;
+};
+
+static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
+ struct bpf_shtab_elem *prev_elem)
+{
+ const struct bpf_shtab *htab = info->htab;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
+ struct hlist_node *node;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+
+ /* no more elements, continue in the next bucket */
+ info->bucket_id++;
+ }
+
+ for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
+ bucket = &htab->buckets[info->bucket_id];
+ node = rcu_dereference(hlist_first_rcu(&bucket->head));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+ }
+
+ return NULL;
+}
+
+static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_hash_seq_stop */
+ rcu_read_lock();
+ return sock_hash_seq_find_next(info, NULL);
+}
+
+static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ __must_hold(rcu)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ ++*pos;
+ return sock_hash_seq_find_next(info, v);
+}
+
+static int sock_hash_seq_show(struct seq_file *seq, void *v)
+ __must_hold(rcu)
+{
+ struct sock_hash_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_shtab_elem *elem = v;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !elem);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ ctx.key = elem->key;
+ ctx.sk = elem->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ if (!v)
+ (void)sock_hash_seq_show(seq, NULL);
+
+ /* pairs with sock_hash_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_hash_seq_ops = {
+ .start = sock_hash_seq_start,
+ .next = sock_hash_seq_next,
+ .stop = sock_hash_seq_stop,
+ .show = sock_hash_seq_show,
+};
+
+static int sock_hash_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ info->htab = container_of(aux->map, struct bpf_shtab, map);
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+ .seq_ops = &sock_hash_seq_ops,
+ .init_seq_private = sock_hash_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_hash_seq_info),
+};
+
static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
.map_get_next_key = sock_hash_get_next_key,
- .map_update_elem = sock_hash_update_elem,
+ .map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_hash_lookup,
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
@@ -1230,6 +1451,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_shtab",
.map_btf_id = &sock_hash_map_btf_id,
+ .iter_seq_info = &sock_hash_iter_seq_info,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1340,3 +1562,62 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+
+static int sock_map_iter_attach_target(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_map *map;
+ int err = -EINVAL;
+
+ if (!linfo->map.map_fd)
+ return -EBADF;
+
+ map = bpf_map_get_with_uref(linfo->map.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+ map->map_type != BPF_MAP_TYPE_SOCKHASH)
+ goto put_map;
+
+ if (prog->aux->max_rdonly_access > map->key_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+
+ aux->map = map;
+ return 0;
+
+put_map:
+ bpf_map_put_with_uref(map);
+ return err;
+}
+
+static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
+{
+ bpf_map_put_with_uref(aux->map);
+}
+
+static struct bpf_iter_reg sock_map_iter_reg = {
+ .target = "sockmap",
+ .attach_target = sock_map_iter_attach_target,
+ .detach_target = sock_map_iter_detach_target,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__sockmap, key),
+ PTR_TO_RDONLY_BUF_OR_NULL },
+ { offsetof(struct bpf_iter__sockmap, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+};
+
+static int __init bpf_sockmap_iter_init(void)
+{
+ sock_map_iter_reg.ctx_arg_info[1].btf_id =
+ btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+ return bpf_iter_reg_target(&sock_map_iter_reg);
+}
+late_initcall(bpf_sockmap_iter_init);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 6ada114bbcca..d86d8d11cfe4 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -22,7 +22,7 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
-static int two __maybe_unused = 2;
+static int two = 2;
static int three = 3;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -546,7 +546,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = &two,
},
{
.procname = "devconf_inherit_init_net",
@@ -587,6 +587,19 @@ static struct ctl_table netns_core_table[] = {
{ }
};
+static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
+{
+ /* fallback tunnels for initns only */
+ if (!strncmp(str, "initns", 6))
+ sysctl_fb_tunnels_only_for_init_net = 1;
+ /* no fallback tunnels anywhere */
+ else if (!strncmp(str, "none", 4))
+ sysctl_fb_tunnels_only_for_init_net = 2;
+
+ return 1;
+}
+__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
+
static __net_init int sysctl_core_net_init(struct net *net)
{
struct ctl_table *tbl;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 84dde5a2066e..16014ad19406 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
{
const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1];
+ int prio;
int err;
if (!ops)
@@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
struct dcbnl_buffer *buffer =
nla_data(ieee[DCB_ATTR_DCB_BUFFER]);
+ for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) {
+ if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) {
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
err = ops->dcbnl_setbuffer(netdev, buffer);
if (err)
goto err;
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 0a72510d5de1..8f3dd3b1d2d0 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -274,7 +274,7 @@ void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
/**
* dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
* This routine is called when the peer acknowledges the receipt of Ack Vectors
- * up to and including @ackno. While based on on section A.3 of RFC 4340, here
+ * up to and including @ackno. While based on section A.3 of RFC 4340, here
* are additional precautions to prevent corrupted buffer state. In particular,
* we use tail_ackno to identify outdated records; it always marks the earliest
* packet of group (2) in 11.4.2.
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 9c28c8251125..bb3d70664dde 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -495,7 +495,8 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
rcu_read_lock();
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- rcu_dereference(ireq->ireq_opt));
+ rcu_dereference(ireq->ireq_opt),
+ inet_sk(sk)->tos);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -537,7 +538,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
local_bh_disable();
bh_lock_sock(ctl_sk);
err = ip_build_and_send_pkt(skb, ctl_sk,
- rxiph->daddr, rxiph->saddr, NULL);
+ rxiph->daddr, rxiph->saddr, NULL,
+ inet_sk(ctl_sk)->tos);
bh_unlock_sock(ctl_sk);
if (net_xmit_eval(err) == 0) {
@@ -731,7 +733,7 @@ int dccp_invalid_packet(struct sk_buff *skb)
return 1;
}
/*
- * If P.Data Offset is too too large for packet, drop packet and return
+ * If P.Data Offset is too large for packet, drop packet and return
*/
if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) {
DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 0e06dfc32273..a934d2932373 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -85,7 +85,7 @@ static void dccp_retransmit_timer(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
/*
- * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was
+ * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
* sent, no need to retransmit, this sock is dead.
*/
if (dccp_write_timeout(sk))
@@ -176,7 +176,6 @@ static void dccp_delack_timer(struct timer_list *t)
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
/* Try again later. */
- icsk->icsk_ack.blocked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer,
jiffies + TCP_DELACK_MIN);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1ce9ba8cf545..2131bf2b3a67 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -225,6 +225,15 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, skb->dev);
+ if (unlikely(cpu_dp->ds->untag_bridge_pvid)) {
+ nskb = dsa_untag_bridge_pvid(skb);
+ if (!nskb) {
+ kfree_skb(skb);
+ return 0;
+ }
+ skb = nskb;
+ }
+
s = this_cpu_ptr(p->stats64);
u64_stats_update_begin(&s->syncp);
s->rx_packets++;
@@ -330,11 +339,7 @@ EXPORT_SYMBOL_GPL(call_dsa_notifiers);
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
- struct dsa_devlink_priv *dl_priv;
- struct dsa_switch *ds;
-
- dl_priv = devlink_priv(dl);
- ds = dl_priv->ds;
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
if (!ds->ops->devlink_param_get)
return -EOPNOTSUPP;
@@ -346,11 +351,7 @@ EXPORT_SYMBOL_GPL(dsa_devlink_param_get);
int dsa_devlink_param_set(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
- struct dsa_devlink_priv *dl_priv;
- struct dsa_switch *ds;
-
- dl_priv = devlink_priv(dl);
- ds = dl_priv->ds;
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
if (!ds->ops->devlink_param_set)
return -EOPNOTSUPP;
@@ -412,6 +413,36 @@ void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
}
EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
+struct devlink_region *
+dsa_devlink_region_create(struct dsa_switch *ds,
+ const struct devlink_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ return devlink_region_create(ds->devlink, ops, region_max_snapshots,
+ region_size);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_region_create);
+
+struct devlink_region *
+dsa_devlink_port_region_create(struct dsa_switch *ds,
+ int port,
+ const struct devlink_port_region_ops *ops,
+ u32 region_max_snapshots, u64 region_size)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+
+ return devlink_port_region_create(&dp->devlink_port, ops,
+ region_max_snapshots,
+ region_size);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_port_region_create);
+
+void dsa_devlink_region_destroy(struct devlink_region *region)
+{
+ devlink_region_destroy(region);
+}
+EXPORT_SYMBOL_GPL(dsa_devlink_region_destroy);
+
struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
{
if (!netdev || !dsa_slave_dev_check(netdev))
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index c0ffc7a2b65f..183003e45762 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,9 +21,6 @@
static DEFINE_MUTEX(dsa2_mutex);
LIST_HEAD(dsa_tree_list);
-static const struct devlink_ops dsa_devlink_ops = {
-};
-
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
{
struct dsa_switch_tree *dst;
@@ -254,22 +251,11 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
static int dsa_port_setup(struct dsa_port *dp)
{
- struct dsa_switch *ds = dp->ds;
- struct dsa_switch_tree *dst = ds->dst;
- const unsigned char *id = (const unsigned char *)&dst->index;
- const unsigned char len = sizeof(dst->index);
struct devlink_port *dlp = &dp->devlink_port;
bool dsa_port_link_registered = false;
- bool devlink_port_registered = false;
- struct devlink_port_attrs attrs = {};
- struct devlink *dl = ds->devlink;
bool dsa_port_enabled = false;
int err = 0;
- attrs.phys.port_number = dp->index;
- memcpy(attrs.switch_id.id, id, len);
- attrs.switch_id.id_len = len;
-
if (dp->setup)
return 0;
@@ -278,14 +264,6 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
break;
case DSA_PORT_TYPE_CPU:
- memset(dlp, 0, sizeof(*dlp));
- attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
- devlink_port_attrs_set(dlp, &attrs);
- err = devlink_port_register(dl, dlp, dp->index);
- if (err)
- break;
- devlink_port_registered = true;
-
err = dsa_port_link_register_of(dp);
if (err)
break;
@@ -298,14 +276,6 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_DSA:
- memset(dlp, 0, sizeof(*dlp));
- attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
- devlink_port_attrs_set(dlp, &attrs);
- err = devlink_port_register(dl, dlp, dp->index);
- if (err)
- break;
- devlink_port_registered = true;
-
err = dsa_port_link_register_of(dp);
if (err)
break;
@@ -318,14 +288,6 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_USER:
- memset(dlp, 0, sizeof(*dlp));
- attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- devlink_port_attrs_set(dlp, &attrs);
- err = devlink_port_register(dl, dlp, dp->index);
- if (err)
- break;
- devlink_port_registered = true;
-
dp->mac = of_get_mac_address(dp->dn);
err = dsa_slave_create(dp);
if (err)
@@ -339,8 +301,6 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
dsa_port_link_unregister_of(dp);
- if (err && devlink_port_registered)
- devlink_port_unregister(dlp);
if (err)
return err;
@@ -349,10 +309,50 @@ static int dsa_port_setup(struct dsa_port *dp)
return 0;
}
-static void dsa_port_teardown(struct dsa_port *dp)
+static int dsa_port_devlink_setup(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch_tree *dst = dp->ds->dst;
+ struct devlink_port_attrs attrs = {};
+ struct devlink *dl = dp->ds->devlink;
+ const unsigned char *id;
+ unsigned char len;
+ int err;
+
+ id = (const unsigned char *)&dst->index;
+ len = sizeof(dst->index);
+ attrs.phys.port_number = dp->index;
+ memcpy(attrs.switch_id.id, id, len);
+ attrs.switch_id.id_len = len;
+ memset(dlp, 0, sizeof(*dlp));
+
+ switch (dp->type) {
+ case DSA_PORT_TYPE_UNUSED:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED;
+ break;
+ case DSA_PORT_TYPE_CPU:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU;
+ break;
+ case DSA_PORT_TYPE_DSA:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA;
+ break;
+ case DSA_PORT_TYPE_USER:
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ break;
+ }
+
+ devlink_port_attrs_set(dlp, &attrs);
+ err = devlink_port_register(dl, dlp, dp->index);
+
+ if (!err)
+ dp->devlink_port_setup = true;
+
+ return err;
+}
+
+static void dsa_port_teardown(struct dsa_port *dp)
+{
if (!dp->setup)
return;
@@ -362,16 +362,13 @@ static void dsa_port_teardown(struct dsa_port *dp)
case DSA_PORT_TYPE_CPU:
dsa_port_disable(dp);
dsa_tag_driver_put(dp->tag_ops);
- devlink_port_unregister(dlp);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_DSA:
dsa_port_disable(dp);
- devlink_port_unregister(dlp);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
- devlink_port_unregister(dlp);
if (dp->slave) {
dsa_slave_destroy(dp->slave);
dp->slave = NULL;
@@ -382,9 +379,35 @@ static void dsa_port_teardown(struct dsa_port *dp)
dp->setup = false;
}
+static void dsa_port_devlink_teardown(struct dsa_port *dp)
+{
+ struct devlink_port *dlp = &dp->devlink_port;
+
+ if (dp->devlink_port_setup)
+ devlink_port_unregister(dlp);
+ dp->devlink_port_setup = false;
+}
+
+static int dsa_devlink_info_get(struct devlink *dl,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+
+ if (ds->ops->devlink_info_get)
+ return ds->ops->devlink_info_get(ds, req, extack);
+
+ return -EOPNOTSUPP;
+}
+
+static const struct devlink_ops dsa_devlink_ops = {
+ .info_get = dsa_devlink_info_get,
+};
+
static int dsa_switch_setup(struct dsa_switch *ds)
{
struct dsa_devlink_priv *dl_priv;
+ struct dsa_port *dp;
int err;
if (ds->setup)
@@ -410,9 +433,20 @@ static int dsa_switch_setup(struct dsa_switch *ds)
if (err)
goto free_devlink;
+ /* Setup devlink port instances now, so that the switch
+ * setup() can register regions etc, against the ports
+ */
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+ if (dp->ds == ds) {
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ goto unregister_devlink_ports;
+ }
+ }
+
err = dsa_switch_register_notifier(ds);
if (err)
- goto unregister_devlink;
+ goto unregister_devlink_ports;
err = ds->ops->setup(ds);
if (err < 0)
@@ -440,7 +474,10 @@ static int dsa_switch_setup(struct dsa_switch *ds)
unregister_notifier:
dsa_switch_unregister_notifier(ds);
-unregister_devlink:
+unregister_devlink_ports:
+ list_for_each_entry(dp, &ds->dst->ports, list)
+ if (dp->ds == ds)
+ dsa_port_devlink_teardown(dp);
devlink_unregister(ds->devlink);
free_devlink:
devlink_free(ds->devlink);
@@ -451,6 +488,8 @@ free_devlink:
static void dsa_switch_teardown(struct dsa_switch *ds)
{
+ struct dsa_port *dp;
+
if (!ds->setup)
return;
@@ -463,6 +502,9 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
ds->ops->teardown(ds);
if (ds->devlink) {
+ list_for_each_entry(dp, &ds->dst->ports, list)
+ if (dp->ds == ds)
+ dsa_port_devlink_teardown(dp);
devlink_unregister(ds->devlink);
devlink_free(ds->devlink);
ds->devlink = NULL;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 1653e3377cb3..12998bf04e55 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -7,6 +7,7 @@
#ifndef __DSA_PRIV_H
#define __DSA_PRIV_H
+#include <linux/if_bridge.h>
#include <linux/phy.h>
#include <linux/netdevice.h>
#include <linux/netpoll.h>
@@ -164,8 +165,6 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
-int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
@@ -196,6 +195,65 @@ dsa_slave_to_master(const struct net_device *dev)
return dp->cpu_dp->master;
}
+/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged
+ * frames as untagged, since the bridge will not untag them.
+ */
+static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct net_device *br = dp->bridge_dev;
+ struct net_device *dev = skb->dev;
+ struct net_device *upper_dev;
+ u16 vid, pvid, proto;
+ int err;
+
+ if (!br || br_vlan_enabled(br))
+ return skb;
+
+ err = br_vlan_get_proto(br, &proto);
+ if (err)
+ return skb;
+
+ /* Move VLAN tag from data to hwaccel */
+ if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) {
+ skb = skb_vlan_untag(skb);
+ if (!skb)
+ return NULL;
+ }
+
+ if (!skb_vlan_tag_present(skb))
+ return skb;
+
+ vid = skb_vlan_tag_get_id(skb);
+
+ /* We already run under an RCU read-side critical section since
+ * we are called from netif_receive_skb_list_internal().
+ */
+ err = br_vlan_get_pvid_rcu(dev, &pvid);
+ if (err)
+ return skb;
+
+ if (vid != pvid)
+ return skb;
+
+ /* The sad part about attempting to untag from DSA is that we
+ * don't know, unless we check, if the skb will end up in
+ * the bridge's data path - br_allowed_ingress() - or not.
+ * For example, there might be an 8021q upper for the
+ * default_pvid of the bridge, which will steal VLAN-tagged traffic
+ * from the bridge's data path. This is a configuration that DSA
+ * supports because vlan_filtering is 0. In that case, we should
+ * definitely keep the tag, to make sure it keeps working.
+ */
+ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid);
+ if (upper_dev)
+ return skb;
+
+ __vlan_hwaccel_clear_tag(skb);
+
+ return skb;
+}
+
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 61615ebc70e9..c91de041a91d 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -259,6 +259,18 @@ static void dsa_netdev_ops_set(struct net_device *dev,
dev->dsa_ptr->netdev_ops = ops;
}
+static void dsa_master_set_promiscuity(struct net_device *dev, int inc)
+{
+ const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops;
+
+ if (!ops->promisc_on_master)
+ return;
+
+ rtnl_lock();
+ dev_set_promiscuity(dev, inc);
+ rtnl_unlock();
+}
+
static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
char *buf)
{
@@ -314,9 +326,12 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
dev->dsa_ptr = cpu_dp;
lockdep_set_class(&dev->addr_list_lock,
&dsa_master_addr_list_lock_key);
+
+ dsa_master_set_promiscuity(dev, 1);
+
ret = dsa_master_ethtool_setup(dev);
if (ret)
- return ret;
+ goto out_err_reset_promisc;
dsa_netdev_ops_set(dev, &dsa_netdev_ops);
@@ -329,6 +344,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
out_err_ndo_teardown:
dsa_netdev_ops_set(dev, NULL);
dsa_master_ethtool_teardown(dev);
+out_err_reset_promisc:
+ dsa_master_set_promiscuity(dev, -1);
return ret;
}
@@ -338,6 +355,7 @@ void dsa_master_teardown(struct net_device *dev)
dsa_netdev_ops_set(dev, NULL);
dsa_master_ethtool_teardown(dev);
dsa_master_reset_mtu(dev);
+ dsa_master_set_promiscuity(dev, -1);
dev->dsa_ptr = NULL;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index e23ece229c7e..73569c9af3cc 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -193,11 +193,44 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
}
+/* Must be called under rcu_read_lock() */
static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
bool vlan_filtering)
{
struct dsa_switch *ds = dp->ds;
- int i;
+ int err, i;
+
+ /* VLAN awareness was off, so the question is "can we turn it on".
+ * We may have had 8021q uppers, those need to go. Make sure we don't
+ * enter an inconsistent state: deny changing the VLAN awareness state
+ * as long as we have 8021q uppers.
+ */
+ if (vlan_filtering && dsa_is_user_port(ds, dp->index)) {
+ struct net_device *upper_dev, *slave = dp->slave;
+ struct net_device *br = dp->bridge_dev;
+ struct list_head *iter;
+
+ netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) {
+ struct bridge_vlan_info br_info;
+ u16 vid;
+
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ vid = vlan_dev_vlan_id(upper_dev);
+
+ /* br_vlan_get_info() returns -EINVAL or -ENOENT if the
+ * device, respectively the VID is not found, returning
+ * 0 means success, which is a failure for us here.
+ */
+ err = br_vlan_get_info(br, vid, &br_info);
+ if (err == 0) {
+ dev_err(ds->dev, "Must remove upper %s first\n",
+ upper_dev->name);
+ return false;
+ }
+ }
+ }
if (!ds->vlan_filtering_is_global)
return true;
@@ -232,28 +265,38 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct dsa_switch *ds = dp->ds;
int err;
- /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
- if (switchdev_trans_ph_prepare(trans))
- return 0;
+ if (switchdev_trans_ph_prepare(trans)) {
+ bool apply;
- if (!ds->ops->port_vlan_filtering)
- return 0;
+ if (!ds->ops->port_vlan_filtering)
+ return -EOPNOTSUPP;
- if (!dsa_port_can_apply_vlan_filtering(dp, vlan_filtering))
- return -EINVAL;
+ /* We are called from dsa_slave_switchdev_blocking_event(),
+ * which is not under rcu_read_lock(), unlike
+ * dsa_slave_switchdev_event().
+ */
+ rcu_read_lock();
+ apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering);
+ rcu_read_unlock();
+ if (!apply)
+ return -EINVAL;
+ }
if (dsa_port_is_vlan_filtering(dp) == vlan_filtering)
return 0;
- err = ds->ops->port_vlan_filtering(ds, dp->index,
- vlan_filtering);
+ err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering,
+ trans);
if (err)
return err;
- if (ds->vlan_filtering_is_global)
- ds->vlan_filtering = vlan_filtering;
- else
- dp->vlan_filtering = vlan_filtering;
+ if (switchdev_trans_ph_commit(trans)) {
+ if (ds->vlan_filtering_is_global)
+ ds->vlan_filtering = vlan_filtering;
+ else
+ dp->vlan_filtering = vlan_filtering;
+ }
+
return 0;
}
@@ -433,39 +476,6 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
-int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags)
-{
- struct switchdev_obj_port_vlan vlan = {
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .flags = flags,
- .vid_begin = vid,
- .vid_end = vid,
- };
- struct switchdev_trans trans;
- int err;
-
- trans.ph_prepare = true;
- err = dsa_port_vlan_add(dp, &vlan, &trans);
- if (err)
- return err;
-
- trans.ph_prepare = false;
- return dsa_port_vlan_add(dp, &vlan, &trans);
-}
-EXPORT_SYMBOL(dsa_port_vid_add);
-
-int dsa_port_vid_del(struct dsa_port *dp, u16 vid)
-{
- struct switchdev_obj_port_vlan vlan = {
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .vid_begin = vid,
- .vid_end = vid,
- };
-
- return dsa_port_vlan_del(dp, &vlan);
-}
-EXPORT_SYMBOL(dsa_port_vid_del);
-
static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
{
struct device_node *phy_dn;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9af1a2d0cec4..3bc5ca40c9fb 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -303,13 +303,36 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
return ret;
}
+/* Must be called under rcu_read_lock() */
+static int
+dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct net_device *upper_dev;
+ struct list_head *iter;
+
+ netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) {
+ u16 vid;
+
+ if (!is_vlan_dev(upper_dev))
+ continue;
+
+ vid = vlan_dev_vlan_id(upper_dev);
+ if (vid >= vlan->vid_begin && vid <= vlan->vid_end)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
static int dsa_slave_vlan_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
+ struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
struct switchdev_obj_port_vlan vlan;
- int err;
+ int vid, err;
if (obj->orig_dev != dev)
return -EOPNOTSUPP;
@@ -319,6 +342,17 @@ static int dsa_slave_vlan_add(struct net_device *dev,
vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
+ /* Deny adding a bridge VLAN when there is already an 802.1Q upper with
+ * the same VID.
+ */
+ if (trans->ph_prepare && br_vlan_enabled(dp->bridge_dev)) {
+ rcu_read_lock();
+ err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan);
+ rcu_read_unlock();
+ if (err)
+ return err;
+ }
+
err = dsa_port_vlan_add(dp, &vlan, trans);
if (err)
return err;
@@ -333,6 +367,12 @@ static int dsa_slave_vlan_add(struct net_device *dev,
if (err)
return err;
+ for (vid = vlan.vid_begin; vid <= vlan.vid_end; vid++) {
+ err = vlan_vid_add(master, htons(ETH_P_8021Q), vid);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -376,7 +416,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
static int dsa_slave_vlan_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
+ struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct switchdev_obj_port_vlan *vlan;
+ int vid, err;
if (obj->orig_dev != dev)
return -EOPNOTSUPP;
@@ -384,10 +427,19 @@ static int dsa_slave_vlan_del(struct net_device *dev,
if (dsa_port_skip_vlan_configuration(dp))
return 0;
+ vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+
/* Do not deprogram the CPU port as it may be shared with other user
* ports which can be members of this VLAN as well.
*/
- return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
+ err = dsa_port_vlan_del(dp, vlan);
+ if (err)
+ return err;
+
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++)
+ vlan_vid_del(master, htons(ETH_P_8021Q), vid);
+
+ return 0;
}
static int dsa_slave_port_obj_del(struct net_device *dev,
@@ -1169,28 +1221,9 @@ static void dsa_slave_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
struct dsa_slave_priv *p = netdev_priv(dev);
- struct pcpu_sw_netstats *s;
- unsigned int start;
- int i;
netdev_stats_to_stats64(stats, &dev->stats);
- for_each_possible_cpu(i) {
- u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
-
- s = per_cpu_ptr(p->stats64, i);
- do {
- start = u64_stats_fetch_begin_irq(&s->syncp);
- tx_packets = s->tx_packets;
- tx_bytes = s->tx_bytes;
- rx_packets = s->rx_packets;
- rx_bytes = s->rx_bytes;
- } while (u64_stats_fetch_retry_irq(&s->syncp, start));
-
- stats->tx_packets += tx_packets;
- stats->tx_bytes += tx_bytes;
- stats->rx_packets += rx_packets;
- stats->rx_bytes += rx_bytes;
- }
+ dev_fetch_sw_netstats(stats, p->stats64);
}
static int dsa_slave_get_rxnfc(struct net_device *dev,
@@ -1232,64 +1265,66 @@ static int dsa_slave_get_ts_info(struct net_device *dev,
static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
+ struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct bridge_vlan_info info;
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .vid_begin = vid,
+ .vid_end = vid,
+ /* This API only allows programming tagged, non-PVID VIDs */
+ .flags = 0,
+ };
+ struct switchdev_trans trans;
int ret;
- /* Check for a possible bridge VLAN entry now since there is no
- * need to emulate the switchdev prepare + commit phase.
- */
- if (dp->bridge_dev) {
- if (dsa_port_skip_vlan_configuration(dp))
- return 0;
+ /* User port... */
+ trans.ph_prepare = true;
+ ret = dsa_port_vlan_add(dp, &vlan, &trans);
+ if (ret)
+ return ret;
- /* br_vlan_get_info() returns -EINVAL or -ENOENT if the
- * device, respectively the VID is not found, returning
- * 0 means success, which is a failure for us here.