Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetoot...
authorDavid S. Miller <davem@davemloft.net>
Mon, 26 Sep 2016 03:52:22 +0000 (23:52 -0400)
committerDavid S. Miller <davem@davemloft.net>
Mon, 26 Sep 2016 03:52:22 +0000 (23:52 -0400)
Johan Hedberg says:

====================
pull request: bluetooth-next 2016-09-25

Here are a few more Bluetooth & 802.15.4 patches for the 4.9 kernel that
have popped up during the past week:

 - New USB ID for QCA_ROME Bluetooth device
 - NULL pointer dereference fix for Bluetooth mgmt sockets
 - Fixes for BCSP driver
 - Fix for updating LE scan response

Please let me know if there are any issues pulling. Thanks.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
482 files changed:
Documentation/devicetree/bindings/mmc/sdhci-st.txt
Documentation/devicetree/bindings/net/ethernet.txt
Documentation/devicetree/bindings/net/mediatek-net.txt
Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
Documentation/media/uapi/cec/cec-ioc-dqevent.rst
MAINTAINERS
Makefile
arch/alpha/include/asm/uaccess.h
arch/arc/include/asm/uaccess.h
arch/arm/boot/dts/bcm2835-rpi.dtsi
arch/arm/boot/dts/bcm283x.dtsi
arch/arm/boot/dts/stih407-family.dtsi
arch/arm/boot/dts/stih410.dtsi
arch/arm/common/locomo.c
arch/arm/common/sa1111.c
arch/arm/configs/keystone_defconfig
arch/arm/configs/multi_v7_defconfig
arch/arm/crypto/aes-ce-glue.c
arch/arm/include/asm/pgtable-2level-hwdef.h
arch/arm/include/asm/pgtable-3level-hwdef.h
arch/arm/kvm/arm.c
arch/arm/kvm/mmu.c
arch/arm/mach-exynos/suspend.c
arch/arm/mach-pxa/lubbock.c
arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
arch/arm/mm/mmu.c
arch/arm/xen/enlighten.c
arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
arch/arm64/boot/dts/apm/apm-storm.dtsi
arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi [new symlink]
arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts
arch/arm64/boot/dts/broadcom/bcm2837.dtsi
arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi [new symlink]
arch/arm64/boot/dts/broadcom/bcm283x.dtsi [new symlink]
arch/arm64/boot/dts/broadcom/ns2.dtsi
arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
arch/arm64/boot/dts/exynos/exynos7.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
arch/arm64/boot/dts/marvell/armada-ap806.dtsi
arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi
arch/arm64/boot/dts/xilinx/zynqmp.dtsi
arch/arm64/crypto/aes-glue.c
arch/avr32/include/asm/uaccess.h
arch/avr32/kernel/avr32_ksyms.c
arch/avr32/lib/copy_user.S
arch/blackfin/include/asm/uaccess.h
arch/cris/include/asm/uaccess.h
arch/frv/include/asm/uaccess.h
arch/hexagon/include/asm/uaccess.h
arch/ia64/include/asm/uaccess.h
arch/m32r/include/asm/uaccess.h
arch/metag/include/asm/uaccess.h
arch/microblaze/include/asm/uaccess.h
arch/mips/include/asm/uaccess.h
arch/mn10300/include/asm/uaccess.h
arch/mn10300/lib/usercopy.c
arch/nios2/include/asm/uaccess.h
arch/openrisc/include/asm/uaccess.h
arch/parisc/include/asm/uaccess.h
arch/powerpc/include/asm/cpu_has_feature.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/platforms/powernv/pci-ioda.c
arch/s390/include/asm/uaccess.h
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/vsie.c
arch/score/include/asm/uaccess.h
arch/sh/include/asm/uaccess.h
arch/sh/include/asm/uaccess_64.h
arch/sparc/include/asm/uaccess_32.h
arch/x86/boot/compressed/eboot.c
arch/x86/events/amd/core.c
arch/x86/events/amd/uncore.c
arch/x86/events/intel/bts.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cqm.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/pt.c
arch/x86/include/asm/uaccess.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/kvmclock.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/pmu_amd.c
arch/x86/kvm/x86.c
arch/x86/pci/fixup.c
crypto/blkcipher.c
crypto/cryptd.c
crypto/echainiv.c
drivers/base/power/runtime.c
drivers/clk/sunxi-ng/ccu-sun8i-h3.c
drivers/clk/sunxi-ng/ccu_nk.c
drivers/clk/sunxi/clk-a10-pll2.c
drivers/clk/sunxi/clk-sun8i-mbus.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/efi-stub-helper.c
drivers/firmware/efi/libstub/fdt.c
drivers/firmware/efi/libstub/random.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
drivers/gpu/drm/drm_ioc32.c
drivers/gpu/drm/exynos/exynos_drm_fb.c
drivers/gpu/drm/exynos/exynos_drm_fimc.c
drivers/gpu/drm/exynos/exynos_drm_g2d.c
drivers/gpu/drm/exynos/exynos_drm_gsc.c
drivers/gpu/drm/exynos/exynos_drm_rotator.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem_gtt.c
drivers/gpu/drm/i915/i915_vgpu.c
drivers/gpu/drm/i915/intel_dvo.c
drivers/gpu/drm/i915/intel_opregion.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_psr.c
drivers/gpu/drm/vc4/vc4_bo.c
drivers/gpu/drm/vc4/vc4_validate_shaders.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mcg.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/sw/rdmavt/mr.c
drivers/infiniband/sw/rxe/rxe.c
drivers/infiniband/sw/rxe/rxe_comp.c
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_net.h
drivers/infiniband/sw/rxe/rxe_recv.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/irqchip/irq-atmel-aic.c
drivers/irqchip/irq-atmel-aic5.c
drivers/md/md.c
drivers/md/raid5-cache.c
drivers/md/raid5.c
drivers/media/cec-edid.c
drivers/media/pci/cx23885/cx23885-417.c
drivers/media/pci/saa7134/saa7134-dvb.c
drivers/media/pci/saa7134/saa7134-empress.c
drivers/media/platform/Kconfig
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
drivers/media/platform/rcar-fcp.c
drivers/mmc/host/omap.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/sdhci-st.c
drivers/net/can/flexcan.c
drivers/net/can/ifi_canfd/ifi_canfd.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.h
drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
drivers/net/ethernet/broadcom/bnx2.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
drivers/net/ethernet/brocade/bna/bnad_ethtool.c
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/intel/fm10k/fm10k.h
drivers/net/ethernet/intel/fm10k/fm10k_iov.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
drivers/net/ethernet/intel/i40evf/i40e_common.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_eth_soc.h
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/fw.h
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
drivers/net/ethernet/sfc/mcdi.c
drivers/net/ethernet/sfc/sriov.c
drivers/net/ethernet/sfc/sriov.h
drivers/net/ethernet/smsc/smc91x.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/phy/mdio-xgene.c
drivers/net/usb/r8152.c
drivers/net/wireless/intel/iwlwifi/mvm/tx.c
drivers/net/xen-netback/common.h
drivers/net/xen-netback/hash.c
drivers/net/xen-netback/interface.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netback/xenbus.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/pci/remove.c
drivers/pcmcia/ds.c
drivers/pcmcia/pxa2xx_base.c
drivers/pcmcia/pxa2xx_base.h
drivers/pcmcia/sa1111_badge4.c
drivers/pcmcia/sa1111_generic.c
drivers/pcmcia/sa1111_jornada720.c
drivers/pcmcia/sa1111_lubbock.c
drivers/pcmcia/sa1111_neponset.c
drivers/pcmcia/sa11xx_base.c
drivers/pcmcia/soc_common.c
drivers/rapidio/rio_cm.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/s390/net/qeth_l3_sys.c
drivers/staging/media/cec/TODO
drivers/staging/media/cec/cec-adap.c
drivers/staging/media/cec/cec-api.c
drivers/staging/media/cec/cec-core.c
drivers/staging/media/pulse8-cec/pulse8-cec.c
drivers/usb/core/config.c
drivers/usb/musb/Kconfig
drivers/usb/serial/usb-serial-simple.c
fs/aio.c
fs/autofs4/expire.c
fs/cifs/cifsfs.c
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/ioctl.c
fs/nfs/file.c
fs/nfs/nfs4proc.c
fs/nfs/pnfs.c
fs/notify/fanotify/fanotify.c
fs/notify/fanotify/fanotify_user.c
fs/notify/group.c
fs/notify/notification.c
fs/ocfs2/alloc.c
fs/ocfs2/cluster/tcp_internal.h
fs/ocfs2/dlm/dlmconvert.c
fs/ocfs2/file.c
fs/ocfs2/suballoc.c
fs/proc/kcore.c
fs/ramfs/file-mmu.c
include/asm-generic/uaccess.h
include/linux/cec-funcs.h
include/linux/cec.h
include/linux/compiler.h
include/linux/cpuhotplug.h
include/linux/efi.h
include/linux/fsnotify_backend.h
include/linux/if_link.h
include/linux/irq.h
include/linux/ktime.h
include/linux/mlx4/cmd.h
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/linux/netdevice.h
include/linux/netfilter.h
include/linux/netfilter/nf_conntrack_common.h
include/linux/netfilter/nf_conntrack_proto_gre.h
include/linux/netfilter_ingress.h
include/linux/pagemap.h
include/linux/phy.h
include/linux/uio.h
include/media/cec.h
include/net/dsa.h
include/net/netfilter/br_netfilter.h
include/net/netfilter/nf_conntrack_l3proto.h
include/net/netfilter/nf_conntrack_synproxy.h
include/net/netfilter/nf_log.h
include/net/netfilter/nf_queue.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_bridge.h [deleted file]
include/net/netfilter/nf_tables_core.h
include/net/netfilter/nf_tables_ipv4.h
include/net/netfilter/nf_tables_ipv6.h
include/net/netns/netfilter.h
include/net/sctp/sm.h
include/net/sock.h
include/net/tc_act/tc_vlan.h
include/net/xfrm.h
include/rxrpc/packet.h
include/trace/events/rxrpc.h
include/uapi/linux/bpf.h
include/uapi/linux/if_link.h
include/uapi/linux/if_tunnel.h
include/uapi/linux/netfilter/nf_log.h [new file with mode: 0644]
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/netfilter/nfnetlink_conntrack.h
include/uapi/linux/netfilter/xt_hashlimit.h
include/uapi/linux/pkt_sched.h
include/uapi/linux/xfrm.h
kernel/cgroup.c
kernel/events/core.c
kernel/events/ring_buffer.c
kernel/sched/core.c
lib/iov_iter.c
mm/debug.c
mm/khugepaged.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/page_io.c
mm/swapfile.c
mm/usercopy.c
net/batman-adv/bat_v_elp.c
net/batman-adv/routing.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_netfilter_ipv6.c
net/bridge/netfilter/ebt_log.c
net/bridge/netfilter/ebt_redirect.c
net/bridge/netfilter/ebtables.c
net/bridge/netfilter/nf_tables_bridge.c
net/bridge/netfilter/nft_reject_bridge.c
net/core/dev.c
net/core/filter.c
net/core/rtnetlink.c
net/core/sock.c
net/dsa/slave.c
net/ipv4/ip_input.c
net/ipv4/ip_vti.c
net/ipv4/ipmr.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
net/ipv4/netfilter/nf_log_arp.c
net/ipv4/netfilter/nf_log_ipv4.c
net/ipv4/netfilter/nf_nat_proto_gre.c
net/ipv4/netfilter/nf_tables_arp.c
net/ipv4/netfilter/nf_tables_ipv4.c
net/ipv4/netfilter/nft_chain_route_ipv4.c
net/ipv4/route.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
net/ipv6/netfilter/nf_log_ipv6.c
net/ipv6/netfilter/nf_tables_ipv6.c
net/ipv6/netfilter/nft_chain_route_ipv6.c
net/ipv6/route.c
net/ipv6/xfrm6_input.c
net/ipv6/xfrm6_tunnel.c
net/irda/af_irda.c
net/mac80211/agg-rx.c
net/mac80211/agg-tx.c
net/mac80211/mesh_hwmp.c
net/mac80211/mesh_pathtbl.c
net/mac80211/sta_info.c
net/mac80211/tx.c
net/netfilter/Makefile
net/netfilter/core.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_ftp.c
net/netfilter/nf_conntrack_h323_main.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_seqadj.c
net/netfilter/nf_conntrack_sip.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_internals.h
net/netfilter/nf_log_common.c
net/netfilter/nf_nat_core.c
net/netfilter/nf_queue.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nf_tables_inet.c
net/netfilter/nf_tables_netdev.c
net/netfilter/nf_tables_trace.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_bitwise.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_cmp.c
net/netfilter/nft_ct.c
net/netfilter/nft_dynset.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_hash.c
net/netfilter/nft_immediate.c
net/netfilter/nft_log.c
net/netfilter/nft_lookup.c
net/netfilter/nft_meta.c
net/netfilter/nft_numgen.c
net/netfilter/nft_payload.c
net/netfilter/nft_queue.c
net/netfilter/nft_quota.c
net/netfilter/nft_range.c [new file with mode: 0644]
net/netfilter/xt_RATEEST.c
net/netfilter/xt_TCPMSS.c
net/netfilter/xt_TEE.c
net/netfilter/xt_connlimit.c
net/netfilter/xt_hashlimit.c
net/netfilter/xt_helper.c
net/netfilter/xt_recent.c
net/rxrpc/ar-internal.h
net/rxrpc/call_event.c
net/rxrpc/call_object.c
net/rxrpc/conn_event.c
net/rxrpc/conn_object.c
net/rxrpc/input.c
net/rxrpc/misc.c
net/rxrpc/output.c
net/rxrpc/peer_event.c
net/rxrpc/peer_object.c
net/rxrpc/recvmsg.c
net/rxrpc/rxkad.c
net/rxrpc/sendmsg.c
net/rxrpc/sysctl.c
net/sched/cls_route.c
net/sched/sch_fq.c
net/sctp/input.c
net/sctp/outqueue.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
net/wireless/nl80211.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
scripts/faddr2line [new file with mode: 0755]
tools/lguest/lguest.c

index 88faa91..3cd4c43 100644 (file)
@@ -10,7 +10,7 @@ Required properties:
                        subsystem (mmcss) inside the FlashSS (available in STiH407 SoC
                        family).
 
-- clock-names:         Should be "mmc".
+- clock-names:         Should be "mmc" and "icn".  (NB: The latter is not compulsory)
                        See: Documentation/devicetree/bindings/resource-names.txt
 - clocks:              Phandle to the clock.
                        See: Documentation/devicetree/bindings/clock/clock-bindings.txt
index 5d88f37..e1d7681 100644 (file)
@@ -11,8 +11,8 @@ The following properties are common to the Ethernet controllers:
   the maximum frame size (there's contradiction in ePAPR).
 - phy-mode: string, operation mode of the PHY interface; supported values are
   "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id",
-  "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto
-  standard property;
+  "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a
+  de-facto standard property;
 - phy-connection-type: the same as "phy-mode" property but described in ePAPR;
 - phy-handle: phandle, specifies a reference to a node representing a PHY
   device; this property is described in ePAPR and so preferred;
index 6103e55..f095257 100644 (file)
@@ -31,7 +31,10 @@ Optional properties:
 Required properties:
 - compatible: Should be "mediatek,eth-mac"
 - reg: The number of the MAC
-- phy-handle: see ethernet.txt file in the same directory.
+- phy-handle: see ethernet.txt file in the same directory and
+       the phy-mode "trgmii" required being provided when reg
+       is equal to 0 and the MAC uses fixed-link to connect
+       with internal switch such as MT7530.
 
 Example:
 
index 04ee900..201d483 100644 (file)
@@ -144,7 +144,7 @@ logical address types are already defined will return with error ``EBUSY``.
 
        -  ``flags``
 
-       -  Flags. No flags are defined yet, so set this to 0.
+       -  Flags. See :ref:`cec-log-addrs-flags` for a list of available flags.
 
     -  .. row 7
 
@@ -201,6 +201,25 @@ logical address types are already defined will return with error ``EBUSY``.
           give the CEC framework more information about the device type, even
           though the framework won't use it directly in the CEC message.
 
+.. _cec-log-addrs-flags:
+
+.. flat-table:: Flags for struct cec_log_addrs
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 1 4
+
+
+    -  .. _`CEC-LOG-ADDRS-FL-ALLOW-UNREG-FALLBACK`:
+
+       -  ``CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK``
+
+       -  1
+
+       -  By default if no logical address of the requested type can be claimed, then
+         it will go back to the unconfigured state. If this flag is set, then it will
+         fallback to the Unregistered logical address. Note that if the Unregistered
+         logical address was explicitly requested, then this flag has no effect.
+
 .. _cec-versions:
 
 .. flat-table:: CEC Versions
index 7a6d6d0..2e1e739 100644 (file)
@@ -64,7 +64,8 @@ it is guaranteed that the state did change in between the two events.
 
        -  ``phys_addr``
 
-       -  The current physical address.
+       -  The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no
+          valid physical address is set.
 
     -  .. row 2
 
@@ -72,7 +73,10 @@ it is guaranteed that the state did change in between the two events.
 
        -  ``log_addr_mask``
 
-       -  The current set of claimed logical addresses.
+       -  The current set of claimed logical addresses. This is 0 if no logical
+          addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``.
+         If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device
+         has the unregistered logical address. In that case all other bits are 0.
 
 
 
index ce80b36..efd69c7 100644 (file)
@@ -1634,6 +1634,7 @@ N:        rockchip
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:     Kukjin Kim <kgene@kernel.org>
 M:     Krzysztof Kozlowski <krzk@kernel.org>
+R:     Javier Martinez Canillas <javier@osg.samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:     Maintained
@@ -2509,7 +2510,7 @@ S:        Supported
 F:     kernel/bpf/
 
 BROADCOM B44 10/100 ETHERNET DRIVER
-M:     Gary Zambrano <zambrano@broadcom.com>
+M:     Michael Chan <michael.chan@broadcom.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/b44.*
@@ -6111,7 +6112,7 @@ S:        Supported
 F:     drivers/cpufreq/intel_pstate.c
 
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M:     Maik Broemme <mbroemme@plusserver.de>
+M:     Maik Broemme <mbroemme@libmpq.org>
 L:     linux-fbdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/fb/intelfb.txt
@@ -8169,6 +8170,15 @@ S:       Maintained
 W:     https://fedorahosted.org/dropwatch/
 F:     net/core/drop_monitor.c
 
+NETWORKING [DSA]
+M:     Andrew Lunn <andrew@lunn.ch>
+M:     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+M:     Florian Fainelli <f.fainelli@gmail.com>
+S:     Maintained
+F:     net/dsa/
+F:     include/net/dsa.h
+F:     drivers/net/dsa/
+
 NETWORKING [GENERAL]
 M:     "David S. Miller" <davem@davemloft.net>
 L:     netdev@vger.kernel.org
@@ -12585,7 +12595,7 @@ F:      include/linux/if_*vlan.h
 F:     net/8021q/
 
 VLYNQ BUS
-M:     Florian Fainelli <florian@openwrt.org>
+M:     Florian Fainelli <f.fainelli@gmail.com>
 L:     openwrt-devel@lists.openwrt.org (subscribers-only)
 S:     Maintained
 F:     drivers/vlynq/vlynq.c
index 1a8c8dd..74e22c2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
index c419b43..466e42e 100644 (file)
@@ -371,14 +371,6 @@ __copy_tofrom_user_nocheck(void *to, const void *from, long len)
        return __cu_len;
 }
 
-extern inline long
-__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate)
-{
-       if (__access_ok((unsigned long)validate, len, get_fs()))
-               len = __copy_tofrom_user_nocheck(to, from, len);
-       return len;
-}
-
 #define __copy_to_user(to, from, n)                                    \
 ({                                                                     \
        __chk_user_ptr(to);                                             \
@@ -393,17 +385,22 @@ __copy_tofrom_user(void *to, const void *from, long len, const void __user *vali
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
-
 extern inline long
 copy_to_user(void __user *to, const void *from, long n)
 {
-       return __copy_tofrom_user((__force void *)to, from, n, to);
+       if (likely(__access_ok((unsigned long)to, n, get_fs())))
+               n = __copy_tofrom_user_nocheck((__force void *)to, from, n);
+       return n;
 }
 
 extern inline long
 copy_from_user(void *to, const void __user *from, long n)
 {
-       return __copy_tofrom_user(to, (__force void *)from, n, from);
+       if (likely(__access_ok((unsigned long)from, n, get_fs())))
+               n = __copy_tofrom_user_nocheck(to, (__force void *)from, n);
+       else
+               memset(to, 0, n);
+       return n;
 }
 
 extern void __do_clear_user(void);
index a78d567..41faf17 100644 (file)
        "2:     ;nop\n"                         \
        "       .section .fixup, \"ax\"\n"      \
        "       .align 4\n"                     \
-       "3:     mov %0, %3\n"                   \
+       "3:     # return -EFAULT\n"             \
+       "       mov %0, %3\n"                   \
+       "       # zero out dst ptr\n"           \
+       "       mov %1,  0\n"                   \
        "       j   2b\n"                       \
        "       .previous\n"                    \
        "       .section __ex_table, \"a\"\n"   \
        "2:     ;nop\n"                         \
        "       .section .fixup, \"ax\"\n"      \
        "       .align 4\n"                     \
-       "3:     mov %0, %3\n"                   \
+       "3:     # return -EFAULT\n"             \
+       "       mov %0, %3\n"                   \
+       "       # zero out dst ptr\n"           \
+       "       mov %1,  0\n"                   \
+       "       mov %R1, 0\n"                   \
        "       j   2b\n"                       \
        "       .previous\n"                    \
        "       .section __ex_table, \"a\"\n"   \
index caf2707..e9b47b2 100644 (file)
@@ -2,6 +2,7 @@
 
 / {
        memory {
+               device_type = "memory";
                reg = <0 0x10000000>;
        };
 
index b982522..445624a 100644 (file)
@@ -2,7 +2,6 @@
 #include <dt-bindings/clock/bcm2835.h>
 #include <dt-bindings/clock/bcm2835-aux.h>
 #include <dt-bindings/gpio/gpio.h>
-#include "skeleton.dtsi"
 
 /* This include file covers the common peripherals and configuration between
  * bcm2835 and bcm2836 implementations, leaving the CPU configuration to
@@ -13,6 +12,8 @@
        compatible = "brcm,bcm2835";
        model = "BCM2835";
        interrupt-parent = <&intc>;
+       #address-cells = <1>;
+       #size-cells = <1>;
 
        chosen {
                bootargs = "earlyprintk console=ttyAMA0";
index d294e82..8b063ab 100644 (file)
                        interrupt-names = "mmcirq";
                        pinctrl-names = "default";
                        pinctrl-0 = <&pinctrl_mmc0>;
-                       clock-names = "mmc";
-                       clocks = <&clk_s_c0_flexgen CLK_MMC_0>;
+                       clock-names = "mmc", "icn";
+                       clocks = <&clk_s_c0_flexgen CLK_MMC_0>,
+                                <&clk_s_c0_flexgen CLK_RX_ICN_HVA>;
                        bus-width = <8>;
                        non-removable;
                };
                        interrupt-names = "mmcirq";
                        pinctrl-names = "default";
                        pinctrl-0 = <&pinctrl_sd1>;
-                       clock-names = "mmc";
-                       clocks = <&clk_s_c0_flexgen CLK_MMC_1>;
+                       clock-names = "mmc", "icn";
+                       clocks = <&clk_s_c0_flexgen CLK_MMC_1>,
+                                <&clk_s_c0_flexgen CLK_RX_ICN_HVA>;
                        resets = <&softreset STIH407_MMC1_SOFTRESET>;
                        bus-width = <4>;
                };
index 18ed1ad..4031886 100644 (file)
@@ -41,7 +41,8 @@
                        compatible = "st,st-ohci-300x";
                        reg = <0x9a03c00 0x100>;
                        interrupts = <GIC_SPI 180 IRQ_TYPE_NONE>;
-                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+                                <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
                        resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>,
                                 <&softreset STIH407_USB2_PORT0_SOFTRESET>;
                        reset-names = "power", "softreset";
@@ -57,7 +58,8 @@
                        interrupts = <GIC_SPI 151 IRQ_TYPE_NONE>;
                        pinctrl-names = "default";
                        pinctrl-0 = <&pinctrl_usb0>;
-                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+                                <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
                        resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>,
                                 <&softreset STIH407_USB2_PORT0_SOFTRESET>;
                        reset-names = "power", "softreset";
@@ -71,7 +73,8 @@
                        compatible = "st,st-ohci-300x";
                        reg = <0x9a83c00 0x100>;
                        interrupts = <GIC_SPI 181 IRQ_TYPE_NONE>;
-                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+                                <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
                        resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>,
                                 <&softreset STIH407_USB2_PORT1_SOFTRESET>;
                        reset-names = "power", "softreset";
@@ -87,7 +90,8 @@
                        interrupts = <GIC_SPI 153 IRQ_TYPE_NONE>;
                        pinctrl-names = "default";
                        pinctrl-0 = <&pinctrl_usb1>;
-                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+                       clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+                                <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
                        resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>,
                                 <&softreset STIH407_USB2_PORT1_SOFTRESET>;
                        reset-names = "power", "softreset";
index 0e97b4b..6c7b068 100644 (file)
@@ -140,7 +140,7 @@ static struct locomo_dev_info locomo_devices[] = {
 
 static void locomo_handler(struct irq_desc *desc)
 {
-       struct locomo *lchip = irq_desc_get_chip_data(desc);
+       struct locomo *lchip = irq_desc_get_handler_data(desc);
        int req, i;
 
        /* Acknowledge the parent IRQ */
@@ -200,8 +200,7 @@ static void locomo_setup_irq(struct locomo *lchip)
         * Install handler for IRQ_LOCOMO_HW.
         */
        irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING);
-       irq_set_chip_data(lchip->irq, lchip);
-       irq_set_chained_handler(lchip->irq, locomo_handler);
+       irq_set_chained_handler_and_data(lchip->irq, locomo_handler, lchip);
 
        /* Install handlers for IRQ_LOCOMO_* */
        for ( ; irq <= lchip->irq_base + 3; irq++) {
index fb0a0a4..2e076c4 100644 (file)
@@ -472,8 +472,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
         * specifies that S0ReadyInt and S1ReadyInt should be '1'.
         */
        sa1111_writel(0, irqbase + SA1111_INTPOL0);
-       sa1111_writel(SA1111_IRQMASK_HI(IRQ_S0_READY_NINT) |
-                     SA1111_IRQMASK_HI(IRQ_S1_READY_NINT),
+       sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) |
+                     BIT(IRQ_S1_READY_NINT & 31),
                      irqbase + SA1111_INTPOL1);
 
        /* clear all IRQs */
@@ -754,7 +754,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
        if (sachip->irq != NO_IRQ) {
                ret = sa1111_setup_irq(sachip, pd->irq_base);
                if (ret)
-                       goto err_unmap;
+                       goto err_clk;
        }
 
 #ifdef CONFIG_ARCH_SA1100
@@ -799,6 +799,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
 
        return 0;
 
+ err_clk:
+       clk_disable(sachip->clk);
  err_unmap:
        iounmap(sachip->base);
  err_clk_unprep:
@@ -869,9 +871,9 @@ struct sa1111_save_data {
 
 #ifdef CONFIG_PM
 
-static int sa1111_suspend(struct platform_device *dev, pm_message_t state)
+static int sa1111_suspend_noirq(struct device *dev)
 {
-       struct sa1111 *sachip = platform_get_drvdata(dev);
+       struct sa1111 *sachip = dev_get_drvdata(dev);
        struct sa1111_save_data *save;
        unsigned long flags;
        unsigned int val;
@@ -934,9 +936,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state)
  *     restored by their respective drivers, and must be called
  *     via LDM after this function.
  */
-static int sa1111_resume(struct platform_device *dev)
+static int sa1111_resume_noirq(struct device *dev)
 {
-       struct sa1111 *sachip = platform_get_drvdata(dev);
+       struct sa1111 *sachip = dev_get_drvdata(dev);
        struct sa1111_save_data *save;
        unsigned long flags, id;
        void __iomem *base;
@@ -952,7 +954,7 @@ static int sa1111_resume(struct platform_device *dev)
        id = sa1111_readl(sachip->base + SA1111_SKID);
        if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
                __sa1111_remove(sachip);
-               platform_set_drvdata(dev, NULL);
+               dev_set_drvdata(dev, NULL);
                kfree(save);
                return 0;
        }
@@ -1003,8 +1005,8 @@ static int sa1111_resume(struct platform_device *dev)
 }
 
 #else
-#define sa1111_suspend NULL
-#define sa1111_resume  NULL
+#define sa1111_suspend_noirq NULL
+#define sa1111_resume_noirq  NULL
 #endif
 
 static int sa1111_probe(struct platform_device *pdev)
@@ -1017,7 +1019,7 @@ static int sa1111_probe(struct platform_device *pdev)
                return -EINVAL;
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
-               return -ENXIO;
+               return irq;
 
        return __sa1111_probe(&pdev->dev, mem, irq);
 }
@@ -1038,6 +1040,11 @@ static int sa1111_remove(struct platform_device *pdev)
        return 0;
 }
 
+static struct dev_pm_ops sa1111_pm_ops = {
+       .suspend_noirq = sa1111_suspend_noirq,
+       .resume_noirq = sa1111_resume_noirq,
+};
+
 /*
  *     Not sure if this should be on the system bus or not yet.
  *     We really want some way to register a system device at
@@ -1050,10 +1057,9 @@ static int sa1111_remove(struct platform_device *pdev)
 static struct platform_driver sa1111_device_driver = {
        .probe          = sa1111_probe,
        .remove         = sa1111_remove,
-       .suspend        = sa1111_suspend,
-       .resume         = sa1111_resume,
        .driver         = {
                .name   = "sa1111",
+               .pm     = &sa1111_pm_ops,
        },
 };
 
index 71b42e6..78cd2f1 100644 (file)
@@ -161,6 +161,7 @@ CONFIG_USB_MON=y
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_DWC3=y
+CONFIG_NOP_USB_XCEIV=y
 CONFIG_KEYSTONE_USB_PHY=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
index 2c8665c..ea3566f 100644 (file)
@@ -781,7 +781,7 @@ CONFIG_MXS_DMA=y
 CONFIG_DMA_BCM2835=y
 CONFIG_DMA_OMAP=y
 CONFIG_QCOM_BAM_DMA=y
-CONFIG_XILINX_VDMA=y
+CONFIG_XILINX_DMA=y
 CONFIG_DMA_SUN6I=y
 CONFIG_STAGING=y
 CONFIG_SENSORS_ISL29018=y
index da3c042..aef022a 100644 (file)
@@ -284,7 +284,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                err = blkcipher_walk_done(desc, &walk,
                                          walk.nbytes % AES_BLOCK_SIZE);
        }
-       if (nbytes) {
+       if (walk.nbytes % AES_BLOCK_SIZE) {
                u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
                u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
                u8 __aligned(8) tail[AES_BLOCK_SIZE];
index d0131ee..3f82e9d 100644 (file)
@@ -47,6 +47,7 @@
 #define PMD_SECT_WB            (PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
 #define PMD_SECT_MINICACHE     (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE)
 #define PMD_SECT_WBWA          (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
+#define PMD_SECT_CACHE_MASK    (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
 #define PMD_SECT_NONSHARED_DEV (PMD_SECT_TEX(2))
 
 /*
index f8f1cff..4cd664a 100644 (file)
@@ -62,6 +62,7 @@
 #define PMD_SECT_WT            (_AT(pmdval_t, 2) << 2) /* normal inner write-through */
 #define PMD_SECT_WB            (_AT(pmdval_t, 3) << 2) /* normal inner write-back */
 #define PMD_SECT_WBWA          (_AT(pmdval_t, 7) << 2) /* normal inner write-alloc */
+#define PMD_SECT_CACHE_MASK    (_AT(pmdval_t, 7) << 2)
 
 /*
  * + Level 3 descriptor (PTE)
index 75f130e..c94b90d 100644 (file)
@@ -158,8 +158,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        int i;
 
-       kvm_free_stage2_pgd(kvm);
-
        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                if (kvm->vcpus[i]) {
                        kvm_arch_vcpu_free(kvm->vcpus[i]);
index 29d0b23..e9a5c0e 100644 (file)
@@ -1714,7 +1714,8 @@ int kvm_mmu_init(void)
                 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
 
        if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
-           hyp_idmap_start <  kern_hyp_va(~0UL)) {
+           hyp_idmap_start <  kern_hyp_va(~0UL) &&
+           hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
                /*
                 * The idmap page is intersecting with the VA space,
                 * it is not safe to continue further.
@@ -1893,6 +1894,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
+       kvm_free_stage2_pgd(kvm);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
index 3750575..06332f6 100644 (file)
@@ -255,6 +255,12 @@ static int __init exynos_pmu_irq_init(struct device_node *node,
                return -ENOMEM;
        }
 
+       /*
+        * Clear the OF_POPULATED flag set in of_irq_init so that
+        * later the Exynos PMU platform device won't be skipped.
+        */
+       of_node_clear_flag(node, OF_POPULATED);
+
        return 0;
 }
 
index 7245f33..d6159f8 100644 (file)
@@ -137,6 +137,18 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = {
        // no D+ pullup; lubbock can't connect/disconnect in software
 };
 
+static void lubbock_init_pcmcia(void)
+{
+       struct clk *clk;
+
+       /* Add an alias for the SA1111 PCMCIA clock */
+       clk = clk_get_sys("pxa2xx-pcmcia", NULL);
+       if (!IS_ERR(clk)) {
+               clkdev_create(clk, NULL, "1800");
+               clk_put(clk);
+       }
+}
+
 static struct resource sa1111_resources[] = {
        [0] = {
                .start  = 0x10000000,
@@ -467,6 +479,8 @@ static void __init lubbock_init(void)
        pxa_set_btuart_info(NULL);
        pxa_set_stuart_info(NULL);
 
+       lubbock_init_pcmcia();
+
        clk_add_alias("SA1111_CLK", NULL, "GPIO11_CLK", NULL);
        pxa_set_udc_info(&udc_info);
        pxa_set_fb_info(NULL, &sharp_lm8v31);
index 62437b5..73e3adb 100644 (file)
 
 #define REGULATOR_IRQ_MASK     BIT(2)  /* IRQ2, active low */
 
-static void __iomem *irqc;
-
-static const u8 da9063_mask_regs[] = {
-       DA9063_REG_IRQ_MASK_A,
-       DA9063_REG_IRQ_MASK_B,
-       DA9063_REG_IRQ_MASK_C,
-       DA9063_REG_IRQ_MASK_D,
-};
-
-/* DA9210 System Control and Event Registers */
+/* start of DA9210 System Control and Event Registers */
 #define DA9210_REG_MASK_A              0x54
-#define DA9210_REG_MASK_B              0x55
-
-static const u8 da9210_mask_regs[] = {
-       DA9210_REG_MASK_A,
-       DA9210_REG_MASK_B,
-};
-
-static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[],
-                            unsigned int nregs)
-{
-       unsigned int i;
 
-       dev_info(&client->dev, "Masking %s interrupt sources\n", client->name);
+static void __iomem *irqc;
 
-       for (i = 0; i < nregs; i++) {
-               int error = i2c_smbus_write_byte_data(client, regs[i], ~0);
-               if (error) {
-                       dev_err(&client->dev, "i2c error %d\n", error);
-                       return;
-               }
-       }
-}
+/* first byte sets the memory pointer, following are consecutive reg values */
+static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff };
+static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff };
+
+static struct i2c_msg da9xxx_msgs[2] = {
+       {
+               .addr = 0x58,
+               .len = ARRAY_SIZE(da9063_irq_clr),
+               .buf = da9063_irq_clr,
+       }, {
+               .addr = 0x68,
+               .len = ARRAY_SIZE(da9210_irq_clr),
+               .buf = da9210_irq_clr,
+       },
+};
 
 static int regulator_quirk_notify(struct notifier_block *nb,
                                  unsigned long action, void *data)
@@ -93,12 +80,15 @@ static int regulator_quirk_notify(struct notifier_block *nb,
        client = to_i2c_client(dev);
        dev_dbg(dev, "Detected %s\n", client->name);
 
-       if ((client->addr == 0x58 && !strcmp(client->name, "da9063")))
-               da9xxx_mask_irqs(client, da9063_mask_regs,
-                                ARRAY_SIZE(da9063_mask_regs));
-       else if (client->addr == 0x68 && !strcmp(client->name, "da9210"))
-               da9xxx_mask_irqs(client, da9210_mask_regs,
-                                ARRAY_SIZE(da9210_mask_regs));
+       if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) ||
+           (client->addr == 0x68 && !strcmp(client->name, "da9210"))) {
+               int ret;
+
+               dev_info(&client->dev, "clearing da9063/da9210 interrupts\n");
+               ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs));
+               if (ret != ARRAY_SIZE(da9xxx_msgs))
+                       dev_err(&client->dev, "i2c error %d\n", ret);
+       }
 
        mon = ioread32(irqc + IRQC_MONITOR);
        if (mon & REGULATOR_IRQ_MASK)
index 6344913..30fe03f 100644 (file)
@@ -137,7 +137,7 @@ void __init init_default_cache_policy(unsigned long pmd)
 
        initial_pmd_value = pmd;
 
-       pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE;
+       pmd &= PMD_SECT_CACHE_MASK;
 
        for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
                if (cache_policies[i].pmd == pmd) {
index 3d2cef6..f193414 100644 (file)
@@ -170,9 +170,6 @@ static int xen_starting_cpu(unsigned int cpu)
        pr_info("Xen: initializing cpu%d\n", cpu);
        vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
 
-       /* Direct vCPU id mapping for ARM guests. */
-       per_cpu(xen_vcpu_id, cpu) = cpu;
-
        info.mfn = virt_to_gfn(vcpup);
        info.offset = xen_offset_in_page(vcpup);
 
@@ -330,6 +327,7 @@ static int __init xen_guest_init(void)
 {
        struct xen_add_to_physmap xatp;
        struct shared_info *shared_info_page = NULL;
+       int cpu;
 
        if (!xen_domain())
                return 0;
@@ -380,7 +378,8 @@ static int __init xen_guest_init(void)
                return -ENOMEM;
 
        /* Direct vCPU id mapping for ARM guests. */
-       per_cpu(xen_vcpu_id, 0) = 0;
+       for_each_possible_cpu(cpu)
+               per_cpu(xen_vcpu_id, cpu) = cpu;
 
        xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
        if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
index 445aa67..c2b9bcb 100644 (file)
                /* Local timer */
                timer {
                        compatible = "arm,armv8-timer";
-                       interrupts = <1 13 0xf01>,
-                                    <1 14 0xf01>,
-                                    <1 11 0xf01>,
-                                    <1 10 0xf01>;
+                       interrupts = <1 13 0xf08>,
+                                    <1 14 0xf08>,
+                                    <1 11 0xf08>,
+                                    <1 10 0xf08>;
                };
 
                timer0: timer0@ffc03000 {
index e502c24..bf6c8d0 100644 (file)
        timer {
                compatible = "arm,armv8-timer";
                interrupts = <GIC_PPI 13
-                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>,
+                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
                             <GIC_PPI 14
-                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>,
+                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
                             <GIC_PPI 11
-                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>,
+                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
                             <GIC_PPI 10
-                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>;
+                       (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>;
        };
 
        xtal: xtal-clk {
index d5c3435..31ea70a 100644 (file)
 
        timer {
                compatible = "arm,armv8-timer";
-               interrupts = <1 0 0xff01>,      /* Secure Phys IRQ */
-                            <1 13 0xff01>,     /* Non-secure Phys IRQ */
-                            <1 14 0xff01>,     /* Virt IRQ */
-                            <1 15 0xff01>;     /* Hyp IRQ */
+               interrupts = <1 0 0xff08>,      /* Secure Phys IRQ */
+                            <1 13 0xff08>,     /* Non-secure Phys IRQ */
+                            <1 14 0xff08>,     /* Virt IRQ */
+                            <1 15 0xff08>;     /* Hyp IRQ */
                clock-frequency = <50000000>;
        };
 
diff --git a/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi
new file mode 120000 (symlink)
index 0000000..3937b77
--- /dev/null
@@ -0,0 +1 @@
+../../../../arm/boot/dts/bcm2835-rpi.dtsi
\ No newline at end of file
index 6f47dd2..7841b72 100644 (file)
@@ -1,7 +1,7 @@
 /dts-v1/;
 #include "bcm2837.dtsi"
-#include "../../../../arm/boot/dts/bcm2835-rpi.dtsi"
-#include "../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi"
+#include "bcm2835-rpi.dtsi"
+#include "bcm283x-rpi-smsc9514.dtsi"
 
 / {
        compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
index f2a31d0..8216bbb 100644 (file)
@@ -1,4 +1,4 @@
-#include "../../../../arm/boot/dts/bcm283x.dtsi"
+#include "bcm283x.dtsi"
 
 / {
        compatible = "brcm,bcm2836";
diff --git a/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi
new file mode 120000 (symlink)
index 0000000..dca7c05
--- /dev/null
@@ -0,0 +1 @@
+../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/broadcom/bcm283x.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi
new file mode 120000 (symlink)
index 0000000..5f54e4c
--- /dev/null
@@ -0,0 +1 @@
+../../../../arm/boot/dts/bcm283x.dtsi
\ No newline at end of file
index f53b095..d4a12fa 100644 (file)
        timer {
                compatible = "arm,armv8-timer";
                interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(0xff) |
-                             IRQ_TYPE_EDGE_RISING)>,
+                             IRQ_TYPE_LEVEL_LOW)>,
                             <GIC_PPI 14 (GIC_CPU_MASK_RAW(0xff) |
-                             IRQ_TYPE_EDGE_RISING)>,
+                             IRQ_TYPE_LEVEL_LOW)>,
                             <GIC_PPI 11 (GIC_CPU_MASK_RAW(0xff) |
-                             IRQ_TYPE_EDGE_RISING)>,
+                             IRQ_TYPE_LEVEL_LOW)>,
                             <GIC_PPI 10 (GIC_CPU_MASK_RAW(0xff) |
-                             IRQ_TYPE_EDGE_RISING)>;
+                             IRQ_TYPE_LEVEL_LOW)>;
        };
 
        pmu {
index 2eb9b22..04dc8a8 100644 (file)
 
        timer {
                compatible = "arm,armv8-timer";
-               interrupts = <1 13 0xff01>,
-                            <1 14 0xff01>,
-                            <1 11 0xff01>,
-                            <1 10 0xff01>;
+               interrupts = <1 13 4>,
+                            <1 14 4>,
+                            <1 11 4>,
+                            <1 10 4>;
        };
 
        pmu {
index ca663df..1628315 100644 (file)
 
                timer {
                        compatible = "arm,armv8-timer";
-                       interrupts = <1 13 0xff01>,
-                                    <1 14 0xff01>,
-                                    <1 11 0xff01>,
-                                    <1 10 0xff01>;
+                       interrupts = <1 13 0xff08>,
+                                    <1 14 0xff08>,
+                                    <1 11 0xff08>,
+                                    <1 10 0xff08>;
                };
 
                pmu_system_controller: system-controller@105c0000 {
index e669fbd..a67e210 100644 (file)
 
        timer {
                compatible = "arm,armv8-timer";
-               interrupts = <1 13 0x1>, /* Physical Secure PPI */
-                            <1 14 0x1>, /* Physical Non-Secure PPI */
-                            <1 11 0x1>, /* Virtual PPI */
-                            <1 10 0x1>; /* Hypervisor PPI */
+               interrupts = <1 13 0xf08>, /* Physical Secure PPI */
+                            <1 14 0xf08>, /* Physical Non-Secure PPI */
+                            <1 11 0xf08>, /* Virtual PPI */
+                            <1 10 0xf08>; /* Hypervisor PPI */
        };
 
        pmu {
index 21023a3..e3b6034 100644 (file)
 
        timer {
                compatible = "arm,armv8-timer";
-               interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */
-                            <1 14 0x8>, /* Physical Non-Secure PPI, active-low */
-                            <1 11 0x8>, /* Virtual PPI, active-low */
-                            <1 10 0x8>; /* Hypervisor PPI, active-low */
+               interrupts = <1 13 4>, /* Physical Secure PPI, active-low */
+                            <1 14 4>, /* Physical Non-Secure PPI, active-low */
+                            <1 11 4>, /* Virtual PPI, active-low */
+                            <1 10 4>; /* Hypervisor PPI, active-low */
        };
 
        pmu {
index eab1a42..c2a6745 100644 (file)
 
                        timer {
                                compatible = "arm,armv8-timer";
-                               interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>,
-                                            <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>,
-                                            <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>,
-                                            <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
+                               interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                                            <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                                            <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+                                            <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
                        };
 
                        odmi: odmi@300000 {
index c223915..d73bdc8 100644 (file)
 
        timer {
                compatible = "arm,armv8-timer";
-               interrupts = <1 13 0xf01>,
-                            <1 14 0xf01>,
-                            <1 11 0xf01>,
-                            <1 10 0xf01>;
+               interrupts = <1 13 4>,
+                            <1 14 4>,
+                            <1 11 4>,
+                            <1 10 4>;
        };
 
        soc {
index e595f22..3e2e51f 100644 (file)
        timer {
                compatible = "arm,armv8-timer";
                interrupt-parent = <&gic>;
-               interrupts = <1 13 0xf01>,
-                            <1 14 0xf01>,
-                            <1 11 0xf01>,
-                            <1 10 0xf01>;
+               interrupts = <1 13 0xf08>,
+                            <1 14 0xf08>,
+                            <1 11 0xf08>,
+                            <1 10 0xf08>;
        };
 
        amba_apu {
index 5c88804..6b2aa0f 100644 (file)
@@ -216,7 +216,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                err = blkcipher_walk_done(desc, &walk,
                                          walk.nbytes % AES_BLOCK_SIZE);
        }
-       if (nbytes) {
+       if (walk.nbytes % AES_BLOCK_SIZE) {
                u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
                u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
                u8 __aligned(8) tail[AES_BLOCK_SIZE];
index 68cf638..b1ec1fa 100644 (file)
@@ -74,7 +74,7 @@ extern __kernel_size_t __copy_user(void *to, const void *from,
 
 extern __kernel_size_t copy_to_user(void __user *to, const void *from,
                                    __kernel_size_t n);
-extern __kernel_size_t copy_from_user(void *to, const void __user *from,
+extern __kernel_size_t ___copy_from_user(void *to, const void __user *from,
                                      __kernel_size_t n);
 
 static inline __kernel_size_t __copy_to_user(void __user *to, const void *from,
@@ -88,6 +88,15 @@ static inline __kernel_size_t __copy_from_user(void *to,
 {
        return __copy_user(to, (const void __force *)from, n);
 }
+static inline __kernel_size_t copy_from_user(void *to,
+                                              const void __user *from,
+                                              __kernel_size_t n)
+{
+       size_t res = ___copy_from_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
+}
 
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
index d93ead0..7c6cf14 100644 (file)
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(copy_page);
 /*
  * Userspace access stuff.
  */
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(___copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(__copy_user);
 EXPORT_SYMBOL(strncpy_from_user);
index ea59c04..0753734 100644 (file)
         */
        .text
        .align  1
-       .global copy_from_user
-       .type   copy_from_user, @function
-copy_from_user:
+       .global ___copy_from_user
+       .type   ___copy_from_user, @function
+___copy_from_user:
        branch_if_kernel r8, __copy_user
        ret_if_privileged r8, r11, r10, r10
        rjmp    __copy_user
-       .size   copy_from_user, . - copy_from_user
+       .size   ___copy_from_user, . - ___copy_from_user
 
        .global copy_to_user
        .type   copy_to_user, @function
index 12f5d68..0a2a700 100644 (file)
@@ -171,11 +171,12 @@ static inline int bad_user_access_length(void)
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       if (access_ok(VERIFY_READ, from, n))
+       if (likely(access_ok(VERIFY_READ, from, n))) {
                memcpy(to, (const void __force *)from, n);
-       else
-               return n;
-       return 0;
+               return 0;
+       }
+       memset(to, 0, n);
+       return n;
 }
 
 static inline unsigned long __must_check
index e3530d0..56c7d57 100644 (file)
@@ -194,30 +194,6 @@ extern unsigned long __copy_user(void __user *to, const void *from, unsigned lon
 extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n);
 extern unsigned long __do_clear_user(void __user *to, unsigned long n);
 
-static inline unsigned long
-__generic_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-       if (access_ok(VERIFY_WRITE, to, n))
-               return __copy_user(to, from, n);
-       return n;
-}
-
-static inline unsigned long
-__generic_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-       if (access_ok(VERIFY_READ, from, n))
-               return __copy_user_zeroing(to, from, n);
-       return n;
-}
-
-static inline unsigned long
-__generic_clear_user(void __user *to, unsigned long n)
-{
-       if (access_ok(VERIFY_WRITE, to, n))
-               return __do_clear_user(to, n);
-       return n;
-}
-
 static inline long
 __strncpy_from_user(char *dst, const char __user *src, long count)
 {
@@ -282,7 +258,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n)
        else if (n == 24)
                __asm_copy_from_user_24(to, from, ret);
        else
-               ret = __generic_copy_from_user(to, from, n);
+               ret = __copy_user_zeroing(to, from, n);
 
        return ret;
 }
@@ -333,7 +309,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n)
        else if (n == 24)
                __asm_copy_to_user_24(to, from, ret);
        else
-               ret = __generic_copy_to_user(to, from, n);
+               ret = __copy_user(to, from, n);
 
        return ret;
 }
@@ -366,26 +342,43 @@ __constant_clear_user(void __user *to, unsigned long n)
        else if (n == 24)
                __asm_clear_24(to, ret);
        else
-               ret = __generic_clear_user(to, n);
+               ret = __do_clear_user(to, n);
 
        return ret;
 }
 
 
-#define clear_user(to, n)                              \
-       (__builtin_constant_p(n) ?                      \
-        __constant_clear_user(to, n) :                 \
-        __generic_clear_user(to, n))
+static inline size_t clear_user(void __user *to, size_t n)
+{
+       if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
+               return n;
+       if (__builtin_constant_p(n))
+               return __constant_clear_user(to, n);
+       else
+               return __do_clear_user(to, n);
+}
 
-#define copy_from_user(to, from, n)                    \
-       (__builtin_constant_p(n) ?                      \
-        __constant_copy_from_user(to, from, n) :       \
-        __generic_copy_from_user(to, from, n))
+static inline size_t copy_from_user(void *to, const void __user *from, size_t n)
+{
+       if (unlikely(!access_ok(VERIFY_READ, from, n))) {
+               memset(to, 0, n);
+               return n;
+       }
+       if (__builtin_constant_p(n))
+               return __constant_copy_from_user(to, from, n);
+       else
+               return __copy_user_zeroing(to, from, n);
+}
 
-#define copy_to_user(to, from, n)                      \
-       (__builtin_constant_p(n) ?                      \
-        __constant_copy_to_user(to, from, n) :         \
-        __generic_copy_to_user(to, from, n))
+static inline size_t copy_to_user(void __user *to, const void *from, size_t n)
+{
+       if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
+               return n;
+       if (__builtin_constant_p(n))
+               return __constant_copy_to_user(to, from, n);
+       else
+               return __copy_user(to, from, n);
+}
 
 /* We let the __ versions of copy_from/to_user inline, because they're often
  * used in fast paths and have only a small space overhead.
index 3ac9a59..87d9e34 100644 (file)
@@ -263,19 +263,25 @@ do {                                                      \
 extern long __memset_user(void *dst, unsigned long count);
 extern long __memcpy_user(void *dst, const void *src, unsigned long count);
 
-#define clear_user(dst,count)                  __memset_user(____force(dst), (count))
+#define __clear_user(dst,count)                        __memset_user(____force(dst), (count))
 #define __copy_from_user_inatomic(to, from, n) __memcpy_user((to), ____force(from), (n))
 #define __copy_to_user_inatomic(to, from, n)   __memcpy_user(____force(to), (from), (n))
 
 #else
 
-#define clear_user(dst,count)                  (memset(____force(dst), 0, (count)), 0)
+#define __clear_user(dst,count)                        (memset(____force(dst), 0, (count)), 0)
 #define __copy_from_user_inatomic(to, from, n) (memcpy((to), ____force(from), (n)), 0)
 #define __copy_to_user_inatomic(to, from, n)   (memcpy(____force(to), (from), (n)), 0)
 
 #endif
 
-#define __clear_user clear_user
+static inline unsigned long __must_check
+clear_user(void __user *to, unsigned long n)
+{
+       if (likely(__access_ok(to, n)))
+               n = __clear_user(to, n);
+       return n;
+}
 
 static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
index f000a38..f61cfb2 100644 (file)
@@ -103,7 +103,8 @@ static inline long hexagon_strncpy_from_user(char *dst, const char __user *src,
 {
        long res = __strnlen_user(src, n);
 
-       /* return from strnlen can't be zero -- that would be rubbish. */
+       if (unlikely(!res))
+               return -EFAULT;
 
        if (res > n) {
                copy_from_user(dst, src, n);
index 0472927..bfe1319 100644 (file)
@@ -269,19 +269,16 @@ __copy_from_user (void *to, const void __user *from, unsigned long count)
        __cu_len;                                                                       \
 })
 
-#define copy_from_user(to, from, n)                                                    \
-({                                                                                     \
-       void *__cu_to = (to);                                                           \
-       const void __user *__cu_from = (from);                                          \
-       long __cu_len = (n);                                                            \
-                                                                                       \
-       __chk_user_ptr(__cu_from);                                                      \
-       if (__access_ok(__cu_from, __cu_len, get_fs())) {                               \
-               check_object_size(__cu_to, __cu_len, false);                    \
-               __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);   \
-       }                                                                               \
-       __cu_len;                                                                       \
-})
+static inline unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+       check_object_size(to, n, false);
+       if (likely(__access_ok(from, n, get_fs())))
+               n = __copy_user((__force void __user *) to, from, n);
+       else
+               memset(to, 0, n);
+       return n;
+}
 
 #define __copy_in_user(to, from, size) __copy_user((to), (from), (size))
 
index cac7014..6f89821 100644 (file)
@@ -219,7 +219,7 @@ extern int fixup_exception(struct pt_regs *regs);
 #define __get_user_nocheck(x, ptr, size)                               \
 ({                                                                     \
        long __gu_err = 0;                                              \
-       unsigned long __gu_val;                                         \
+       unsigned long __gu_val = 0;                                     \
        might_fault();                                                  \
        __get_user_size(__gu_val, (ptr), (size), __gu_err);             \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
index 8282cbc..273e612 100644 (file)
@@ -204,8 +204,9 @@ extern unsigned long __must_check __copy_user_zeroing(void *to,
 static inline unsigned long
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       if (access_ok(VERIFY_READ, from, n))
+       if (likely(access_ok(VERIFY_READ, from, n)))
                return __copy_user_zeroing(to, from, n);
+       memset(to, 0, n);
        return n;
 }
 
index 331b0d3..8266767 100644 (file)
@@ -227,7 +227,7 @@ extern long __user_bad(void);
 
 #define __get_user(x, ptr)                                             \
 ({                                                                     \
-       unsigned long __gu_val;                                         \
+       unsigned long __gu_val = 0;                                     \
        /*unsigned long __gu_ptr = (unsigned long)(ptr);*/              \
        long __gu_err;                                                  \
        switch (sizeof(*(ptr))) {                                       \
@@ -373,10 +373,13 @@ extern long __user_bad(void);
 static inline long copy_from_user(void *to,
                const void __user *from, unsigned long n)
 {
+       unsigned long res = n;
        might_fault();
-       if (access_ok(VERIFY_READ, from, n))
-               return __copy_from_user(to, from, n);
-       return n;
+       if (likely(access_ok(VERIFY_READ, from, n)))
+               res = __copy_from_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 #define __copy_to_user(to, from, n)    \
index 11b965f..21a2aab 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/thread_info.h>
+#include <linux/string.h>
 #include <asm/asm-eva.h>
 
 /*
@@ -1170,6 +1171,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
                        __cu_len = __invoke_copy_from_user(__cu_to,     \
                                                           __cu_from,   \
                                                           __cu_len);   \
+               } else {                                                \
+                       memset(__cu_to, 0, __cu_len);                   \
                }                                                       \
        }                                                               \
        __cu_len;                                                       \
index 20f7bf6..d012e87 100644 (file)
@@ -166,6 +166,7 @@ struct __large_struct { unsigned long buf[100]; };
                "2:\n"                                          \
                "       .section        .fixup,\"ax\"\n"        \
                "3:\n\t"                                        \
+               "       mov             0,%1\n"                 \
                "       mov             %3,%0\n"                \
                "       jmp             2b\n"                   \
                "       .previous\n"                            \
index 7826e6c..ce8899e 100644 (file)
@@ -9,7 +9,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the Licence, or (at your option) any later version.
  */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long
 __generic_copy_to_user(void *to, const void *from, unsigned long n)
@@ -24,6 +24,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n)
 {
        if (access_ok(VERIFY_READ, from, n))
                __copy_user_zeroing(to, from, n);
+       else
+               memset(to, 0, n);
        return n;
 }
 
index caa51ff..0ab8232 100644 (file)
@@ -102,9 +102,12 @@ extern long __copy_to_user(void __user *to, const void *from, unsigned long n);
 static inline long copy_from_user(void *to, const void __user *from,
                                unsigned long n)
 {
-       if (!access_ok(VERIFY_READ, from, n))
-               return n;
-       return __copy_from_user(to, from, n);
+       unsigned long res = n;
+       if (access_ok(VERIFY_READ, from, n))
+               res = __copy_from_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 static inline long copy_to_user(void __user *to, const void *from,
@@ -139,7 +142,7 @@ extern long strnlen_user(const char __user *s, long n);
 
 #define __get_user_unknown(val, size, ptr, err) do {                   \
        err = 0;                                                        \
-       if (copy_from_user(&(val), ptr, size)) {                        \
+       if (__copy_from_user(&(val), ptr, size)) {                      \
                err = -EFAULT;                                          \
        }                                                               \
        } while (0)
@@ -166,7 +169,7 @@ do {                                                                        \
        ({                                                              \
        long __gu_err = -EFAULT;                                        \
        const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);              \
-       unsigned long __gu_val;                                         \
+       unsigned long __gu_val = 0;                                     \
        __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
        (x) = (__force __typeof__(x))__gu_val;                          \
        __gu_err;                                                       \
index a6bd07c..5cc6b4f 100644 (file)
@@ -273,28 +273,20 @@ __copy_tofrom_user(void *to, const void *from, unsigned long size);
 static inline unsigned long
 copy_from_user(void *to, const void *from, unsigned long n)
 {
-       unsigned long over;
-
-       if (access_ok(VERIFY_READ, from, n))
-               return __copy_tofrom_user(to, from, n);
-       if ((unsigned long)from < TASK_SIZE) {
-               over = (unsigned long)from + n - TASK_SIZE;
-               return __copy_tofrom_user(to, from, n - over) + over;
-       }
-       return n;
+       unsigned long res = n;
+
+       if (likely(access_ok(VERIFY_READ, from, n)))
+               res = __copy_tofrom_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 static inline unsigned long
 copy_to_user(void *to, const void *from, unsigned long n)
 {
-       unsigned long over;
-
-       if (access_ok(VERIFY_WRITE, to, n))
-               return __copy_tofrom_user(to, from, n);
-       if ((unsigned long)to < TASK_SIZE) {
-               over = (unsigned long)to + n - TASK_SIZE;
-               return __copy_tofrom_user(to, from, n - over) + over;
-       }
+       if (likely(access_ok(VERIFY_WRITE, to, n)))
+               n = __copy_tofrom_user(to, from, n);
        return n;
 }
 
@@ -303,13 +295,8 @@ extern unsigned long __clear_user(void *addr, unsigned long size);
 static inline __must_check unsigned long
 clear_user(void *addr, unsigned long size)
 {
-
-       if (access_ok(VERIFY_WRITE, addr, size))
-               return __clear_user(addr, size);
-       if ((unsigned long)addr < TASK_SIZE) {
-               unsigned long over = (unsigned long)addr + size - TASK_SIZE;
-               return __clear_user(addr, size - over) + over;
-       }
+       if (likely(access_ok(VERIFY_WRITE, addr, size)))
+               size = __clear_user(addr, size);
        return size;
 }
 
index e915048..4828478 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm-generic/uaccess-unaligned.h>
 
 #include <linux/bug.h>
+#include <linux/string.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -221,7 +222,7 @@ static inline unsigned long __must_check copy_from_user(void *to,
                                           unsigned long n)
 {
         int sz = __compiletime_object_size(to);
-        int ret = -EFAULT;
+        unsigned long ret = n;
 
         if (likely(sz == -1 || sz >= n))
                 ret = __copy_from_user(to, from, n);
@@ -230,6 +231,8 @@ static inline unsigned long __must_check copy_from_user(void *to,
        else
                 __bad_copy_user();
 
+       if (unlikely(ret))
+               memset(to + (n - ret), 0, ret);
         return ret;
 }
 
index 2ef55f8..b312b15 100644 (file)
@@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature)
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
 #include <linux/jump_label.h>
 
-#define NUM_CPU_FTR_KEYS       64
+#define NUM_CPU_FTR_KEYS       BITS_PER_LONG
 
 extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
 
index f1e3824..c266227 100644 (file)
@@ -308,36 +308,21 @@ extern unsigned long __copy_tofrom_user(void __user *to,
 static inline unsigned long copy_from_user(void *to,
                const void __user *from, unsigned long n)
 {
-       unsigned long over;
-
-       if (access_ok(VERIFY_READ, from, n)) {
+       if (likely(access_ok(VERIFY_READ, from, n))) {
                check_object_size(to, n, false);
                return __copy_tofrom_user((__force void __user *)to, from, n);
        }
-       if ((unsigned long)from < TASK_SIZE) {
-               over = (unsigned long)from + n - TASK_SIZE;
-               check_object_size(to, n - over, false);
-               return __copy_tofrom_user((__force void __user *)to, from,
-                               n - over) + over;
-       }
+       memset(to, 0, n);
        return n;
 }
 
 static inline unsigned long copy_to_user(void __user *to,
                const void *from, unsigned long n)
 {
-       unsigned long over;
-
        if (access_ok(VERIFY_WRITE, to, n)) {
                check_object_size(from, n, true);
                return __copy_tofrom_user(to, (__force void __user *)from, n);
        }
-       if ((unsigned long)to < TASK_SIZE) {
-               over = (unsigned long)to + n - TASK_SIZE;
-               check_object_size(from, n - over, true);
-               return __copy_tofrom_user(to, (__force void __user *)from,
-                               n - over) + over;
-       }
        return n;
 }
 
@@ -434,10 +419,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
        might_fault();
        if (likely(access_ok(VERIFY_WRITE, addr, size)))
                return __clear_user(addr, size);
-       if ((unsigned long)addr < TASK_SIZE) {
-               unsigned long over = (unsigned long)addr + size - TASK_SIZE;
-               return __clear_user(addr, size - over) + over;
-       }
        return size;
 }
 
index 2265c63..bd739fe 100644 (file)
@@ -411,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  *
  * r13 - PACA
  * cr3 - gt if waking up with partial/complete hypervisor state loss
- * cr4 - eq if waking up from complete hypervisor state loss.
+ * cr4 - gt or eq if waking up from complete hypervisor state loss.
  */
 _GLOBAL(pnv_wakeup_tb_loss)
        ld      r1,PACAR1(r13)
@@ -453,7 +453,7 @@ lwarx_loop2:
         * At this stage
         * cr2 - eq if first thread to wakeup in core
         * cr3-  gt if waking up with partial/complete hypervisor state loss
-        * cr4 - eq if waking up from complete hypervisor state loss.
+        * cr4 - gt or eq if waking up from complete hypervisor state loss.
         */
 
        ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
@@ -481,7 +481,7 @@ first_thread_in_subcore:
         * If waking up from sleep, subcore state is not lost. Hence
         * skip subcore state restore
         */
-       bne     cr4,subcore_state_restored
+       blt     cr4,subcore_state_restored
 
        /* Restore per-subcore state */
        ld      r4,_SDR1(r1)
@@ -526,7 +526,7 @@ timebase_resync:
         * If waking up from sleep, per core state is not lost, skip to
         * clear_lock.
         */
-       bne     cr4,clear_lock
+       blt     cr4,clear_lock
 
        /*
         * First thread in the core to wake up and its waking up with
@@ -557,7 +557,7 @@ common_exit:
         * If waking up from sleep, hypervisor state is not lost. Hence
         * skip hypervisor state restore.
         */
-       bne     cr4,hypervisor_state_restored
+       blt     cr4,hypervisor_state_restored
 
        /* Waking up from winkle */
 
index c16d790..bc0c91e 100644 (file)
@@ -2217,7 +2217,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
 
        pnv_pci_link_table_and_group(phb->hose->node, num,
                        tbl, &pe->table_group);
-       pnv_pci_phb3_tce_invalidate_pe(pe);
+       pnv_pci_ioda2_tce_invalidate_pe(pe);
 
        return 0;
 }
@@ -2355,7 +2355,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
        if (ret)
                pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
        else
-               pnv_pci_phb3_tce_invalidate_pe(pe);
+               pnv_pci_ioda2_tce_invalidate_pe(pe);
 
        pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
 
@@ -3426,7 +3426,17 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
                }
        }
 
-       pnv_ioda_free_pe(pe);
+       /*
+        * The PE for root bus can be removed because of hotplug in EEH
+        * recovery for fenced PHB error. We need to mark the PE dead so
+        * that it can be populated again in PCI hot add path. The PE
+        * shouldn't be destroyed as it's the global reserved resource.
+        */
+       if (phb->ioda.root_pe_populated &&
+           phb->ioda.root_pe_idx == pe->pe_number)
+               phb->ioda.root_pe_populated = false;
+       else
+               pnv_ioda_free_pe(pe);
 }
 
 static void pnv_pci_release_device(struct pci_dev *pdev)
@@ -3442,7 +3452,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return;
 
+       /*
+        * PCI hotplug can happen as part of EEH error recovery. The @pdn
+        * isn't removed and added afterwards in this scenario. We should
+        * set the PE number in @pdn to an invalid one. Otherwise, the PE's
+        * device count is decreased on removing devices while failing to
+        * be increased on adding devices. It leads to unbalanced PE's device
+        * count and eventually make normal PCI hotplug path broken.
+        */
        pe = &phb->ioda.pe_array[pdn->pe_number];
+       pdn->pe_number = IODA_INVALID_PE;
+
        WARN_ON(--pe->device_count < 0);
        if (pe->device_count == 0)
                pnv_ioda_release_pe(pe);
index 95aefdb..52d7c87 100644 (file)
@@ -266,28 +266,28 @@ int __put_user_bad(void) __attribute__((noreturn));
        __chk_user_ptr(ptr);                                    \
        switch (sizeof(*(ptr))) {                               \
        case 1: {                                               \
-               unsigned char __x;                              \
+               unsigned char __x = 0;                          \
                __gu_err = __get_user_fn(&__x, ptr,             \
                                         sizeof(*(ptr)));       \
                (x) = *(__force __typeof__(*(ptr)) *) &__x;     \
                break;                                          \
        };                                                      \
        case 2: {                                               \
-               unsigned short __x;                             \
+               unsigned short __x = 0;                         \
                __gu_err = __get_user_fn(&__x, ptr,             \
                                         sizeof(*(ptr)));       \
                (x) = *(__force __typeof__(*(ptr)) *) &__x;     \
                break;                                          \
        };                                                      \
        case 4: {                                               \
-               unsigned int __x;                               \
+               unsigned int __x = 0;                           \
                __gu_err = __get_user_fn(&__x, ptr,             \
                                         sizeof(*(ptr)));       \
                (x) = *(__force __typeof__(*(ptr)) *) &__x;     \
                break;                                          \
        };                                                      \
        case 8: {                                               \
-               unsigned long long __x;                         \
+               unsigned long long __x = 0;                     \
                __gu_err = __get_user_fn(&__x, ptr,             \
                                         sizeof(*(ptr)));       \
                (x) = *(__force __typeof__(*(ptr)) *) &__x;     \
index f142215..607ec91 100644 (file)
@@ -2231,9 +2231,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
                return -EINVAL;
        current->thread.fpu.fpc = fpu->fpc;
        if (MACHINE_HAS_VX)
-               convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+               convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
+                                (freg_t *) fpu->fprs);
        else
-               memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
+               memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
 }
 
@@ -2242,9 +2243,10 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        /* make sure we have the latest values */
        save_fpu_regs();
        if (MACHINE_HAS_VX)
-               convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+               convert_vx_to_fp((freg_t *) fpu->fprs,
+                                (__vector128 *) vcpu->run->s.regs.vrs);
        else
-               memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+               memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
        fpu->fpc = current->thread.fpu.fpc;
        return 0;
 }
index c106488..d8673e2 100644 (file)
@@ -584,7 +584,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
                /* Validity 0x0044 will be checked by SIE */
                if (rc)
                        goto unpin;
-               scb_s->gvrd = hpa;
+               scb_s->riccbd = hpa;
        }
        return 0;
 unpin:
index 20a3591..01aec8c 100644 (file)
@@ -163,7 +163,7 @@ do {                                                                        \
                __get_user_asm(val, "lw", ptr);                         \
                 break;                                                 \
        case 8:                                                         \
-               if ((copy_from_user((void *)&val, ptr, 8)) == 0)        \
+               if (__copy_from_user((void *)&val, ptr, 8) == 0)        \
                        __gu_err = 0;                                   \
                else                                                    \
                        __gu_err = -EFAULT;                             \
@@ -188,6 +188,8 @@ do {                                                                        \
                                                                        \
        if (likely(access_ok(VERIFY_READ, __gu_ptr, size)))             \
                __get_user_common((x), size, __gu_ptr);                 \
+       else                                                            \
+               (x) = 0;                                                \
                                                                        \
        __gu_err;                                                       \
 })
@@ -201,6 +203,7 @@ do {                                                                        \
                "2:\n"                                                  \
                ".section .fixup,\"ax\"\n"                              \
                "3:li   %0, %4\n"                                       \
+               "li     %1, 0\n"                                        \
                "j      2b\n"                                           \
                ".previous\n"                                           \
                ".section __ex_table,\"a\"\n"                           \
@@ -298,35 +301,34 @@ extern int __copy_tofrom_user(void *to, const void *from, unsigned long len);
 static inline unsigned long
 copy_from_user(void *to, const void *from, unsigned long len)
 {
-       unsigned long over;
+       unsigned long res = len;
 
-       if (access_ok(VERIFY_READ, from, len))
-               return __copy_tofrom_user(to, from, len);
+       if (likely(access_ok(VERIFY_READ, from, len)))
+               res = __copy_tofrom_user(to, from, len);
 
-       if ((unsigned long)from < TASK_SIZE) {
-               over = (unsigned long)from + len - TASK_SIZE;
-               return __copy_tofrom_user(to, from, len - over) + over;
-       }
-       return len;
+       if (unlikely(res))
+               memset(to + (len - res), 0, res);
+
+       return res;
 }
 
 static inline unsigned long
 copy_to_user(void *to, const void *from, unsigned long len)
 {
-       unsigned long over;
-
-       if (access_ok(VERIFY_WRITE, to, len))
-               return __copy_tofrom_user(to, from, len);
+       if (likely(access_ok(VERIFY_WRITE, to, len)))
+               len = __copy_tofrom_user(to, from, len);
 
-       if ((unsigned long)to < TASK_SIZE) {
-               over = (unsigned long)to + len - TASK_SIZE;
-               return __copy_tofrom_user(to, from, len - over) + over;
-       }
        return len;
 }
 
-#define __copy_from_user(to, from, len)        \
-               __copy_tofrom_user((to), (from), (len))
+static inline unsigned long
+__copy_from_user(void *to, const void *from, unsigned long len)
+{
+       unsigned long left = __copy_tofrom_user(to, from, len);
+       if (unlikely(left))
+               memset(to + (len - left), 0, left);
+       return left;
+}
 
 #define __copy_to_user(to, from, len)          \
                __copy_tofrom_user((to), (from), (len))
@@ -340,17 +342,17 @@ __copy_to_user_inatomic(void *to, const void *from, unsigned long len)
 static inline unsigned long
 __copy_from_user_inatomic(void *to, const void *from, unsigned long len)
 {
-       return __copy_from_user(to, from, len);
+       return __copy_tofrom_user(to, from, len);
 }
 
-#define __copy_in_user(to, from, len)  __copy_from_user(to, from, len)
+#define __copy_in_user(to, from, len)  __copy_tofrom_user(to, from, len)
 
 static inline unsigned long
 copy_in_user(void *to, const void *from, unsigned long len)
 {
        if (access_ok(VERIFY_READ, from, len) &&
                      access_ok(VERFITY_WRITE, to, len))
-               return copy_from_user(to, from, len);
+               return __copy_tofrom_user(to, from, len);
 }
 
 /*
index a49635c..92ade79 100644 (file)
@@ -151,7 +151,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
        __kernel_size_t __copy_size = (__kernel_size_t) n;
 
        if (__copy_size && __access_ok(__copy_from, __copy_size))
-               return __copy_user(to, from, __copy_size);
+               __copy_size = __copy_user(to, from, __copy_size);
+
+       if (unlikely(__copy_size))
+               memset(to + (n - __copy_size), 0, __copy_size);
 
        return __copy_size;
 }
index c01376c..ca5073d 100644 (file)
@@ -24,6 +24,7 @@
 #define __get_user_size(x,ptr,size,retval)                     \
 do {                                                           \
        retval = 0;                                             \
+       x = 0;                                                  \
        switch (size) {                                         \
        case 1:                                                 \
                retval = __get_user_asm_b((void *)&x,           \
index e722c51..ea55f86 100644 (file)
@@ -266,8 +266,10 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, un
        if (n && __access_ok((unsigned long) from, n)) {
                check_object_size(to, n, false);
                return __copy_user((__force void __user *) to, from, n);
-       } else
+       } else {
+               memset(to, 0, n);
                return n;
+       }
 }
 
 static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
index ff574da..94dd4a3 100644 (file)
@@ -1004,79 +1004,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
        return status;
 }
 
-static efi_status_t exit_boot(struct boot_params *boot_params,
-                             void *handle, bool is64)
-{
-       struct efi_info *efi = &boot_params->efi_info;
-       unsigned long map_sz, key, desc_size;
-       efi_memory_desc_t *mem_map;
+struct exit_boot_struct {
+       struct boot_params *boot_params;
+       struct efi_info *efi;
        struct setup_data *e820ext;
-       const char *signature;
        __u32 e820ext_size;
-       __u32 nr_desc, prev_nr_desc;
-       efi_status_t status;
-       __u32 desc_version;
-       bool called_exit = false;
-       u8 nr_entries;
-       int i;
-
-       nr_desc = 0;
-       e820ext = NULL;
-       e820ext_size = 0;
-
-get_map:
-       status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size,
-                                   &desc_version, &key);
-
-       if (status != EFI_SUCCESS)
-               return status;
-
-       prev_nr_desc = nr_desc;
-       nr_desc = map_sz / desc_size;
-       if (nr_desc > prev_nr_desc &&
-           nr_desc > ARRAY_SIZE(boot_params->e820_map)) {
-               u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map);
-
-               status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size);
-               if (status != EFI_SUCCESS)
-                       goto free_mem_map;
+       bool is64;
+};
 
-               efi_call_early(free_pool, mem_map);
-               goto get_map; /* Allocated memory, get map again */
+static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
+                                  struct efi_boot_memmap *map,
+                                  void *priv)
+{
+       static bool first = true;
+       const char *signature;
+       __u32 nr_desc;
+       efi_status_t status;
+       struct exit_boot_struct *p = priv;
+
+       if (first) {
+               nr_desc = *map->buff_size / *map->desc_size;
+               if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) {
+                       u32 nr_e820ext = nr_desc -
+                                       ARRAY_SIZE(p->boot_params->e820_map);
+
+                       status = alloc_e820ext(nr_e820ext, &p->e820ext,
+                                              &p->e820ext_size);
+                       if (status != EFI_SUCCESS)
+                               return status;
+               }
+               first = false;
        }
 
-       signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
-       memcpy(&efi->efi_loader_signature, signature, sizeof(__u32));
+       signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
+       memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
 
-       efi->efi_systab = (unsigned long)sys_table;
-       efi->efi_memdesc_size = desc_size;
-       efi->efi_memdesc_version = desc_version;
-       efi->efi_memmap = (unsigned long)mem_map;
-       efi->efi_memmap_size = map_sz;
+       p->efi->efi_systab = (unsigned long)sys_table_arg;
+       p->efi->efi_memdesc_size = *map->desc_size;
+       p->efi->efi_memdesc_version = *map->desc_ver;
+       p->efi->efi_memmap = (unsigned long)*map->map;
+       p->efi->efi_memmap_size = *map->map_size;
 
 #ifdef CONFIG_X86_64
-       efi->efi_systab_hi = (unsigned long)sys_table >> 32;
-       efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
+       p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
+       p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
 #endif
 
+       return EFI_SUCCESS;
+}
+
+static efi_status_t exit_boot(struct boot_params *boot_params,
+                             void *handle, bool is64)
+{
+       unsigned long map_sz, key, desc_size, buff_size;
+       efi_memory_desc_t *mem_map;
+       struct setup_data *e820ext;
+       __u32 e820ext_size;
+       efi_status_t status;
+       __u32 desc_version;
+       struct efi_boot_memmap map;
+       struct exit_boot_struct priv;
+
+       map.map =               &mem_map;
+       map.map_size =          &map_sz;
+       map.desc_size =         &desc_size;
+       map.desc_ver =          &desc_version;
+       map.key_ptr =           &key;
+       map.buff_size =         &buff_size;
+       priv.boot_params =      boot_params;
+       priv.efi =              &boot_params->efi_info;
+       priv.e820ext =          NULL;
+       priv.e820ext_size =     0;
+       priv.is64 =             is64;
+
        /* Might as well exit boot services now */
-       status = efi_call_early(exit_boot_services, handle, key);
-       if (status != EFI_SUCCESS) {
-               /*
-                * ExitBootServices() will fail if any of the event
-                * handlers change the memory map. In which case, we
-                * must be prepared to retry, but only once so that
-                * we're guaranteed to exit on repeated failures instead
-                * of spinning forever.
-                */
-               if (called_exit)
-                       goto free_mem_map;
-
-               called_exit = true;
-               efi_call_early(free_pool, mem_map);
-               goto get_map;
-       }
+       status = efi_exit_boot_services(sys_table, handle, &map, &priv,
+                                       exit_boot_func);
+       if (status != EFI_SUCCESS)
+               return status;
 
+       e820ext = priv.e820ext;
+       e820ext_size = priv.e820ext_size;
        /* Historic? */
        boot_params->alt_mem_k = 32 * 1024;
 
@@ -1085,10 +1093,6 @@ get_map:
                return status;
 
        return EFI_SUCCESS;
-
-free_mem_map:
-       efi_call_early(free_pool, mem_map);
-       return status;
 }
 
 /*
index e07a22b..f5f4b3f 100644 (file)
@@ -119,8 +119,8 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
 {
   [PERF_COUNT_HW_CPU_CYCLES]                   = 0x0076,
   [PERF_COUNT_HW_INSTRUCTIONS]                 = 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]             = 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]                 = 0x0081,
+  [PERF_COUNT_HW_CACHE_REFERENCES]             = 0x077d,
+  [PERF_COUNT_HW_CACHE_MISSES]                 = 0x077e,
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]          = 0x00c2,
   [PERF_COUNT_HW_BRANCH_MISSES]                        = 0x00c3,
   [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]      = 0x00d0, /* "Decoder empty" event */
index e6131d4..65577f0 100644 (file)
@@ -29,6 +29,8 @@
 
 #define COUNTER_SHIFT          16
 
+static HLIST_HEAD(uncore_unused_list);
+
 struct amd_uncore {
        int id;
        int refcnt;
@@ -39,7 +41,7 @@ struct amd_uncore {
        cpumask_t *active_mask;
        struct pmu *pmu;
        struct perf_event *events[MAX_COUNTERS];
-       struct amd_uncore *free_when_cpu_online;
+       struct hlist_node node;
 };
 
 static struct amd_uncore * __percpu *amd_uncore_nb;
@@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
                uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
                uncore_nb->active_mask = &amd_nb_active_mask;
                uncore_nb->pmu = &amd_nb_pmu;
+               uncore_nb->id = -1;
                *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
        }
 
@@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
                uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
                uncore_l2->active_mask = &amd_l2_active_mask;
                uncore_l2->pmu = &amd_l2_pmu;
+               uncore_l2->id = -1;
                *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
        }
 
@@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this,
                        continue;
 
                if (this->id == that->id) {
-                       that->free_when_cpu_online = this;
+                       hlist_add_head(&this->node, &uncore_unused_list);
                        this = that;
                        break;
                }
@@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
        return 0;
 }
 
+static void uncore_clean_online(void)
+{
+       struct amd_uncore *uncore;
+       struct hlist_node *n;
+
+       hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
+               hlist_del(&uncore->node);
+               kfree(uncore);
+       }
+}
+
 static void uncore_online(unsigned int cpu,
                          struct amd_uncore * __percpu *uncores)
 {
        struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
 
-       kfree(uncore->free_when_cpu_online);
-       uncore->free_when_cpu_online = NULL;
+       uncore_clean_online();
 
        if (cpu == uncore->cpu)
                cpumask_set_cpu(cpu, uncore->active_mask);
index 0a6e393..bdcd651 100644 (file)
 struct bts_ctx {
        struct perf_output_handle       handle;
        struct debug_store              ds_back;
-       int                             started;
+       int                             state;
+};
+
+/* BTS context states: */
+enum {
+       /* no ongoing AUX transactions */
+       BTS_STATE_STOPPED = 0,
+       /* AUX transaction is on, BTS tracing is disabled */
+       BTS_STATE_INACTIVE,
+       /* AUX transaction is on, BTS tracing is running */
+       BTS_STATE_ACTIVE,
 };
 
 static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
@@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts)
 static int
 bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
 
+/*
+ * Ordering PMU callbacks wrt themselves and the PMI is done by means
+ * of bts::state, which:
+ *  - is set when bts::handle::event is valid, that is, between
+ *    perf_aux_output_begin() and perf_aux_output_end();
+ *  - is zero otherwise;
+ *  - is ordered against bts::handle::event with a compiler barrier.
+ */
+
 static void __bts_event_start(struct perf_event *event)
 {
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
@@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event)
 
        /*
         * local barrier to make sure that ds configuration made it
-        * before we enable BTS
+        * before we enable BTS and bts::state goes ACTIVE
         */
        wmb();
 
+       /* INACTIVE/STOPPED -> ACTIVE */
+       WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);
+
        intel_pmu_enable_bts(config);
 
 }
@@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags)
 
        __bts_event_start(event);
 
-       /* PMI handler: this counter is running and likely generating PMIs */
-       ACCESS_ONCE(bts->started) = 1;
-
        return;
 
 fail_end_stop:
@@ -263,30 +282,34 @@ fail_stop:
        event->hw.state = PERF_HES_STOPPED;
 }
 
-static void __bts_event_stop(struct perf_event *event)
+static void __bts_event_stop(struct perf_event *event, int state)
 {
+       struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+       /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
+       WRITE_ONCE(bts->state, state);
+
        /*
         * No extra synchronization is mandated by the documentation to have
         * BTS data stores globally visible.
         */
        intel_pmu_disable_bts();
-
-       if (event->hw.state & PERF_HES_STOPPED)
-               return;
-
-       ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
 }
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-       struct bts_buffer *buf = perf_get_aux(&bts->handle);
+       struct bts_buffer *buf = NULL;
+       int state = READ_ONCE(bts->state);
 
-       /* PMI handler: don't restart this counter */
-       ACCESS_ONCE(bts->started) = 0;
+       if (state == BTS_STATE_ACTIVE)
+               __bts_event_stop(event, BTS_STATE_STOPPED);
 
-       __bts_event_stop(event);
+       if (state != BTS_STATE_STOPPED)
+               buf = perf_get_aux(&bts->handle);
+
+       event->hw.state |= PERF_HES_STOPPED;
 
        if (flags & PERF_EF_UPDATE) {
                bts_update(bts);
@@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags)
                                bts->handle.head =
                                        local_xchg(&buf->data_size,
                                                   buf->nr_pages << PAGE_SHIFT);
+
                        perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
                                            !!local_xchg(&buf->lost, 0));
                }
@@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags)
 void intel_bts_enable_local(void)
 {
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+       int state = READ_ONCE(bts->state);
+
+       /*
+        * Here we transition from INACTIVE to ACTIVE;
+        * if we instead are STOPPED from the interrupt handler,
+        * stay that way. Can't be ACTIVE here though.
+        */
+       if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
+               return;
+
+       if (state == BTS_STATE_STOPPED)
+               return;
 
-       if (bts->handle.event && bts->started)
+       if (bts->handle.event)
                __bts_event_start(bts->handle.event);
 }
 
@@ -319,8 +355,15 @@ void intel_bts_disable_local(void)
 {
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 
+       /*
+        * Here we transition from ACTIVE to INACTIVE;
+        * do nothing for STOPPED or INACTIVE.
+        */
+       if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
+               return;
+
        if (bts->handle.event)
-               __bts_event_stop(bts->handle.event);
+               __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
 }
 
 static int
@@ -335,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
                return 0;
 
        head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
-       if (WARN_ON_ONCE(head != local_read(&buf->head)))
-               return -EINVAL;
 
        phys = &buf->buf[buf->cur_buf];
        space = phys->offset + phys->displacement + phys->size - head;
@@ -403,22 +444,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 
 int intel_bts_interrupt(void)
 {
+       struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct perf_event *event = bts->handle.event;
        struct bts_buffer *buf;
        s64 old_head;
-       int err;
+       int err = -ENOSPC, handled = 0;
 
-       if (!event || !bts->started)
-               return 0;
+       /*
+        * The only surefire way of knowing if this NMI is ours is by checking
+        * the write ptr against the PMI threshold.
+        */
+       if (ds->bts_index >= ds->bts_interrupt_threshold)
+               handled = 1;
+
+       /*
+        * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
+        * so we can only be INACTIVE or STOPPED
+        */
+       if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
+               return handled;
 
        buf = perf_get_aux(&bts->handle);
+       if (!buf)
+               return handled;
+
        /*
         * Skip snapshot counters: they don't use the interrupt, but
         * there's no other way of telling, because the pointer will
         * keep moving
         */
-       if (!buf || buf->snapshot)
+       if (buf->snapshot)
                return 0;
 
        old_head = local_read(&buf->head);
@@ -426,18 +482,27 @@ int intel_bts_interrupt(void)
 
        /* no new data */
        if (old_head == local_read(&buf->head))
-               return 0;
+               return handled;
 
        perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
                            !!local_xchg(&buf->lost, 0));
 
        buf = perf_aux_output_begin(&bts->handle, event);
-       if (!buf)
-               return 1;
+       if (buf)
+               err = bts_buffer_reset(buf, &bts->handle);
+
+       if (err) {
+               WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
 
-       err = bts_buffer_reset(buf, &bts->handle);
-       if (err)
-               perf_aux_output_end(&bts->handle, 0, false);
+               if (buf) {
+                       /*
+                        * BTS_STATE_STOPPED should be visible before
+                        * cleared handle::event
+                        */
+                       barrier();
+                       perf_aux_output_end(&bts->handle, 0, false);
+               }
+       }
 
        return 1;
 }
index 2cbde2f..4c9a79b 100644 (file)
@@ -1730,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs
  * disabled state if called consecutively.
  *
  * During consecutive calls, the same disable value will be written to related
- * registers, so the PMU state remains unchanged. hw.state in
- * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
- * calls.
+ * registers, so the PMU state remains unchanged.
+ *
+ * intel_bts events don't coexist with intel PMU's BTS events because of
+ * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
+ * disabled around intel PMU's event batching etc, only inside the PMI handler.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1742,8 +1744,6 @@ static void __intel_pmu_disable_all(void)
 
        if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
                intel_pmu_disable_bts();
-       else
-               intel_bts_disable_local();
 
        intel_pmu_pebs_disable_all();
 }
@@ -1771,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
                        return;
 
                intel_pmu_enable_bts(event->hw.config);
-       } else
-               intel_bts_enable_local();
+       }
 }
 
 static void intel_pmu_enable_all(int added)
@@ -2073,6 +2072,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
         */
        if (!x86_pmu.late_ack)
                apic_write(APIC_LVTPC, APIC_DM_NMI);
+       intel_bts_disable_local();
        __intel_pmu_disable_all();
        handled = intel_pmu_drain_bts_buffer();
        handled += intel_bts_interrupt();
@@ -2172,6 +2172,7 @@ done:
        /* Only restore PMU state when it's active. See x86_pmu_disable(). */
        if (cpuc->enabled)
                __intel_pmu_enable_all(0, true);
+       intel_bts_enable_local();
 
        /*
         * Only unmask the NMI after the overflow counters
index 783c49d..8f82b02 100644 (file)
@@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info);
 static void init_mbm_sample(u32 rmid, u32 evt_type);
 static void __intel_mbm_event_count(void *info);
 
+static bool is_cqm_event(int e)
+{
+       return (e == QOS_L3_OCCUP_EVENT_ID);
+}
+
 static bool is_mbm_event(int e)
 {
        return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
@@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event)
             (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
                return -EINVAL;
 
+       if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
+           (is_mbm_event(event->attr.config) && !mbm_enabled))
+               return -EINVAL;
+
        /* unsupported modes and filters */
        if (event->attr.exclude_user   ||
            event->attr.exclude_kernel ||
index 7ce9f3f..9b983a4 100644 (file)
@@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
                struct pebs_record_nhm *p = at;
                u64 pebs_status;
 
-               /* PEBS v3 has accurate status bits */
+               pebs_status = p->status & cpuc->pebs_enabled;
+               pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+               /* PEBS v3 has more accurate status bits */
                if (x86_pmu.intel_cap.pebs_format >= 3) {
-                       for_each_set_bit(bit, (unsigned long *)&p->status,
-                                        MAX_PEBS_EVENTS)
+                       for_each_set_bit(bit, (unsigned long *)&pebs_status,
+                                        x86_pmu.max_pebs_events)
                                counts[bit]++;
 
                        continue;
                }
 
-               pebs_status = p->status & cpuc->pebs_enabled;
-               pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
                /*
                 * On some CPUs the PEBS status can be zero when PEBS is
                 * racing with clearing of GLOBAL_STATUS.
@@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
                        continue;
 
                event = cpuc->events[bit];
-               WARN_ON_ONCE(!event);
-               WARN_ON_ONCE(!event->attr.precise_ip);
+               if (WARN_ON_ONCE(!event))
+                       continue;
+
+               if (WARN_ON_ONCE(!event->attr.precise_ip))
+                       continue;
 
                /* log dropped samples number */
                if (error[bit])
index 04bb5fb..861a7d9 100644 (file)
@@ -1074,6 +1074,11 @@ static void pt_addr_filters_fini(struct perf_event *event)
        event->hw.addr_filters = NULL;
 }
 
+static inline bool valid_kernel_ip(unsigned long ip)
+{
+       return virt_addr_valid(ip) && kernel_ip(ip);
+}
+
 static int pt_event_addr_filters_validate(struct list_head *filters)
 {
        struct perf_addr_filter *filter;
@@ -1081,11 +1086,16 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
 
        list_for_each_entry(filter, filters, entry) {
                /* PT doesn't support single address triggers */
-               if (!filter->range)
+               if (!filter->range || !filter->size)
                        return -EOPNOTSUPP;
 
-               if (!filter->inode && !kernel_ip(filter->offset))
-                       return -EINVAL;
+               if (!filter->inode) {
+                       if (!valid_kernel_ip(filter->offset))
+                               return -EINVAL;
+
+                       if (!valid_kernel_ip(filter->offset + filter->size))
+                               return -EINVAL;
+               }
 
                if (++range > pt_cap_get(PT_CAP_num_address_ranges))
                        return -EOPNOTSUPP;
@@ -1111,7 +1121,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
                } else {
                        /* apply the offset */
                        msr_a = filter->offset + offs[range];
-                       msr_b = filter->size + msr_a;
+                       msr_b = filter->size + msr_a - 1;
                }
 
                filters->filter[range].msr_a  = msr_a;
index e3af86f..2131c4c 100644 (file)
@@ -433,7 +433,11 @@ do {                                                                       \
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)                        \
        asm volatile("1:        mov"itype" %1,%"rtype"0\n"              \
                     "2:\n"                                             \
-                    _ASM_EXTABLE_EX(1b, 2b)                            \
+                    ".section .fixup,\"ax\"\n"                         \
+                     "3:xor"itype" %"rtype"0,%"rtype"0\n"              \
+                    "  jmp 2b\n"                                       \
+                    ".previous\n"                                      \
+                    _ASM_EXTABLE_EX(1b, 3b)                            \
                     : ltype(x) : "m" (__m(addr)))
 
 #define __put_user_nocheck(x, ptr, size)                       \
index 50c95af..f3e9b2d 100644 (file)
@@ -2093,7 +2093,6 @@ int generic_processor_info(int apicid, int version)
                return -EINVAL;
        }
 
-       num_processors++;
        if (apicid == boot_cpu_physical_apicid) {
                /*
                 * x86_bios_cpu_apicid is required to have processors listed
@@ -2116,10 +2115,13 @@ int generic_processor_info(int apicid, int version)
 
                pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
                           thiscpu, apicid);
+
                disabled_cpus++;
                return -ENOSPC;
        }
 
+       num_processors++;
+
        /*
         * Validate version
         */
index b816971..620ab06 100644 (file)
@@ -54,6 +54,7 @@ static LIST_HEAD(pcache);
  */
 static u8 *container;
 static size_t container_size;
+static bool ucode_builtin;
 
 static u32 ucode_new_rev;
 static u8 amd_ucode_patch[PATCH_MAX_SIZE];
@@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
 void __init load_ucode_amd_bsp(unsigned int family)
 {
        struct cpio_data cp;
+       bool *builtin;
        void **data;
        size_t *size;
 
 #ifdef CONFIG_X86_32
        data =  (void **)__pa_nodebug(&ucode_cpio.data);
        size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+       builtin = (bool *)__pa_nodebug(&ucode_builtin);
 #else
        data = &ucode_cpio.data;
        size = &ucode_cpio.size;
+       builtin = &ucode_builtin;
 #endif
 
-       if (!load_builtin_amd_microcode(&cp, family))
+       *builtin = load_builtin_amd_microcode(&cp, family);
+       if (!*builtin)
                cp = find_ucode_in_initrd();
 
        if (!(cp.data && cp.size))
@@ -373,7 +378,8 @@ void load_ucode_amd_ap(void)
                return;
 
        /* Add CONFIG_RANDOMIZE_MEMORY offset. */
-       cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+       if (!ucode_builtin)
+               cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
 
        eax = cpuid_eax(0x00000001);
        eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
@@ -439,7 +445,8 @@ int __init save_microcode_in_initrd_amd(void)
                container = cont_va;
 
        /* Add CONFIG_RANDOMIZE_MEMORY offset. */
-       container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+       if (!ucode_builtin)
+               container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
 
        eax   = cpuid_eax(0x00000001);
        eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
index 1d39bfb..3692249 100644 (file)
@@ -289,6 +289,7 @@ void __init kvmclock_init(void)
        put_cpu();
 
        x86_platform.calibrate_tsc = kvm_get_tsc_khz;
+       x86_platform.calibrate_cpu = kvm_get_tsc_khz;
        x86_platform.get_wallclock = kvm_get_wallclock;
        x86_platform.set_wallclock = kvm_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
index 5f42d03..c7220ba 100644 (file)
@@ -109,6 +109,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 {
        bool new_val, old_val;
        struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+       struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
        union kvm_ioapic_redirect_entry *e;
 
        e = &ioapic->redirtbl[RTC_GSI];
@@ -117,16 +118,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
                return;
 
        new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
-       old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
+       old_val = test_bit(vcpu->vcpu_id, dest_map->map);
 
        if (new_val == old_val)
                return;
 
        if (new_val) {
-               __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
+               __set_bit(vcpu->vcpu_id, dest_map->map);
+               dest_map->vectors[vcpu->vcpu_id] = e->fields.vector;
                ioapic->rtc_status.pending_eoi++;
        } else {
-               __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
+               __clear_bit(vcpu->vcpu_id, dest_map->map);
                ioapic->rtc_status.pending_eoi--;
                rtc_status_pending_eoi_check_valid(ioapic);
        }
index 39b9112..cd94443 100644 (file)
@@ -23,8 +23,8 @@
 static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
        [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
-       [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
-       [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
+       [2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES },
+       [3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES },
        [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
        [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
        [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
index 19f9f9e..699f872 100644 (file)
@@ -2743,16 +2743,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                if (tsc_delta < 0)
                        mark_tsc_unstable("KVM discovered backwards TSC");
 
-               if (kvm_lapic_hv_timer_in_use(vcpu) &&
-                               kvm_x86_ops->set_hv_timer(vcpu,
-                                       kvm_get_lapic_tscdeadline_msr(vcpu)))
-                       kvm_lapic_switch_to_sw_timer(vcpu);
                if (check_tsc_unstable()) {
                        u64 offset = kvm_compute_tsc_offset(vcpu,
                                                vcpu->arch.last_guest_tsc);
                        kvm_x86_ops->write_tsc_offset(vcpu, offset);
                        vcpu->arch.tsc_catchup = 1;
                }
+               if (kvm_lapic_hv_timer_in_use(vcpu) &&
+                               kvm_x86_ops->set_hv_timer(vcpu,
+                                       kvm_get_lapic_tscdeadline_msr(vcpu)))
+                       kvm_lapic_switch_to_sw_timer(vcpu);
                /*
                 * On a host with synchronized TSC, there is no need to update
                 * kvmclock on vcpu->cpu migration
index 837ea36..6d52b94 100644 (file)
@@ -553,15 +553,21 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
 
 /*
- * Broadwell EP Home Agent BARs erroneously return non-zero values when read.
+ * Device [8086:2fc0]
+ * Erratum HSE43
+ * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset
+ * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html
  *
- * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
- * entry BDF2.
+ * Devices [8086:6f60,6fa0,6fc0]
+ * Erratum BDF2
+ * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration
+ * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
  */
-static void pci_bdwep_bar(struct pci_dev *dev)
+static void pci_invalid_bar(struct pci_dev *dev)
 {
        dev->non_compliant_bars = 1;
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar);
index 3699995..a832426 100644 (file)
@@ -233,6 +233,8 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
                return blkcipher_walk_done(desc, walk, -EINVAL);
        }
 
+       bsize = min(walk->walk_blocksize, n);
+
        walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
                         BLKCIPHER_WALK_DIFF);
        if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
@@ -245,7 +247,6 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
                }
        }
 
-       bsize = min(walk->walk_blocksize, n);
        n = scatterwalk_clamp(&walk->in, n);
        n = scatterwalk_clamp(&walk->out, n);
 
index 77207b4..0c654e5 100644 (file)
@@ -631,9 +631,14 @@ static int cryptd_hash_export(struct ahash_request *req, void *out)
 
 static int cryptd_hash_import(struct ahash_request *req, const void *in)
 {
-       struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+       struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+       struct shash_desc *desc = cryptd_shash_desc(req);
+
+       desc->tfm = ctx->child;
+       desc->flags = req->base.flags;
 
-       return crypto_shash_import(&rctx->desc, in);
+       return crypto_shash_import(desc, in);
 }
 
 static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
index 1b01fe9..e3d889b 100644 (file)
@@ -1,8 +1,8 @@
 /*
  * echainiv: Encrypted Chain IV Generator
  *
- * This generator generates an IV based on a sequence number by xoring it
- * with a salt and then encrypting it with the same key as used to encrypt
+ * This generator generates an IV based on a sequence number by multiplying
+ * it with a salt and then encrypting it with the same key as used to encrypt
  * the plain text.  This algorithm requires that the block size be equal
  * to the IV size.  It is mainly useful for CBC.
  *
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
-#define MAX_IV_SIZE 16
-
-static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
-
-/* We don't care if we get preempted and read/write IVs from the next CPU. */
-static void echainiv_read_iv(u8 *dst, unsigned size)
-{
-       u32 *a = (u32 *)dst;
-       u32 __percpu *b = echainiv_iv;
-
-       for (; size >= 4; size -= 4) {
-               *a++ = this_cpu_read(*b);
-               b++;
-       }
-}
-
-static void echainiv_write_iv(const u8 *src, unsigned size)
-{
-       const u32 *a = (const u32 *)src;
-       u32 __percpu *b = echainiv_iv;
-
-       for (; size >= 4; size -= 4) {
-               this_cpu_write(*b, *a);
-               a++;
-               b++;
-       }
-}
-
-static void echainiv_encrypt_complete2(struct aead_request *req, int err)
-{
-       struct aead_request *subreq = aead_request_ctx(req);
-       struct crypto_aead *geniv;
-       unsigned int ivsize;
-
-       if (err == -EINPROGRESS)
-               return;
-
-       if (err)
-               goto out;
-
-       geniv = crypto_aead_reqtfm(req);
-       ivsize = crypto_aead_ivsize(geniv);
-
-       echainiv_write_iv(subreq->iv, ivsize);
-
-       if (req->iv != subreq->iv)
-               memcpy(req->iv, subreq->iv, ivsize);
-
-out:
-       if (req->iv != subreq->iv)
-               kzfree(subreq->iv);
-}
-
-static void echainiv_encrypt_complete(struct crypto_async_request *base,
-                                        int err)
-{
-       struct aead_request *req = base->data;
-
-       echainiv_encrypt_complete2(req, err);
-       aead_request_complete(req, err);
-}
-
 static int echainiv_encrypt(struct aead_request *req)
 {
        struct crypto_aead *geniv = crypto_aead_reqtfm(req);
        struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
        struct aead_request *subreq = aead_request_ctx(req);
-       crypto_completion_t compl;
-       void *data;
+       __be64 nseqno;
+       u64 seqno;
        u8 *info;
        unsigned int ivsize = crypto_aead_ivsize(geniv);
        int err;
@@ -108,8 +44,6 @@ static int echainiv_encrypt(struct aead_request *req)
 
        aead_request_set_tfm(subreq, ctx->child);
 
-       compl = echainiv_encrypt_complete;
-       data = req;
        info = req->iv;
 
        if (req->src != req->dst) {
@@ -127,29 +61,30 @@ static int echainiv_encrypt(struct aead_request *req)
                        return err;
        }
 
-       if (unlikely(!IS_ALIGNED((unsigned long)info,
-                                crypto_aead_alignmask(geniv) + 1))) {
-               info = kmalloc(ivsize, req->base.flags &
-                                      CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
-                                                                 GFP_ATOMIC);
-               if (!info)
-                       return -ENOMEM;
-
-               memcpy(info, req->iv, ivsize);
-       }
-
-       aead_request_set_callback(subreq, req->base.flags, compl, data);
+       aead_request_set_callback(subreq, req->base.flags,
+                                 req->base.complete, req->base.data);
        aead_request_set_crypt(subreq, req->dst, req->dst,
                               req->cryptlen, info);
        aead_request_set_ad(subreq, req->assoclen);
 
-       crypto_xor(info, ctx->salt, ivsize);
+       memcpy(&nseqno, info + ivsize - 8, 8);
+       seqno = be64_to_cpu(nseqno);
+       memset(info, 0, ivsize);
+
        scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
-       echainiv_read_iv(info, ivsize);
 
-       err = crypto_aead_encrypt(subreq);
-       echainiv_encrypt_complete2(req, err);
-       return err;
+       do {
+               u64 a;
+
+               memcpy(&a, ctx->salt + ivsize - 8, 8);
+
+               a |= 1;
+               a *= seqno;
+
+               memcpy(info + ivsize - 8, &a, 8);
+       } while ((ivsize -= 8));
+
+       return crypto_aead_encrypt(subreq);
 }
 
 static int echainiv_decrypt(struct aead_request *req)
@@ -196,8 +131,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
        alg = crypto_spawn_aead_alg(spawn);
 
        err = -EINVAL;
-       if (inst->alg.ivsize & (sizeof(u32) - 1) ||
-           inst->alg.ivsize > MAX_IV_SIZE)
+       if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
                goto free_inst;
 
        inst->alg.encrypt = echainiv_encrypt;
@@ -206,7 +140,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
        inst->alg.init = aead_init_geniv;
        inst->alg.exit = aead_exit_geniv;
 
-       inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
        inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
        inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 
index 17995fa..82a081e 100644 (file)
@@ -419,7 +419,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
        struct device *parent = NULL;
        int retval;
 
-       trace_rpm_suspend(dev, rpmflags);
+       trace_rpm_suspend_rcuidle(dev, rpmflags);
 
  repeat:
        retval = rpm_check_suspend_allowed(dev);
@@ -549,7 +549,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
        }
 
  out:
-       trace_rpm_return_int(dev, _THIS_IP_, retval);
+       trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
        return retval;
 
index 9af3595..267f995 100644 (file)
@@ -783,14 +783,14 @@ static struct ccu_reset_map sun8i_h3_ccu_resets[] = {
        [RST_BUS_I2S1]          =  { 0x2d0, BIT(13) },
        [RST_BUS_I2S2]          =  { 0x2d0, BIT(14) },
 
-       [RST_BUS_I2C0]          =  { 0x2d4, BIT(0) },
-       [RST_BUS_I2C1]          =  { 0x2d4, BIT(1) },
-       [RST_BUS_I2C2]          =  { 0x2d4, BIT(2) },
-       [RST_BUS_UART0]         =  { 0x2d4, BIT(16) },
-       [RST_BUS_UART1]         =  { 0x2d4, BIT(17) },
-       [RST_BUS_UART2]         =  { 0x2d4, BIT(18) },
-       [RST_BUS_UART3]         =  { 0x2d4, BIT(19) },
-       [RST_BUS_SCR]           =  { 0x2d4, BIT(20) },
+       [RST_BUS_I2C0]          =  { 0x2d8, BIT(0) },
+       [RST_BUS_I2C1]          =  { 0x2d8, BIT(1) },
+       [RST_BUS_I2C2]          =  { 0x2d8, BIT(2) },
+       [RST_BUS_UART0]         =  { 0x2d8, BIT(16) },
+       [RST_BUS_UART1]         =  { 0x2d8, BIT(17) },
+       [RST_BUS_UART2]         =  { 0x2d8, BIT(18) },
+       [RST_BUS_UART3]         =  { 0x2d8, BIT(19) },
+       [RST_BUS_SCR]           =  { 0x2d8, BIT(20) },
 };
 
 static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
index 4470ffc..d6fafb3 100644 (file)
@@ -14,9 +14,9 @@
 #include "ccu_gate.h"
 #include "ccu_nk.h"
 
-void ccu_nk_find_best(unsigned long parent, unsigned long rate,
-                     unsigned int max_n, unsigned int max_k,
-                     unsigned int *n, unsigned int *k)
+static void ccu_nk_find_best(unsigned long parent, unsigned long rate,
+                            unsigned int max_n, unsigned int max_k,
+                            unsigned int *n, unsigned int *k)
 {
        unsigned long best_rate = 0;
        unsigned int best_k = 0, best_n = 0;
index 0ee1f36..d8eab90 100644 (file)
@@ -73,7 +73,7 @@ static void __init sun4i_pll2_setup(struct device_node *node,
                                          SUN4I_PLL2_PRE_DIV_WIDTH,
                                          CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
                                          &sun4i_a10_pll2_lock);
-       if (!prediv_clk) {
+       if (IS_ERR(prediv_clk)) {
                pr_err("Couldn't register the prediv clock\n");
                goto err_free_array;
        }
@@ -106,7 +106,7 @@ static void __init sun4i_pll2_setup(struct device_node *node,
                                          &mult->hw, &clk_multiplier_ops,
                                          &gate->hw, &clk_gate_ops,
                                          CLK_SET_RATE_PARENT);
-       if (!base_clk) {
+       if (IS_ERR(base_clk)) {
                pr_err("Couldn't register the base multiplier clock\n");
                goto err_free_multiplier;
        }
index 411d303..b200ebf 100644 (file)
@@ -48,7 +48,7 @@ static void __init sun8i_a23_mbus_setup(struct device_node *node)
                return;
 
        reg = of_io_request_and_map(node, 0, of_node_full_name(node));
-       if (!reg) {
+       if (IS_ERR(reg)) {
                pr_err("Could not get registers for sun8i-mbus-clk\n");
                goto err_free_parents;
        }
index 5a2631a..7dd2e2d 100644 (file)
@@ -657,9 +657,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
                }
 
                if (subnode) {
-                       node = of_get_flat_dt_subnode_by_name(node, subnode);
-                       if (node < 0)
+                       int err = of_get_flat_dt_subnode_by_name(node, subnode);
+
+                       if (err < 0)
                                return 0;
+
+                       node = err;
                }
 
                return __find_uefi_params(node, info, dt_params[i].params);
index 3bd127f..aded106 100644 (file)
@@ -41,6 +41,8 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
 #define EFI_ALLOC_ALIGN                EFI_PAGE_SIZE
 #endif
 
+#define EFI_MMAP_NR_SLACK_SLOTS        8
+
 struct file_info {
        efi_file_handle_t *handle;
        u64 size;
@@ -63,49 +65,62 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str)
        }
 }
 
+static inline bool mmap_has_headroom(unsigned long buff_size,
+                                    unsigned long map_size,
+                                    unsigned long desc_size)
+{
+       unsigned long slack = buff_size - map_size;
+
+       return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS;
+}
+
 efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
-                               efi_memory_desc_t **map,
-                               unsigned long *map_size,
-                               unsigned long *desc_size,
-                               u32 *desc_ver,
-                               unsigned long *key_ptr)
+                               struct efi_boot_memmap *map)
 {
        efi_memory_desc_t *m = NULL;
        efi_status_t status;
        unsigned long key;
        u32 desc_version;
 
-       *map_size = sizeof(*m) * 32;
+       *map->desc_size =       sizeof(*m);
+       *map->map_size =        *map->desc_size * 32;
+       *map->buff_size =       *map->map_size;
 again:
-       /*
-        * Add an additional efi_memory_desc_t because we're doing an
-        * allocation which may be in a new descriptor region.
-        */
-       *map_size += sizeof(*m);
        status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-                               *map_size, (void **)&m);
+                               *map->map_size, (void **)&m);
        if (status != EFI_SUCCESS)
                goto fail;
 
-       *desc_size = 0;
+       *map->desc_size = 0;
        key = 0;
-       status = efi_call_early(get_memory_map, map_size, m,
-                               &key, desc_size, &desc_version);
-       if (status == EFI_BUFFER_TOO_SMALL) {
+       status = efi_call_early(get_memory_map, map->map_size, m,
+                               &key, map->desc_size, &desc_version);
+       if (status == EFI_BUFFER_TOO_SMALL ||
+           !mmap_has_headroom(*map->buff_size, *map->map_size,
+                              *map->desc_size)) {
                efi_call_early(free_pool, m);
+               /*
+                * Make sure there is some entries of headroom so that the
+                * buffer can be reused for a new map after allocations are
+                * no longer permitted.  Its unlikely that the map will grow to
+                * exceed this headroom once we are ready to trigger
+                * ExitBootServices()
+                */
+               *map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS;
+               *map->buff_size = *map->map_size;
                goto again;
        }
 
        if (status != EFI_SUCCESS)
                efi_call_early(free_pool, m);
 
-       if (key_ptr && status == EFI_SUCCESS)
-               *key_ptr = key;
-       if (desc_ver && status == EFI_SUCCESS)
-               *desc_ver = desc_version;
+       if (map->key_ptr && status == EFI_SUCCESS)
+               *map->key_ptr = key;
+       if (map->desc_ver && status == EFI_SUCCESS)
+               *map->desc_ver = desc_version;
 
 fail:
-       *map = m;
+       *map->map = m;
        return status;
 }
 
@@ -113,13 +128,20 @@ fail:
 unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
 {
        efi_status_t status;
-       unsigned long map_size;
+       unsigned long map_size, buff_size;
        unsigned long membase  = EFI_ERROR;
        struct efi_memory_map map;
        efi_memory_desc_t *md;
+       struct efi_boot_memmap boot_map;
 
-       status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map,
-                                   &map_size, &map.desc_size, NULL, NULL);
+       boot_map.map =          (efi_memory_desc_t **)&map.map;
+       boot_map.map_size =     &map_size;
+       boot_map.desc_size =    &map.desc_size;
+       boot_map.desc_ver =     NULL;
+       boot_map.key_ptr =      NULL;
+       boot_map.buff_size =    &buff_size;
+
+       status = efi_get_memory_map(sys_table_arg, &boot_map);
        if (status != EFI_SUCCESS)
                return membase;
 
@@ -144,15 +166,22 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
                            unsigned long size, unsigned long align,
                            unsigned long *addr, unsigned long max)
 {
-       unsigned long map_size, desc_size;
+       unsigned long map_size, desc_size, buff_size;
        efi_memory_desc_t *map;
        efi_status_t status;
        unsigned long nr_pages;
        u64 max_addr = 0;
        int i;
+       struct efi_boot_memmap boot_map;
+
+       boot_map.map =          &map;
+       boot_map.map_size =     &map_size;
+       boot_map.desc_size =    &desc_size;
+       boot_map.desc_ver =     NULL;
+       boot_map.key_ptr =      NULL;
+       boot_map.buff_size =    &buff_size;
 
-       status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
-                                   NULL, NULL);
+       status = efi_get_memory_map(sys_table_arg, &boot_map);
        if (status != EFI_SUCCESS)
                goto fail;
 
@@ -230,14 +259,21 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
                           unsigned long size, unsigned long align,
                           unsigned long *addr)
 {
-       unsigned long map_size, desc_size;
+       unsigned long map_size, desc_size, buff_size;
        efi_memory_desc_t *map;
        efi_status_t status;
        unsigned long nr_pages;
        int i;
+       struct efi_boot_memmap boot_map;
 
-       status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
-                                   NULL, NULL);
+       boot_map.map =          &map;
+       boot_map.map_size =     &map_size;
+       boot_map.desc_size =    &desc_size;
+       boot_map.desc_ver =     NULL;
+       boot_map.key_ptr =      NULL;
+       boot_map.buff_size =    &buff_size;
+
+       status = efi_get_memory_map(sys_table_arg, &boot_map);
        if (status != EFI_SUCCESS)
                goto fail;
 
@@ -704,3 +740,76 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
        *cmd_line_len = options_bytes;
        return (char *)cmdline_addr;
 }
+
+/*
+ * Handle calling ExitBootServices according to the requirements set out by the
+ * spec.  Obtains the current memory map, and returns that info after calling
+ * ExitBootServices.  The client must specify a function to perform any
+ * processing of the memory map data prior to ExitBootServices.  A client
+ * specific structure may be passed to the function via priv.  The client
+ * function may be called multiple times.
+ */
+efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg,
+                                   void *handle,
+                                   struct efi_boot_memmap *map,
+                                   void *priv,
+                                   efi_exit_boot_map_processing priv_func)
+{
+       efi_status_t status;
+
+       status = efi_get_memory_map(sys_table_arg, map);
+
+       if (status != EFI_SUCCESS)
+               goto fail;
+
+       status = priv_func(sys_table_arg, map, priv);
+       if (status != EFI_SUCCESS)
+               goto free_map;
+
+       status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
+
+       if (status == EFI_INVALID_PARAMETER) {
+               /*
+                * The memory map changed between efi_get_memory_map() and
+                * exit_boot_services().  Per the UEFI Spec v2.6, Section 6.4:
+                * EFI_BOOT_SERVICES.ExitBootServices we need to get the
+                * updated map, and try again.  The spec implies one retry
+                * should be sufficent, which is confirmed against the EDK2
+                * implementation.  Per the spec, we can only invoke
+                * get_memory_map() and exit_boot_services() - we cannot alloc
+                * so efi_get_memory_map() cannot be used, and we must reuse
+                * the buffer.  For all practical purposes, the headroom in the
+                * buffer should account for any changes in the map so the call
+                * to get_memory_map() is expected to succeed here.
+                */
+               *map->map_size = *map->buff_size;
+               status = efi_call_early(get_memory_map,
+                                       map->map_size,
+                                       *map->map,
+                                       map->key_ptr,
+                                       map->desc_size,
+                                       map->desc_ver);
+
+               /* exit_boot_services() was called, thus cannot free */
+               if (status != EFI_SUCCESS)
+                       goto fail;
+
+               status = priv_func(sys_table_arg, map, priv);
+               /* exit_boot_services() was called, thus cannot free */
+               if (status != EFI_SUCCESS)
+                       goto fail;
+
+               status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
+       }
+
+       /* exit_boot_services() was called, thus cannot free */
+       if (status != EFI_SUCCESS)
+               goto fail;
+
+       return EFI_SUCCESS;
+
+free_map:
+       efi_call_early(free_pool, *map->map);
+fail:
+       return status;
+}
index e58abfa..a6a9311 100644 (file)
@@ -152,6 +152,27 @@ fdt_set_fail:
 #define EFI_FDT_ALIGN EFI_PAGE_SIZE
 #endif
 
+struct exit_boot_struct {
+       efi_memory_desc_t *runtime_map;
+       int *runtime_entry_count;
+};
+
+static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
+                                  struct efi_boot_memmap *map,
+                                  void *priv)
+{
+       struct exit_boot_struct *p = priv;
+       /*
+        * Update the memory map with virtual addresses. The function will also
+        * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
+        * entries so that we can pass it straight to SetVirtualAddressMap()
+        */
+       efi_get_virtmap(*map->map, *map->map_size, *map->desc_size,
+                       p->runtime_map, p->runtime_entry_count);
+
+       return EFI_SUCCESS;
+}
+
 /*
  * Allocate memory for a new FDT, then add EFI, commandline, and
  * initrd related fields to the FDT.  This routine increases the
@@ -175,13 +196,22 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
                                            unsigned long fdt_addr,
                                            unsigned long fdt_size)
 {
-       unsigned long map_size, desc_size;
+       unsigned long map_size, desc_size, buff_size;
        u32 desc_ver;
        unsigned long mmap_key;
        efi_memory_desc_t *memory_map, *runtime_map;
        unsigned long new_fdt_size;
        efi_status_t status;
        int runtime_entry_count = 0;
+       struct efi_boot_memmap map;
+       struct exit_boot_struct priv;
+
+       map.map =       &runtime_map;
+       map.map_size =  &map_size;
+       map.desc_size = &desc_size;
+       map.desc_ver =  &desc_ver;
+       map.key_ptr =   &mmap_key;
+       map.buff_size = &buff_size;
 
        /*
         * Get a copy of the current memory map that we will use to prepare
@@ -189,8 +219,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
         * subsequent allocations adding entries, since they could not affect
         * the number of EFI_MEMORY_RUNTIME regions.
         */
-       status = efi_get_memory_map(sys_table, &runtime_map, &map_size,
-                                   &desc_size, &desc_ver, &mmap_key);
+       status = efi_get_memory_map(sys_table, &map);
        if (status != EFI_SUCCESS) {
                pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n");
                return status;
@@ -199,6 +228,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
        pr_efi(sys_table,
               "Exiting boot services and installing virtual address map...\n");
 
+       map.map = &memory_map;
        /*
         * Estimate size of new FDT, and allocate memory for it. We
         * will allocate a bigger buffer if this ends up being too
@@ -218,8 +248,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
                 * we can get the memory map key  needed for
                 * exit_boot_services().
                 */
-               status = efi_get_memory_map(sys_table, &memory_map, &map_size,
-                                           &desc_size, &desc_ver, &mmap_key);
+               status = efi_get_memory_map(sys_table, &map);
                if (status != EFI_SUCCESS)
                        goto fail_free_new_fdt;
 
@@ -250,16 +279,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
                }
        }
 
-       /*
-        * Update the memory map with virtual addresses. The function will also
-        * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
-        * entries so that we can pass it straight into SetVirtualAddressMap()
-        */
-       efi_get_virtmap(memory_map, map_size, desc_size, runtime_map,
-                       &runtime_entry_count);
-
-       /* Now we are ready to exit_boot_services.*/
-       status = sys_table->boottime->exit_boot_services(handle, mmap_key);
+       sys_table->boottime->free_pool(memory_map);
+       priv.runtime_map = runtime_map;
+       priv.runtime_entry_count = &runtime_entry_count;
+       status = efi_exit_boot_services(sys_table, handle, &map, &priv,
+                                       exit_boot_func);
 
        if (status == EFI_SUCCESS) {
                efi_set_virtual_address_map_t *svam;
index 53f6d3f..0c9f58c 100644 (file)
@@ -73,12 +73,20 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
                              unsigned long random_seed)
 {
        unsigned long map_size, desc_size, total_slots = 0, target_slot;
+       unsigned long buff_size;
        efi_status_t status;
        efi_memory_desc_t *memory_map;
        int map_offset;
+       struct efi_boot_memmap map;
 
-       status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size,
-                                   &desc_size, NULL, NULL);
+       map.map =       &memory_map;
+       map.map_size =  &map_size;
+       map.desc_size = &desc_size;
+       map.desc_ver =  NULL;
+       map.key_ptr =   NULL;
+       map.buff_size = &buff_size;
+
+       status = efi_get_memory_map(sys_table_arg, &map);
        if (status != EFI_SUCCESS)
                return status;
 
index a978381..9b17a66 100644 (file)
@@ -387,7 +387,7 @@ void atmel_hlcdc_crtc_irq(struct drm_crtc *c)
        atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c));
 }
 
-void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc)
+static void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc)
 {
        struct atmel_hlcdc_crtc_state *state;
 
index 016c191..52c527f 100644 (file)
@@ -320,19 +320,19 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
                        u32 *coeff_tab = heo_upscaling_ycoef;
                        u32 max_memsize;
 
-                       if (state->crtc_w < state->src_w)
+                       if (state->crtc_h < state->src_h)
                                coeff_tab = heo_downscaling_ycoef;
                        for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++)
                                atmel_hlcdc_layer_update_cfg(&plane->layer,
                                                             33 + i,
                                                             0xffffffff,
                                                             coeff_tab[i]);
-                       factor = ((8 * 256 * state->src_w) - (256 * 4)) /
-                                state->crtc_w;
+                       factor = ((8 * 256 * state->src_h) - (256 * 4)) /
+                                state->crtc_h;
                        factor++;
-                       max_memsize = ((factor * state->crtc_w) + (256 * 4)) /
+                       max_memsize = ((factor * state->crtc_h) + (256 * 4)) /
                                      2048;
-                       if (max_memsize > state->src_w)
+                       if (max_memsize > state->src_h)
                                factor--;
                        factor_reg |= (factor << 16) | 0x80000000;
                }
index 57676f8..a628975 100644 (file)
@@ -1015,6 +1015,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd,
        return 0;
 }
 
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
 typedef struct drm_mode_fb_cmd232 {
        u32 fb_id;
        u32 width;
@@ -1071,6 +1072,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd,
 
        return 0;
 }
+#endif
 
 static drm_ioctl_compat_t *drm_compat_ioctls[] = {
        [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version,
@@ -1104,7 +1106,9 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = {
        [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw,
 #endif
        [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank,
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
        [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2,
+#endif
 };
 
 /**
index e016640..40ce841 100644 (file)
@@ -55,11 +55,11 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev,
        flags = exynos_gem->flags;
 
        /*
-        * without iommu support, not support physically non-continuous memory
-        * for framebuffer.
+        * Physically non-contiguous memory type for framebuffer is not
+        * supported without IOMMU.
         */
        if (IS_NONCONTIG_BUFFER(flags)) {
-               DRM_ERROR("cannot use this gem memory type for fb.\n");
+               DRM_ERROR("Non-contiguous GEM memory is not supported.\n");
                return -EINVAL;
        }
 
index 0525c56..147ef0d 100644 (file)
@@ -1753,32 +1753,6 @@ static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int fimc_suspend(struct device *dev)
-{
-       struct fimc_context *ctx = get_fimc_context(dev);
-
-       DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-       if (pm_runtime_suspended(dev))
-               return 0;
-
-       return fimc_clk_ctrl(ctx, false);
-}
-
-static int fimc_resume(struct device *dev)
-{
-       struct fimc_context *ctx = get_fimc_context(dev);
-
-       DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-       if (!pm_runtime_suspended(dev))
-               return fimc_clk_ctrl(ctx, true);
-
-       return 0;
-}
-#endif
-
 static int fimc_runtime_suspend(struct device *dev)
 {
        struct fimc_context *ctx = get_fimc_context(dev);
@@ -1799,7 +1773,8 @@ static int fimc_runtime_resume(struct device *dev)
 #endif
 
 static const struct dev_pm_ops fimc_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume)
+       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                               pm_runtime_force_resume)
        SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL)
 };
 
index 4bf00f5..6eca8bb 100644 (file)
@@ -1475,8 +1475,8 @@ static int g2d_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int g2d_suspend(struct device *dev)
+#ifdef CONFIG_PM
+static int g2d_runtime_suspend(struct device *dev)
 {
        struct g2d_data *g2d = dev_get_drvdata(dev);
 
@@ -1490,25 +1490,6 @@ static int g2d_suspend(struct device *dev)
 
        flush_work(&g2d->runqueue_work);
 
-       return 0;
-}
-
-static int g2d_resume(struct device *dev)
-{
-       struct g2d_data *g2d = dev_get_drvdata(dev);
-
-       g2d->suspended = false;
-       g2d_exec_runqueue(g2d);
-
-       return 0;
-}
-#endif
-
-#ifdef CONFIG_PM
-static int g2d_runtime_suspend(struct device *dev)
-{
-       struct g2d_data *g2d = dev_get_drvdata(dev);
-
        clk_disable_unprepare(g2d->gate_clk);
 
        return 0;
@@ -1523,12 +1504,16 @@ static int g2d_runtime_resume(struct device *dev)
        if (ret < 0)
                dev_warn(dev, "failed to enable clock.\n");
 
+       g2d->suspended = false;
+       g2d_exec_runqueue(g2d);
+
        return ret;
 }
 #endif
 
 static const struct dev_pm_ops g2d_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(g2d_suspend, g2d_resume)
+       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                               pm_runtime_force_resume)
        SET_RUNTIME_PM_OPS(g2d_runtime_suspend, g2d_runtime_resume, NULL)
 };
 
index 5d20da8..52a9d26 100644 (file)
@@ -1760,34 +1760,7 @@ static int gsc_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int gsc_suspend(struct device *dev)
-{
-       struct gsc_context *ctx = get_gsc_context(dev);
-
-       DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-       if (pm_runtime_suspended(dev))
-               return 0;
-
-       return gsc_clk_ctrl(ctx, false);
-}
-
-static int gsc_resume(struct device *dev)
-{
-       struct gsc_context *ctx = get_gsc_context(dev);
-
-       DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-       if (!pm_runtime_suspended(dev))
-               return gsc_clk_ctrl(ctx, true);
-
-       return 0;
-}
-#endif
-
-#ifdef CONFIG_PM
-static int gsc_runtime_suspend(struct device *dev)
+static int __maybe_unused gsc_runtime_suspend(struct device *dev)
 {
        struct gsc_context *ctx = get_gsc_context(dev);
 
@@ -1796,7 +1769,7 @@ static int gsc_runtime_suspend(struct device *dev)
        return  gsc_clk_ctrl(ctx, false);
 }
 
-static int gsc_runtime_resume(struct device *dev)
+static int __maybe_unused gsc_runtime_resume(struct device *dev)
 {
        struct gsc_context *ctx = get_gsc_context(dev);
 
@@ -1804,10 +1777,10 @@ static int gsc_runtime_resume(struct device *dev)
 
        return  gsc_clk_ctrl(ctx, true);
 }
-#endif
 
 static const struct dev_pm_ops gsc_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume)
+       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                               pm_runtime_force_resume)
        SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
 };
 
index 404367a..6591e40 100644 (file)
@@ -794,29 +794,6 @@ static int rotator_clk_crtl(struct rot_context *rot, bool enable)
        return 0;
 }
 
-
-#ifdef CONFIG_PM_SLEEP
-static int rotator_suspend(struct device *dev)
-{
-       struct rot_context *rot = dev_get_drvdata(dev);
-
-       if (pm_runtime_suspended(dev))
-               return 0;
-
-       return rotator_clk_crtl(rot, false);
-}
-
-static int rotator_resume(struct device *dev)
-{
-       struct rot_context *rot = dev_get_drvdata(dev);
-
-       if (!pm_runtime_suspended(dev))
-               return rotator_clk_crtl(rot, true);
-
-       return 0;
-}
-#endif
-
 static int rotator_runtime_suspend(struct device *dev)
 {
        struct rot_context *rot = dev_get_drvdata(dev);
@@ -833,7 +810,8 @@ static int rotator_runtime_resume(struct device *dev)
 #endif
 
 static const struct dev_pm_ops rotator_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume)
+       SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                               pm_runtime_force_resume)
        SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume,
                                                                        NULL)
 };
index 95ddd56..5de36d8 100644 (file)
@@ -1281,6 +1281,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        intel_runtime_pm_enable(dev_priv);
 
+       /* Everything is in place, we can now relax! */
+       DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
+                driver.name, driver.major, driver.minor, driver.patchlevel,
+                driver.date, pci_name(pdev), dev_priv->drm.primary->index);
+
        intel_runtime_pm_put(dev_priv);
 
        return 0;
index 7a30af7..f38ceff 100644 (file)
@@ -122,8 +122,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
        has_full_48bit_ppgtt =
                IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
 
-       if (intel_vgpu_active(dev_priv))
-               has_full_ppgtt = false; /* emulation is too hard */
+       if (intel_vgpu_active(dev_priv)) {
+               /* emulation is too hard */
+               has_full_ppgtt = false;
+               has_full_48bit_ppgtt = false;
+       }
 
        if (!has_aliasing_ppgtt)
                return 0;
@@ -158,7 +161,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
                return 0;
        }
 
-       if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists)
+       if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
                return has_full_48bit_ppgtt ? 3 : 2;
        else
                return has_aliasing_ppgtt ? 1 : 0;
index f6acb5a..b81cfb3 100644 (file)
@@ -65,9 +65,6 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
 
        BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
-       if (!IS_HASWELL(dev_priv))
-               return;
-
        magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
        if (magic != VGT_MAGIC)
                return;
index 47bdf9d..b9e5a63 100644 (file)
@@ -554,7 +554,6 @@ void intel_dvo_init(struct drm_device *dev)
                return;
        }
 
-       drm_encoder_cleanup(&intel_encoder->base);
        kfree(intel_dvo);
        kfree(intel_connector);
 }
index adca262..7acbbbf 100644 (file)
@@ -1047,6 +1047,23 @@ err_out:
        return err;
 }
 
+static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id)
+{
+       DRM_INFO("Using panel type from OpRegion on %s\n", id->ident);
+       return 1;
+}
+
+static const struct dmi_system_id intel_use_opregion_panel_type[] = {
+       {
+               .callback = intel_use_opregion_panel_type_callback,
+               .ident = "Conrac GmbH IX45GM2",
+               .matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"),
+                           DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"),
+               },
+       },
+       { }
+};
+
 int
 intel_opregion_get_panel_type(struct drm_i915_private *dev_priv)
 {
@@ -1072,6 +1089,16 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv)
                return -ENODEV;
        }
 
+       /*
+        * So far we know that some machined must use it, others must not use it.
+        * There doesn't seem to be any way to determine which way to go, except
+        * via a quirk list :(
+        */
+       if (!dmi_check_system(intel_use_opregion_panel_type)) {
+               DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1);
+               return -ENODEV;
+       }
+
        /*
         * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us
         * low vswing for eDP, whereas the VBT panel type (2) gives us normal
index 53e13c1..2d24813 100644 (file)
@@ -7859,6 +7859,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
        case GEN6_PCODE_ILLEGAL_CMD:
                return -ENXIO;
        case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+       case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
                return -EOVERFLOW;
        case GEN6_PCODE_TIMEOUT:
                return -ETIMEDOUT;
index 2b0d1ba..cf171b4 100644 (file)
@@ -255,14 +255,14 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp)
        struct drm_i915_private *dev_priv = to_i915(dev);
 
        uint32_t max_sleep_time = 0x1f;
-       /* Lately it was identified that depending on panel idle frame count
-        * calculated at HW can be off by 1. So let's use what came
-        * from VBT + 1.
-        * There are also other cases where panel demands at least 4
-        * but VBT is not being set. To cover these 2 cases lets use
-        * at least 5 when VBT isn't set to be on the safest side.
+       /*
+        * Let's respect VBT in case VBT asks a higher idle_frame value.
+        * Let's use 6 as the minimum to cover all known cases including
+        * the off-by-one issue that HW has in some cases. Also there are
+        * cases where sink should be able to train
+        * with the 5 or 6 idle patterns.
         */
-       uint32_t idle_frames = dev_priv->vbt.psr.idle_frames + 1;
+       uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames);
        uint32_t val = EDP_PSR_ENABLE;
 
        val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT;
index 59adcf8..3f6704c 100644 (file)
@@ -144,7 +144,7 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
        return &vc4->bo_cache.size_list[page_index];
 }
 
-void vc4_bo_cache_purge(struct drm_device *dev)
+static void vc4_bo_cache_purge(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
 
index 46527e9..2543cf5 100644 (file)
@@ -309,8 +309,14 @@ validate_uniform_address_write(struct vc4_validated_shader_info *validated_shade
         * of uniforms on each side.  However, this scheme is easy to
         * validate so it's all we allow for now.
         */
-
-       if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+       switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+       case QPU_SIG_NONE:
+       case QPU_SIG_SCOREBOARD_UNLOCK:
+       case QPU_SIG_COLOR_LOAD:
+       case QPU_SIG_LOAD_TMU0:
+       case QPU_SIG_LOAD_TMU1:
+               break;
+       default:
                DRM_ERROR("uniforms address change must be "
                          "normal math\n");
                return false;
index 3cbbfbe..71c8867 100644 (file)
@@ -332,6 +332,8 @@ static void remove_ep_tid(struct c4iw_ep *ep)
 
        spin_lock_irqsave(&ep->com.dev->lock, flags);
        _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+       if (idr_is_empty(&ep->com.dev->hwtid_idr))
+               wake_up(&ep->com.dev->wait);
        spin_unlock_irqrestore(&ep->com.dev->lock, flags);
 }
 
@@ -2014,8 +2016,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
                }
                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
                                        n, pdev, rt_tos2priority(tos));
-               if (!ep->l2t)
+               if (!ep->l2t) {
+                       dev_put(pdev);
                        goto out;
+               }
                ep->mtu = pdev->mtu;
                ep->tx_chan = cxgb4_port_chan(pdev);
                ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
index f170b63..93e3d27 100644 (file)
@@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 static void c4iw_dealloc(struct uld_ctx *ctx)
 {
        c4iw_rdev_close(&ctx->dev->rdev);
+       WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
        idr_destroy(&ctx->dev->cqidr);
+       WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
        idr_destroy(&ctx->dev->qpidr);
+       WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
        idr_destroy(&ctx->dev->mmidr);
+       wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
        idr_destroy(&ctx->dev->hwtid_idr);
        idr_destroy(&ctx->dev->stid_idr);
        idr_destroy(&ctx->dev->atid_idr);
@@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
        mutex_init(&devp->rdev.stats.lock);
        mutex_init(&devp->db_mutex);
        INIT_LIST_HEAD(&devp->db_fc_list);
+       init_waitqueue_head(&devp->wait);
        devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
 
        if (c4iw_debugfs_root) {
index 6a9bef1..cdcf3ee 100644 (file)
@@ -263,6 +263,7 @@ struct c4iw_dev {
        struct idr stid_idr;
        struct list_head db_fc_list;
        u32 avail_ird;
+       wait_queue_head_t wait;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
index 9c2e53d..0f21c3a 100644 (file)
@@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
 
                /* Generate GUID changed event */
                if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+                       if (mlx4_is_master(dev->dev)) {
+                               union ib_gid gid;
+                               int err = 0;
+
+                               if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+                                       err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+                               else
+                                       gid.global.subnet_prefix =
+                                               eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+                               if (err) {
+                                       pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+                                               port, err);
+                               } else {
+                                       pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+                                                port,
+                                                (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+                                                be64_to_cpu(gid.global.subnet_prefix));
+                                       atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+                                                    be64_to_cpu(gid.global.subnet_prefix));
+                               }
+                       }
                        mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
                        /*if master, notify all slaves*/
                        if (mlx4_is_master(dev->dev))
@@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
                if (err)
                        goto demux_err;
                dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+               atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+                            be64_to_cpu(gid.global.subnet_prefix));
                err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
                                      &dev->sriov.sqps[i]);
                if (err)
index 2af44c2..87ba9bc 100644 (file)
@@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
        bool per_port = !!(ibdev->dev->caps.flags2 &
                MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
 
+       if (mlx4_is_slave(ibdev->dev))
+               return 0;
+
        for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
                /* i == 1 means we are building port counters */
                if (i && !per_port)
index 8f7ad07..097bfcc 100644 (file)
@@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
                if (!group->members[i])
                        leave_state |= (1 << i);
 
-       return leave_state & (group->rec.scope_join_state & 7);
+       return leave_state & (group->rec.scope_join_state & 0xf);
 }
 
 static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
                } else
                        mcg_warn_group(group, "DRIVER BUG\n");
        } else if (group->state == MCAST_LEAVE_SENT) {
-               if (group->rec.scope_join_state & 7)
-                       group->rec.scope_join_state &= 0xf8;
+               if (group->rec.scope_join_state & 0xf)
+                       group->rec.scope_join_state &= 0xf0;
                group->state = MCAST_IDLE;
                mutex_unlock(&group->lock);
                if (release_group(group, 1))
@@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
 static int handle_join_req(struct mcast_group *group, u8 join_mask,
                           struct mcast_req *req)
 {
-       u8 group_join_state = group->rec.scope_join_state & 7;
+       u8 group_join_state = group->rec.scope_join_state & 0xf;
        int ref = 0;
        u16 status;
        struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
                        u8 cur_join_state;
 
                        resp_join_state = ((struct ib_sa_mcmember_data *)
-                                               group->response_sa_mad.data)->scope_join_state & 7;
-                       cur_join_state = group->rec.scope_join_state & 7;
+                                               group->response_sa_mad.data)->scope_join_state & 0xf;
+                       cur_join_state = group->rec.scope_join_state & 0xf;
 
                        if (method == IB_MGMT_METHOD_GET_RESP) {
                                /* successfull join */
@@ -710,7 +710,7 @@ process_requests:
                req = list_first_entry(&group->pending_list, struct mcast_req,
                                       group_list);
                sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
-               req_join_state = sa_data->scope_join_state & 0x7;
+               req_join_state = sa_data->scope_join_state & 0xf;
 
                /* For a leave request, we will immediately answer the VF, and
                 * update our internal counters. The actual leave will be sent
index 7c5832e..686ab48 100644 (file)
@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
        struct workqueue_struct *wq;
        struct workqueue_struct *ud_wq;
        spinlock_t ud_lock;
-       __be64 subnet_prefix;
+       atomic64_t subnet_prefix;
        __be64 guid_cache[128];
        struct mlx4_ib_dev *dev;
        /* the following lock protects both mcg_table and mcg_mgid0_list */
index 768085f..7fb9629 100644 (file)
@@ -2493,24 +2493,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                sqp->ud_header.grh.flow_label    =
                        ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
                sqp->ud_header.grh.hop_limit     = ah->av.ib.hop_limit;
-               if (is_eth)
+               if (is_eth) {
                        memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
-               else {
-               if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
-                       /* When multi-function is enabled, the ib_core gid
-                        * indexes don't necessarily match the hw ones, so
-                        * we must use our own cache */
-                       sqp->ud_header.grh.source_gid.global.subnet_prefix =
-                               to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-                                                      subnet_prefix;
-                       sqp->ud_header.grh.source_gid.global.interface_id =
-                               to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-                                              guid_cache[ah->av.ib.gid_index];
-               } else
-                       ib_get_cached_gid(ib_dev,
-                                         be32_to_cpu(ah->av.ib.port_pd) >> 24,
-                                         ah->av.ib.gid_index,
-                                         &sqp->ud_header.grh.source_gid, NULL);
+               } else {
+                       if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+                               /* When multi-function is enabled, the ib_core gid
+                                * indexes don't necessarily match the hw ones, so
+                                * we must use our own cache
+                                */
+                               sqp->ud_header.grh.source_gid.global.subnet_prefix =
+                                       cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+                                                                   demux[sqp->qp.port - 1].
+                                                                   subnet_prefix)));
+                               sqp->ud_header.grh.source_gid.global.interface_id =
+                                       to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+                                                      guid_cache[ah->av.ib.gid_index];
+                       } else {
+                               ib_get_cached_gid(ib_dev,
+                                                 be32_to_cpu(ah->av.ib.port_pd) >> 24,
+                                                 ah->av.ib.gid_index,
+                                                 &sqp->ud_header.grh.source_gid, NULL);
+                       }
                }
                memcpy(sqp->ud_header.grh.destination_gid.raw,
                       ah->av.ib.dgid, 16);
index e4aecbf..551aa0e 100644 (file)
@@ -284,7 +284,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-       return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+       if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+               return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+       return 0;
 }
 
 enum {
@@ -1423,6 +1425,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                                             dmac_47_16),
                                ib_spec->eth.val.dst_mac);
 
+               ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+                                            smac_47_16),
+                               ib_spec->eth.mask.src_mac);
+               ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+                                            smac_47_16),
+                               ib_spec->eth.val.src_mac);
+
                if (ib_spec->eth.mask.vlan_tag) {
                        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
                                 vlan_tag, 1);
index 80c4b6b..46b6497 100644 (file)
@@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr)
 {
        rvt_deinit_mregion(&mr->mr);
        rvt_free_lkey(&mr->mr);
-       vfree(mr);
+       kfree(mr);
 }
 
 /**
index 55f0e8f..ddd5927 100644 (file)
@@ -362,15 +362,34 @@ static int __init rxe_module_init(void)
                return err;
        }
 
-       err = rxe_net_init();
+       err = rxe_net_ipv4_init();
        if (err) {
-               pr_err("rxe: unable to init\n");
+               pr_err("rxe: unable to init ipv4 tunnel\n");
                rxe_cache_exit();
-               return err;
+               goto exit;
+       }
+
+       err = rxe_net_ipv6_init();
+       if (err) {
+               pr_err("rxe: unable to init ipv6 tunnel\n");
+               rxe_cache_exit();
+               goto exit;
        }
+
+       err = register_netdevice_notifier(&rxe_net_notifier);
+       if (err) {
+               pr_err("rxe: Failed to rigister netdev notifier\n");
+               goto exit;
+       }
+
        pr_info("rxe: loaded\n");
 
        return 0;
+
+exit:
+       rxe_release_udp_tunnel(recv_sockets.sk4);
+       rxe_release_udp_tunnel(recv_sockets.sk6);
+       return err;
 }
 
 static void __exit rxe_module_exit(void)
index 36f67de..1c59ef2 100644 (file)
@@ -689,7 +689,14 @@ int rxe_completer(void *arg)
                                        qp->req.need_retry = 1;
                                        rxe_run_task(&qp->req.task, 1);
                                }
+
+                               if (pkt) {
+                                       rxe_drop_ref(pkt->qp);
+                                       kfree_skb(skb);
+                               }
+
                                goto exit;
+
                        } else {
                                wqe->status = IB_WC_RETRY_EXC_ERR;
                                state = COMPST_ERROR;
@@ -716,6 +723,12 @@ int rxe_completer(void *arg)
                case COMPST_ERROR:
                        do_complete(qp, wqe);
                        rxe_qp_error(qp);
+
+                       if (pkt) {
+                               rxe_drop_ref(pkt->qp);
+                               kfree_skb(skb);
+                       }
+
                        goto exit;
                }
        }
index 0b8d2ea..eedf2f1 100644 (file)
@@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
        return sock;
 }
 
-static void rxe_release_udp_tunnel(struct socket *sk)
+void rxe_release_udp_tunnel(struct socket *sk)
 {
-       udp_tunnel_sock_release(sk);
+       if (sk)
+               udp_tunnel_sock_release(sk);
 }
 
 static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
@@ -658,51 +659,45 @@ out:
        return NOTIFY_OK;
 }
 
-static struct notifier_block rxe_net_notifier = {
+struct notifier_block rxe_net_notifier = {
        .notifier_call = rxe_notify,
 };
 
-int rxe_net_init(void)
+int rxe_net_ipv4_init(void)
 {
-       int err;
-
        spin_lock_init(&dev_list_lock);
 
-       recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
-                       htons(ROCE_V2_UDP_DPORT), true);
-       if (IS_ERR(recv_sockets.sk6)) {
-               recv_sockets.sk6 = NULL;
-               pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
-               return -1;
-       }
-
        recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
-                       htons(ROCE_V2_UDP_DPORT), false);
+                               htons(ROCE_V2_UDP_DPORT), false);
        if (IS_ERR(recv_sockets.sk4)) {
-               rxe_release_udp_tunnel(recv_sockets.sk6);
                recv_sockets.sk4 = NULL;
-               recv_sockets.sk6 = NULL;
                pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
                return -1;
        }
 
-       err = register_netdevice_notifier(&rxe_net_notifier);
-       if (err) {
-               rxe_release_udp_tunnel(recv_sockets.sk6);
-               rxe_release_udp_tunnel(recv_sockets.sk4);
-               pr_err("rxe: Failed to rigister netdev notifier\n");
-       }
-
-       return err;
+       return 0;
 }
 
-void rxe_net_exit(void)
+int rxe_net_ipv6_init(void)
 {
-       if (recv_sockets.sk6)
-               rxe_release_udp_tunnel(recv_sockets.sk6);
+#if IS_ENABLED(CONFIG_IPV6)
 
-       if (recv_sockets.sk4)
-               rxe_release_udp_tunnel(recv_sockets.sk4);
+       spin_lock_init(&dev_list_lock);
 
+       recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
+                                               htons(ROCE_V2_UDP_DPORT), true);
+       if (IS_ERR(recv_sockets.sk6)) {
+               recv_sockets.sk6 = NULL;
+               pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
+               return -1;
+       }
+#endif
+       return 0;
+}
+
+void rxe_net_exit(void)
+{
+       rxe_release_udp_tunnel(recv_sockets.sk6);
+       rxe_release_udp_tunnel(recv_sockets.sk4);
        unregister_netdevice_notifier(&rxe_net_notifier);
 }
index 7b06f76..0daf7f0 100644 (file)
@@ -44,10 +44,13 @@ struct rxe_recv_sockets {
 };
 
 extern struct rxe_recv_sockets recv_sockets;
+extern struct notifier_block rxe_net_notifier;
+void rxe_release_udp_tunnel(struct socket *sk);
 
 struct rxe_dev *rxe_net_add(struct net_device *ndev);
 
-int rxe_net_init(void);
+int rxe_net_ipv4_init(void);
+int rxe_net_ipv6_init(void);
 void rxe_net_exit(void);
 
 #endif /* RXE_NET_H */
index 3d464c2..144d2f1 100644 (file)
@@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
                 * make a copy of the skb to post to the next qp
                 */
                skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
-                               skb_clone(skb, GFP_KERNEL) : NULL;
+                               skb_clone(skb, GFP_ATOMIC) : NULL;
 
                pkt->qp = qp;
                rxe_add_ref(qp);
index 33b2d9d..13a848a 100644 (file)
@@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 }
 
 static void update_wqe_state(struct rxe_qp *qp,
-                            struct rxe_send_wqe *wqe,
-                            struct rxe_pkt_info *pkt,
-                            enum wqe_state *prev_state)
+               struct rxe_send_wqe *wqe,
+               struct rxe_pkt_info *pkt)
 {
-       enum wqe_state prev_state_ = wqe->state;
-
        if (pkt->mask & RXE_END_MASK) {
                if (qp_type(qp) == IB_QPT_RC)
                        wqe->state = wqe_state_pending;
        } else {
                wqe->state = wqe_state_processing;
        }
-
-       *prev_state = prev_state_;
 }
 
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-                        struct rxe_pkt_info *pkt, int payload)
+static void update_wqe_psn(struct rxe_qp *qp,
+                          struct rxe_send_wqe *wqe,
+                          struct rxe_pkt_info *pkt,
+                          int payload)
 {
        /* number of packets left to send including current one */
        int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
                qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
        else
                qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+}
 
-       qp->req.opcode = pkt->opcode;
+static void save_state(struct rxe_send_wqe *wqe,
+                      struct rxe_qp *qp,
+                      struct rxe_send_wqe *rollback_wqe,
+                      struct rxe_qp *rollback_qp)
+{
+       rollback_wqe->state     = wqe->state;
+       rollback_wqe->first_psn = wqe->first_psn;
+       rollback_wqe->last_psn  = wqe->last_psn;
+       rollback_qp->req.psn    = qp->req.psn;
+}
 
+static void rollback_state(struct rxe_send_wqe *wqe,
+                          struct rxe_qp *qp,
+                          struct rxe_send_wqe *rollback_wqe,
+                          struct rxe_qp *rollback_qp)
+{
+       wqe->state     = rollback_wqe->state;
+       wqe->first_psn = rollback_wqe->first_psn;
+       wqe->last_psn  = rollback_wqe->last_psn;
+       qp->req.psn    = rollback_qp->req.psn;
+}
+
+static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+                        struct rxe_pkt_info *pkt, int payload)
+{
+       qp->req.opcode = pkt->opcode;
 
        if (pkt->mask & RXE_END_MASK)
                qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
@@ -571,7 +593,8 @@ int rxe_requester(void *arg)
        int mtu;
        int opcode;
        int ret;
-       enum wqe_state prev_state;
+       struct rxe_qp rollback_qp;
+       struct rxe_send_wqe rollback_wqe;
 
 next_wqe:
        if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -688,13 +711,21 @@ next_wqe:
                goto err;
        }
 
-       update_wqe_state(qp, wqe, &pkt, &prev_state);
+       /*
+        * To prevent a race on wqe access between requester and completer,
+        * wqe members state and psn need to be set before calling
+        * rxe_xmit_packet().
+        * Otherwise, completer might initiate an unjustified retry flow.
+        */
+       save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+       update_wqe_state(qp, wqe, &pkt);
+       update_wqe_psn(qp, wqe, &pkt, payload);
        ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
        if (ret) {
                qp->need_req_skb = 1;
                kfree_skb(skb);
 
-               wqe->state = prev_state;
+               rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
 
                if (ret == -EAGAIN) {
                        rxe_run_task(&qp->req.task, 1);
index ebb03b4..3e0f0f2 100644 (file)
@@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
        free_rd_atomic_resource(qp, res);
        rxe_advance_resp_resource(qp);
 
+       memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+
        res->type = RXE_ATOMIC_MASK;
        res->atomic.skb = skb;
-       res->first_psn = qp->resp.psn;
-       res->last_psn = qp->resp.psn;
-       res->cur_psn = qp->resp.psn;
+       res->first_psn = ack_pkt.psn;
+       res->last_psn  = ack_pkt.psn;
+       res->cur_psn   = ack_pkt.psn;
 
        rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
        if (rc) {
@@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
                                rc = RESPST_CLEANUP;
                                goto out;
                        }
-                       bth_set_psn(SKB_TO_PKT(skb_copy),
-                                   qp->resp.psn - 1);
+
                        /* Resend the result. */
                        rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
                                             pkt, skb_copy);
index dc6d241..be11d5d 100644 (file)
@@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        }
 
        if (level == IPOIB_FLUSH_LIGHT) {
+               int oper_up;
                ipoib_mark_paths_invalid(dev);
+               /* Set IPoIB operation as down to prevent races between:
+                * the flush flow which leaves MCG and on the fly joins
+                * which can happen during that time. mcast restart task
+                * should deal with join requests we missed.
+                */
+               oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
                ipoib_mcast_dev_flush(dev);
+               if (oper_up)
+                       set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
                ipoib_flush_ah(dev);
        }
 
index 112e17c..37f952d 100644 (file)
@@ -176,6 +176,7 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
 {
        struct irq_domain_chip_generic *dgc = d->gc;
        struct irq_chip_generic *gc;
+       unsigned long flags;
        unsigned smr;
        int idx;
        int ret;
@@ -194,11 +195,11 @@ static int aic_irq_domain_xlate(struct irq_domain *d,
 
        gc = dgc->gc[idx];
 
-       irq_gc_lock(gc);
+       irq_gc_lock_irqsave(gc, flags);
        smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq));
        aic_common_set_priority(intspec[2], &smr);
        irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
-       irq_gc_unlock(gc);
+       irq_gc_unlock_irqrestore(gc, flags);
 
        return ret;
 }
index 4f0d068..2a624d8 100644 (file)
@@ -258,6 +258,7 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
                                 unsigned int *out_type)
 {
        struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0);
+       unsigned long flags;
        unsigned smr;
        int ret;
 
@@ -269,12 +270,12 @@ static int aic5_irq_domain_xlate(struct irq_domain *d,
        if (ret)
                return ret;
 
-       irq_gc_lock(bgc);
+       irq_gc_lock_irqsave(bgc, flags);
        irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR);
        smr = irq_reg_readl(bgc, AT91_AIC5_SMR);
        aic_common_set_priority(intspec[2], &smr);
        irq_reg_writel(bgc, smr, AT91_AIC5_SMR);
-       irq_gc_unlock(bgc);
+       irq_gc_unlock_irqrestore(bgc, flags);
 
        return ret;
 }
index 67642ba..915e84d 100644 (file)
@@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);
 
 int md_setup_cluster(struct mddev *mddev, int nodes)
 {
-       int err;
-
-       err = request_module("md-cluster");
-       if (err) {
-               pr_err("md-cluster module not found.\n");
-               return -ENOENT;
-       }
-
+       if (!md_cluster_ops)
+               request_module("md-cluster");
        spin_lock(&pers_lock);
+       /* ensure module won't be unloaded */
        if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
+               pr_err("can't find md-cluster module or get it's reference.\n");
                spin_unlock(&pers_lock);
                return -ENOENT;
        }
index 51f76dd..1b1ab4a 100644 (file)
@@ -96,7 +96,6 @@ struct r5l_log {
        spinlock_t no_space_stripes_lock;
 
        bool need_cache_flush;
-       bool in_teardown;
 };
 
 /*
@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 
        mddev = log->rdev->mddev;
        /*
-        * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
-        * wait for this thread to finish. This thread waits for
-        * MD_CHANGE_PENDING clear, which is supposed to be done in
-        * md_check_recovery(). md_check_recovery() tries to get
-        * reconfig_mutex. Since r5l_quiesce already holds the mutex,
-        * md_check_recovery() fails, so the PENDING never get cleared. The
-        * in_teardown check workaround this issue.
+        * Discard could zero data, so before discard we must make sure
+        * superblock is updated to new log tail. Updating superblock (either
+        * directly call md_update_sb() or depend on md thread) must hold
+        * reconfig mutex. On the other hand, raid5_quiesce is called with
+        * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
+        * for all IO finish, hence waitting for reclaim thread, while reclaim
+        * thread is calling this function and waitting for reconfig mutex. So
+        * there is a deadlock. We workaround this issue with a trylock.
+        * FIXME: we could miss discard if we can't take reconfig mutex
         */
-       if (!log->in_teardown) {
-               set_mask_bits(&mddev->flags, 0,
-                             BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-               md_wakeup_thread(mddev->thread);
-               wait_event(mddev->sb_wait,
-                       !test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
-                       log->in_teardown);
-               /*
-                * r5l_quiesce could run after in_teardown check and hold
-                * mutex first. Superblock might get updated twice.
-                */
-               if (log->in_teardown)
-                       md_update_sb(mddev, 1);
-       } else {
-               WARN_ON(!mddev_is_locked(mddev));
-               md_update_sb(mddev, 1);
-       }
+       set_mask_bits(&mddev->flags, 0,
+               BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+       if (!mddev_trylock(mddev))
+               return;
+       md_update_sb(mddev, 1);
+       mddev_unlock(mddev);
 
        /* discard IO error really doesn't matter, ignore it */
        if (log->last_checkpoint < end) {
@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
        if (!log || state == 2)
                return;
        if (state == 0) {
-               log->in_teardown = 0;
                /*
                 * This is a special case for hotadd. In suspend, the array has
                 * no journal. In resume, journal is initialized as well as the
@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
                log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
                                        log->rdev->mddev, "reclaim");
        } else if (state == 1) {
-               /*
-                * at this point all stripes are finished, so io_unit is at
-                * least in STRIPE_END state
-                */
-               log->in_teardown = 1;
                /* make sure r5l_write_super_and_discard_space exits */
                mddev = log->rdev->mddev;
                wake_up(&mddev->sb_wait);
index da583bb..ee7fc37 100644 (file)
@@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi)
                }
        }
        rdev_dec_pending(rdev, conf->mddev);
+       bio_reset(bi);
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
-       bio_reset(bi);
 }
 
 static void raid5_end_write_request(struct bio *bi)
@@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi)
        if (sh->batch_head && bi->bi_error && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
+       bio_reset(bi);
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
@@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi)
 
        if (sh->batch_head && sh != sh->batch_head)
                raid5_release_stripe(sh->batch_head);
-       bio_reset(bi);
 }
 
 static void raid5_build_block(struct stripe_head *sh, int i, int previous)
@@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        }
 
        conf->min_nr_stripes = NR_STRIPES;
+       if (mddev->reshape_position != MaxSector) {
+               int stripes = max_t(int,
+                       ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
+                       ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
+               conf->min_nr_stripes = max(NR_STRIPES, stripes);
+               if (conf->min_nr_stripes != NR_STRIPES)
+                       printk(KERN_INFO
+                               "md/raid:%s: force stripe size %d for reshape\n",
+                               mdname(mddev), conf->min_nr_stripes);
+       }
        memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
                 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
        atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
index 7001824..5719b99 100644 (file)
@@ -70,7 +70,10 @@ static unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size)
                                u8 tag = edid[i] >> 5;
                                u8 len = edid[i] & 0x1f;
 
-                               if (tag == 3 && len >= 5 && i + len <= end)
+                               if (tag == 3 && len >= 5 && i + len <= end &&
+                                   edid[i + 1] == 0x03 &&
+                                   edid[i + 2] == 0x0c &&
+                                   edid[i + 3] == 0x00)
                                        return i + 4;
                                i += len + 1;
                        } while (i < end);
index efec2d1..4d080da 100644 (file)
@@ -1552,6 +1552,7 @@ int cx23885_417_register(struct cx23885_dev *dev)
        q->mem_ops = &vb2_dma_sg_memops;
        q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
        q->lock = &dev->lock;
+       q->dev = &dev->pci->dev;
 
        err = vb2_queue_init(q);
        if (err < 0)
index db987e5..59a4b5f 100644 (file)
@@ -1238,6 +1238,7 @@ static int dvb_init(struct saa7134_dev *dev)
        q->buf_struct_size = sizeof(struct saa7134_buf);
        q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
        q->lock = &dev->lock;
+       q->dev = &dev->pci->dev;
        ret = vb2_queue_init(q);
        if (ret) {
                vb2_dvb_dealloc_frontends(&dev->frontends);
index ca417a4..791a516 100644 (file)
@@ -295,6 +295,7 @@ static int empress_init(struct saa7134_dev *dev)
        q->buf_struct_size = sizeof(struct saa7134_buf);
        q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
        q->lock = &dev->lock;
+       q->dev = &dev->pci->dev;
        err = vb2_queue_init(q);
        if (err)
                return err;
index f25344b..552b635 100644 (file)
@@ -169,7 +169,7 @@ config VIDEO_MEDIATEK_VPU
 config VIDEO_MEDIATEK_VCODEC
        tristate "Mediatek Video Codec driver"
        depends on MTK_IOMMU || COMPILE_TEST
-       depends on VIDEO_DEV && VIDEO_V4L2
+       depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA
        depends on ARCH_MEDIATEK || COMPILE_TEST
        select VIDEOBUF2_DMA_CONTIG
        select V4L2_MEM2MEM_DEV
index 94f0a42..3a8e695 100644 (file)
@@ -23,7 +23,6 @@
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-core.h>
 
-#include "mtk_vcodec_util.h"
 
 #define MTK_VCODEC_DRV_NAME    "mtk_vcodec_drv"
 #define MTK_VCODEC_ENC_NAME    "mtk-vcodec-enc"
index 3ed3f2d..2c5719a 100644 (file)
@@ -487,7 +487,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv,
        struct mtk_q_data *q_data;
        int ret, i;
        struct mtk_video_fmt *fmt;
-       unsigned int pitch_w_div16;
        struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
 
        vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type);
@@ -530,15 +529,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv,
        q_data->coded_width = f->fmt.pix_mp.width;
        q_data->coded_height = f->fmt.pix_mp.height;
 
-       pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16);
-       if (pitch_w_div16 % 8 != 0) {
-               /* Adjust returned width/height, so application could correctly
-                * allocate hw required memory
-                */
-               q_data->visible_height += 32;
-               vidioc_try_fmt(f, q_data->fmt);
-       }
-
        q_data->field = f->fmt.pix_mp.field;
        ctx->colorspace = f->fmt.pix_mp.colorspace;
        ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
@@ -878,7 +868,8 @@ static int mtk_venc_encode_header(void *priv)
 {
        struct mtk_vcodec_ctx *ctx = priv;
        int ret;
-       struct vb2_buffer *dst_buf;
+       struct vb2_buffer *src_buf, *dst_buf;
+       struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2;
        struct mtk_vcodec_mem bs_buf;
        struct venc_done_result enc_result;
 
@@ -911,6 +902,15 @@ static int mtk_venc_encode_header(void *priv)
                mtk_v4l2_err("venc_if_encode failed=%d", ret);
                return -EINVAL;
        }
+       src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
+       if (src_buf) {
+               src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf);
+               dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf);
+               dst_buf->timestamp = src_buf->timestamp;
+               dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode;
+       } else {
+               mtk_v4l2_err("No timestamp for the header buffer.");
+       }
 
        ctx->state = MTK_STATE_HEADER;
        dst_buf->planes[0].bytesused = enc_result.bs_size;
@@ -1003,7 +1003,7 @@ static void mtk_venc_worker(struct work_struct *work)
        struct mtk_vcodec_mem bs_buf;
        struct venc_done_result enc_result;
        int ret, i;
-       struct vb2_v4l2_buffer *vb2_v4l2;
+       struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2;
 
        /* check dst_buf, dst_buf may be removed in device_run
         * to stored encdoe header so we need check dst_buf and
@@ -1043,9 +1043,14 @@ static void mtk_venc_worker(struct work_struct *work)
        ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME,
                             &frm_buf, &bs_buf, &enc_result);
 
-       vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf);
+       src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf);
+       dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf);
+
+       dst_buf->timestamp = src_buf->timestamp;
+       dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode;
+
        if (enc_result.is_key_frm)
-               vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME;
+               dst_vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME;
 
        if (ret) {
                v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf),
@@ -1217,7 +1222,7 @@ int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx)
                        0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE);
        v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE,
                        V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
-                       0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN);
+                       0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
        v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL,
                        V4L2_MPEG_VIDEO_H264_LEVEL_4_2,
                        0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0);
@@ -1288,5 +1293,10 @@ int mtk_venc_lock(struct mtk_vcodec_ctx *ctx)
 
 void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx)
 {
-       venc_if_deinit(ctx);
+       int ret = venc_if_deinit(ctx);
+
+       if (ret)
+               mtk_v4l2_err("venc_if_deinit failed=%d", ret);
+
+       ctx->state = MTK_STATE_FREE;
 }
index c7806ec..5cd2151 100644 (file)
@@ -218,11 +218,15 @@ static int fops_vcodec_release(struct file *file)
        mtk_v4l2_debug(1, "[%d] encoder", ctx->id);
        mutex_lock(&dev->dev_mutex);
 
+       /*
+        * Call v4l2_m2m_ctx_release to make sure the worker thread is not
+        * running after venc_if_deinit.
+        */
+       v4l2_m2m_ctx_release(ctx->m2m_ctx);
        mtk_vcodec_enc_release(ctx);
        v4l2_fh_del(&ctx->fh);
        v4l2_fh_exit(&ctx->fh);
        v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
-       v4l2_m2m_ctx_release(ctx->m2m_ctx);
 
        list_del_init(&ctx->list);
        dev->num_instances--;
index 33e890f..1213185 100644 (file)
@@ -16,7 +16,6 @@
 #define _MTK_VCODEC_INTR_H_
 
 #define MTK_INST_IRQ_RECEIVED          0x1
-#define MTK_INST_WORK_THREAD_ABORT_DONE        0x2
 
 struct mtk_vcodec_ctx;
 
index 9a60052..63d4be4 100644 (file)
@@ -61,6 +61,8 @@ enum venc_h264_bs_mode {
 
 /*
  * struct venc_h264_vpu_config - Structure for h264 encoder configuration
+ *                               AP-W/R : AP is writer/reader on this item
+ *                               VPU-W/R: VPU is write/reader on this item
  * @input_fourcc: input fourcc
  * @bitrate: target bitrate (in bps)
  * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
@@ -94,13 +96,13 @@ struct venc_h264_vpu_config {
 
 /*
  * struct venc_h264_vpu_buf - Structure for buffer information
- * @align: buffer alignment (in bytes)
+ *                            AP-W/R : AP is writer/reader on this item
+ *                            VPU-W/R: VPU is write/reader on this item
  * @iova: IO virtual address
  * @vpua: VPU side memory addr which is used by RC_CODE
  * @size: buffer size (in bytes)
  */
 struct venc_h264_vpu_buf {
-       u32 align;
        u32 iova;
        u32 vpua;
        u32 size;
@@ -108,6 +110,8 @@ struct venc_h264_vpu_buf {
 
 /*
  * struct venc_h264_vsi - Structure for VPU driver control and info share
+ *                        AP-W/R : AP is writer/reader on this item
+ *                        VPU-W/R: VPU is write/reader on this item
  * This structure is allocated in VPU side and shared to AP side.
  * @config: h264 encoder configuration
  * @work_bufs: working buffer information in VPU side
@@ -150,12 +154,6 @@ struct venc_h264_inst {
        struct mtk_vcodec_ctx *ctx;
 };
 
-static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr,
-                                 u32 val)
-{
-       writel(val, inst->hw_base + addr);
-}
-
 static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr)
 {
        return readl(inst->hw_base + addr);
@@ -214,6 +212,8 @@ static unsigned int h264_get_level(struct venc_h264_inst *inst,
                return 40;
        case V4L2_MPEG_VIDEO_H264_LEVEL_4_1:
                return 41;
+       case V4L2_MPEG_VIDEO_H264_LEVEL_4_2:
+               return 42;
        default:
                mtk_vcodec_debug(inst, "unsupported level %d", level);
                return 31;
index 60bbcd2..6d97584 100644 (file)
@@ -56,6 +56,8 @@ enum venc_vp8_vpu_work_buf {
 
 /*
  * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration
+ *                              AP-W/R : AP is writer/reader on this item
+ *                              VPU-W/R: VPU is write/reader on this item
  * @input_fourcc: input fourcc
  * @bitrate: target bitrate (in bps)
  * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
@@ -83,14 +85,14 @@ struct venc_vp8_vpu_config {
 };
 
 /*
- * struct venc_vp8_vpu_buf -Structure for buffer information
- * @align: buffer alignment (in bytes)
+ * struct venc_vp8_vpu_buf - Structure for buffer information
+ *                           AP-W/R : AP is writer/reader on this item
+ *                           VPU-W/R: VPU is write/reader on this item
  * @iova: IO virtual address
  * @vpua: VPU side memory addr which is used by RC_CODE
  * @size: buffer size (in bytes)
  */
 struct venc_vp8_vpu_buf {
-       u32 align;
        u32 iova;
        u32 vpua;
        u32 size;
@@ -98,6 +100,8 @@ struct venc_vp8_vpu_buf {
 
 /*
  * struct venc_vp8_vsi - Structure for VPU driver control and info share
+ *                       AP-W/R : AP is writer/reader on this item
+ *                       VPU-W/R: VPU is write/reader on this item
  * This structure is allocated in VPU side and shared to AP side.
  * @config: vp8 encoder configuration
  * @work_bufs: working buffer information in VPU side
@@ -138,12 +142,6 @@ struct venc_vp8_inst {
        struct mtk_vcodec_ctx *ctx;
 };
 
-static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr,
-                                    u32 val)
-{
-       writel(val, inst->hw_base + addr);
-}
-
 static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr)
 {
        return readl(inst->hw_base + addr);
index 6a7bcc3..bc50c69 100644 (file)
@@ -99,10 +99,16 @@ EXPORT_SYMBOL_GPL(rcar_fcp_put);
  */
 int rcar_fcp_enable(struct rcar_fcp_device *fcp)
 {
+       int error;
+
        if (!fcp)
                return 0;
 
-       return pm_runtime_get_sync(fcp->dev);
+       error = pm_runtime_get_sync(fcp->dev);
+       if (error < 0)
+               return error;
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(rcar_fcp_enable);
 
index f23d65e..be3c49f 100644 (file)
@@ -1016,14 +1016,16 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req)
 
                /* Only reconfigure if we have a different burst size */
                if (*bp != burst) {
-                       struct dma_slave_config cfg;
-
-                       cfg.src_addr = host->phys_base + OMAP_MMC_REG(host, DATA);
-                       cfg.dst_addr = host->phys_base + OMAP_MMC_REG(host, DATA);
-                       cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
-                       cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
-                       cfg.src_maxburst = burst;
-                       cfg.dst_maxburst = burst;
+                       struct dma_slave_config cfg = {
+                               .src_addr = host->phys_base +
+                                           OMAP_MMC_REG(host, DATA),
+                               .dst_addr = host->phys_base +
+                                           OMAP_MMC_REG(host, DATA),
+                               .src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES,
+                               .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES,
+                               .src_maxburst = burst,
+                               .dst_maxburst = burst,
+                       };
 
                        if (dmaengine_slave_config(c, &cfg))
                                goto use_pio;
index 24ebc9a..5f2f24a 100644 (file)
@@ -1409,11 +1409,18 @@ static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host,
 static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host,
                                        struct mmc_request *req)
 {
-       struct dma_slave_config cfg;
        struct dma_async_tx_descriptor *tx;
        int ret = 0, i;
        struct mmc_data *data = req->data;
        struct dma_chan *chan;
+       struct dma_slave_config cfg = {
+               .src_addr = host->mapbase + OMAP_HSMMC_DATA,
+               .dst_addr = host->mapbase + OMAP_HSMMC_DATA,
+               .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+               .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+               .src_maxburst = data->blksz / 4,
+               .dst_maxburst = data->blksz / 4,
+       };
 
        /* Sanity check: all the SG entries must be aligned by block size. */
        for (i = 0; i < data->sg_len; i++) {
@@ -1433,13 +1440,6 @@ static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host,
 
        chan = omap_hsmmc_get_dma_chan(host, data);
 
-       cfg.src_addr = host->mapbase + OMAP_HSMMC_DATA;
-       cfg.dst_addr = host->mapbase + OMAP_HSMMC_DATA;
-       cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       cfg.src_maxburst = data->blksz / 4;
-       cfg.dst_maxburst = data->blksz / 4;
-
        ret = dmaengine_slave_config(chan, &cfg);
        if (ret)
                return ret;
index c95ba83..ed92ce7 100644 (file)
@@ -28,6 +28,7 @@
 
 struct st_mmc_platform_data {
        struct  reset_control *rstc;
+       struct  clk *icnclk;
        void __iomem *top_ioaddr;
 };
 
@@ -353,7 +354,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
        struct sdhci_host *host;
        struct st_mmc_platform_data *pdata;
        struct sdhci_pltfm_host *pltfm_host;
-       struct clk *clk;
+       struct clk *clk, *icnclk;
        int ret = 0;
        u16 host_version;
        struct resource *res;
@@ -365,6 +366,11 @@ static int sdhci_st_probe(struct platform_device *pdev)
                return PTR_ERR(clk);
        }
 
+       /* ICN clock isn't compulsory, but use it if it's provided. */
+       icnclk = devm_clk_get(&pdev->dev, "icn");
+       if (IS_ERR(icnclk))
+               icnclk = NULL;
+
        rstc = devm_reset_control_get(&pdev->dev, NULL);
        if (IS_ERR(rstc))
                rstc = NULL;
@@ -389,6 +395,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
        }
 
        clk_prepare_enable(clk);
+       clk_prepare_enable(icnclk);
 
        /* Configure the FlashSS Top registers for setting eMMC TX/RX delay */
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
@@ -400,6 +407,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
        }
 
        pltfm_host->clk = clk;
+       pdata->icnclk = icnclk;
 
        /* Configure the Arasan HC inside the flashSS */
        st_mmcss_cconfig(np, host);
@@ -422,6 +430,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
        return 0;
 
 err_out:
+       clk_disable_unprepare(icnclk);
        clk_disable_unprepare(clk);
 err_of:
        sdhci_pltfm_free(pdev);
@@ -442,6 +451,8 @@ static int sdhci_st_remove(struct platform_device *pdev)
 
        ret = sdhci_pltfm_unregister(pdev);
 
+       clk_disable_unprepare(pdata->icnclk);
+
        if (rstc)
                reset_control_assert(rstc);
 
@@ -462,6 +473,7 @@ static int sdhci_st_suspend(struct device *dev)
        if (pdata->rstc)
                reset_control_assert(pdata->rstc);
 
+       clk_disable_unprepare(pdata->icnclk);
        clk_disable_unprepare(pltfm_host->clk);
 out:
        return ret;
@@ -475,6 +487,7 @@ static int sdhci_st_resume(struct device *dev)
        struct device_node *np = dev->of_node;
 
        clk_prepare_enable(pltfm_host->clk);
+       clk_prepare_enable(pdata->icnclk);
 
        if (pdata->rstc)
                reset_control_deassert(pdata->rstc);
index 41c0fc9..16f7cad 100644 (file)
@@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device)
        struct flexcan_priv *priv = netdev_priv(dev);
        int err;
 
-       err = flexcan_chip_disable(priv);
-       if (err)
-               return err;
-
        if (netif_running(dev)) {
+               err = flexcan_chip_disable(priv);
+               if (err)
+                       return err;
                netif_stop_queue(dev);
                netif_device_detach(dev);
        }
@@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device)
 {
        struct net_device *dev = dev_get_drvdata(device);
        struct flexcan_priv *priv = netdev_priv(dev);
+       int err;
 
        priv->can.state = CAN_STATE_ERROR_ACTIVE;
        if (netif_running(dev)) {
                netif_device_attach(dev);
                netif_start_queue(dev);
+               err = flexcan_chip_enable(priv);
+               if (err)
+                       return err;
        }
-       return flexcan_chip_enable(priv);
+       return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume);
index 2d1d22e..368bb07 100644 (file)
 #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6      BIT(15)
 
 #define IFI_CANFD_TDELAY                       0x1c
+#define IFI_CANFD_TDELAY_DEFAULT               0xb
+#define IFI_CANFD_TDELAY_MASK                  0x3fff
+#define IFI_CANFD_TDELAY_ABS                   BIT(14)
+#define IFI_CANFD_TDELAY_EN                    BIT(15)
 
 #define IFI_CANFD_ERROR                                0x20
 #define IFI_CANFD_ERROR_TX_OFFSET              0
@@ -641,7 +645,7 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev)
        struct ifi_canfd_priv *priv = netdev_priv(ndev);
        const struct can_bittiming *bt = &priv->can.bittiming;
        const struct can_bittiming *dbt = &priv->can.data_bittiming;
-       u16 brp, sjw, tseg1, tseg2;
+       u16 brp, sjw, tseg1, tseg2, tdc;
 
        /* Configure bit timing */
        brp = bt->brp - 2;
@@ -664,6 +668,11 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev)
               (brp << IFI_CANFD_TIME_PRESCALE_OFF) |
               (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8),
               priv->base + IFI_CANFD_FTIME);
+
+       /* Configure transmitter delay */
+       tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK;
+       writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc,
+              priv->base + IFI_CANFD_TDELAY);
 }
 
 static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id,
index 1a492c0..7717b19 100644 (file)
@@ -1402,16 +1402,12 @@ static void b53_br_leave(struct dsa_switch *ds, int port)
        }
 }
 
-static void b53_br_set_stp_state(struct dsa_switch *ds, int port,
-                                u8 state)
+static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state)
 {
        struct b53_device *dev = ds->priv;
-       u8 hw_state, cur_hw_state;
+       u8 hw_state;
        u8 reg;
 
-       b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), &reg);
-       cur_hw_state = reg & PORT_CTRL_STP_STATE_MASK;
-
        switch (state) {
        case BR_STATE_DISABLED:
                hw_state = PORT_CTRL_DIS_STATE;
@@ -1433,26 +1429,20 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port,
                return;
        }
 
-       /* Fast-age ARL entries if we are moving a port from Learning or
-        * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
-        * state (hw_state)
-        */
-       if (cur_hw_state != hw_state) {
-               if (cur_hw_state >= PORT_CTRL_LEARN_STATE &&
-                   hw_state <= PORT_CTRL_LISTEN_STATE) {
-                       if (b53_fast_age_port(dev, port)) {
-                               dev_err(ds->dev, "fast ageing failed\n");
-                               return;
-                       }
-               }
-       }
-
        b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), &reg);
        reg &= ~PORT_CTRL_STP_STATE_MASK;
        reg |= hw_state;
        b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg);
 }
 
+static void b53_br_fast_age(struct dsa_switch *ds, int port)
+{
+       struct b53_device *dev = ds->priv;
+
+       if (b53_fast_age_port(dev, port))
+               dev_err(ds->dev, "fast ageing failed\n");
+}
+
 static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds)
 {
        return DSA_TAG_PROTO_NONE;
@@ -1472,6 +1462,7 @@ static struct dsa_switch_ops b53_switch_ops = {
        .port_bridge_join       = b53_br_join,
        .port_bridge_leave      = b53_br_leave,
        .port_stp_state_set     = b53_br_set_stp_state,
+       .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
        .port_vlan_prepare      = b53_vlan_prepare,
        .port_vlan_add          = b53_vlan_add,
index 25bd3fa..b2c25da 100644 (file)
@@ -216,16 +216,16 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
        return 0;
 }
 
-int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
-                       u16 *val)
+static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
+                              u16 *val)
 {
        int addr = chip->info->port_base_addr + port;
 
        return mv88e6xxx_read(chip, addr, reg, val);
 }
 
-int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
-                        u16 val)
+static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
+                               u16 val)
 {
        int addr = chip->info->port_base_addr + port;
 
@@ -1133,31 +1133,18 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port,
 
        oldstate = reg & PORT_CONTROL_STATE_MASK;
 
-       if (oldstate != state) {
-               /* Flush forwarding database if we're moving a port
-                * from Learning or Forwarding state to Disabled or
-                * Blocking or Listening state.
-                */
-               if ((oldstate == PORT_CONTROL_STATE_LEARNING ||
-                    oldstate == PORT_CONTROL_STATE_FORWARDING) &&
-                   (state == PORT_CONTROL_STATE_DISABLED ||
-                    state == PORT_CONTROL_STATE_BLOCKING)) {
-                       err = _mv88e6xxx_atu_remove(chip, 0, port, false);
-                       if (err)
-                               return err;
-               }
+       reg &= ~PORT_CONTROL_STATE_MASK;
+       reg |= state;
 
-               reg = (reg & ~PORT_CONTROL_STATE_MASK) | state;
-               err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
-               if (err)
-                       return err;
+       err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+       if (err)
+               return err;
 
-               netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
-                          mv88e6xxx_port_state_names[state],
-                          mv88e6xxx_port_state_names[oldstate]);
-       }
+       netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
+                  mv88e6xxx_port_state_names[state],
+                  mv88e6xxx_port_state_names[oldstate]);
 
-       return err;
+       return 0;
 }
 
 static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
@@ -1232,6 +1219,19 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
                           mv88e6xxx_port_state_names[stp_state]);
 }
 
+static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mutex_lock(&chip->reg_lock);
+       err = _mv88e6xxx_atu_remove(chip, 0, port, false);
+       mutex_unlock(&chip->reg_lock);
+
+       if (err)
+               netdev_err(ds->ports[port].netdev, "failed to flush ATU\n");
+}
+
 static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port,
                                u16 *new, u16 *old)
 {
@@ -3684,6 +3684,7 @@ static struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_bridge_join       = mv88e6xxx_port_bridge_join,
        .port_bridge_leave      = mv88e6xxx_port_bridge_leave,
        .port_stp_state_set     = mv88e6xxx_port_stp_state_set,
+       .port_fast_age          = mv88e6xxx_port_fast_age,
        .port_vlan_filtering    = mv88e6xxx_port_vlan_filtering,
        .port_vlan_prepare      = mv88e6xxx_port_vlan_prepare,
        .port_vlan_add          = mv88e6xxx_port_vlan_add,
index 8a8d055..8456337 100644 (file)
@@ -237,6 +237,8 @@ enum xgene_enet_rm {
 #define TCPHDR_LEN                     6
 #define IPHDR_POS                      6
 #define IPHDR_LEN                      6
+#define MSS_POS                                20
+#define MSS_LEN                                2
 #define EC_POS                         22      /* Enable checksum */
 #define EC_LEN                         1
 #define ET_POS                         23      /* Enable TSO */
@@ -253,6 +255,11 @@ enum xgene_enet_rm {
 
 #define LAST_BUFFER                    (0x7800ULL << BUFDATALEN_POS)
 
+#define TSO_MSS0_POS                   0
+#define TSO_MSS0_LEN                   14
+#define TSO_MSS1_POS                   16
+#define TSO_MSS1_LEN                   14
+
 struct xgene_enet_raw_desc {
        __le64 m0;
        __le64 m1;
index 522ba92..429f18f 100644 (file)
@@ -137,6 +137,7 @@ static irqreturn_t xgene_enet_rx_irq(const int irq, void *data)
 static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring,
                                    struct xgene_enet_raw_desc *raw_desc)
 {
+       struct xgene_enet_pdata *pdata = netdev_priv(cp_ring->ndev);
        struct sk_buff *skb;
        struct device *dev;
        skb_frag_t *frag;
@@ -144,6 +145,7 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring,
        u16 skb_index;
        u8 status;
        int i, ret = 0;
+       u8 mss_index;
 
        skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0));
        skb = cp_ring->cp_skb[skb_index];
@@ -160,6 +162,13 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring,
                               DMA_TO_DEVICE);
        }
 
+       if (GET_BIT(ET, le64_to_cpu(raw_desc->m3))) {
+               mss_index = GET_VAL(MSS, le64_to_cpu(raw_desc->m3));
+               spin_lock(&pdata->mss_lock);
+               pdata->mss_refcnt[mss_index]--;
+               spin_unlock(&pdata->mss_lock);
+       }
+
        /* Checking for error */
        status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
        if (unlikely(status > 2)) {
@@ -178,15 +187,53 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring,
        return ret;
 }
 
-static u64 xgene_enet_work_msg(struct sk_buff *skb)
+static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss)
+{
+       struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+       bool mss_index_found = false;
+       int mss_index;
+       int i;
+
+       spin_lock(&pdata->mss_lock);
+
+       /* Reuse the slot if MSS matches */
+       for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) {
+               if (pdata->mss[i] == mss) {
+                       pdata->mss_refcnt[i]++;
+                       mss_index = i;
+                       mss_index_found = true;
+               }
+       }
+
+       /* Overwrite the slot with ref_count = 0 */
+       for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) {
+               if (!pdata->mss_refcnt[i]) {
+                       pdata->mss_refcnt[i]++;
+                       pdata->mac_ops->set_mss(pdata, mss, i);
+                       pdata->mss[i] = mss;
+                       mss_index = i;
+                       mss_index_found = true;
+               }
+       }
+
+       spin_unlock(&pdata->mss_lock);
+
+       /* No slots with ref_count = 0 available, return busy */
+       if (!mss_index_found)
+               return -EBUSY;
+
+       return mss_index;
+}
+
+static int xgene_enet_work_msg(struct sk_buff *skb, u64 *hopinfo)
 {
        struct net_device *ndev = skb->dev;
        struct iphdr *iph;
        u8 l3hlen = 0, l4hlen = 0;
        u8 ethhdr, proto = 0, csum_enable = 0;
-       u64 hopinfo = 0;
        u32 hdr_len, mss = 0;
        u32 i, len, nr_frags;
+       int mss_index;
 
        ethhdr = xgene_enet_hdr_len(skb->data);
 
@@ -226,7 +273,11 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb)
                        if (!mss || ((skb->len - hdr_len) <= mss))
                                goto out;
 
-                       hopinfo |= SET_BIT(ET);
+                       mss_index = xgene_enet_setup_mss(ndev, mss);
+                       if (unlikely(mss_index < 0))
+                               return -EBUSY;
+
+                       *hopinfo |= SET_BIT(ET) | SET_VAL(MSS, mss_index);
                }
        } else if (iph->protocol == IPPROTO_UDP) {
                l4hlen = UDP_HDR_SIZE;
@@ -234,15 +285,15 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb)
        }
 out:
        l3hlen = ip_hdrlen(skb) >> 2;
-       hopinfo |= SET_VAL(TCPHDR, l4hlen) |
-                 SET_VAL(IPHDR, l3hlen) |
-                 SET_VAL(ETHHDR, ethhdr) |
-                 SET_VAL(EC, csum_enable) |
-                 SET_VAL(IS, proto) |
-                 SET_BIT(IC) |
-                 SET_BIT(TYPE_ETH_WORK_MESSAGE);
-
-       return hopinfo;
+       *hopinfo |= SET_VAL(TCPHDR, l4hlen) |
+                   SET_VAL(IPHDR, l3hlen) |
+                   SET_VAL(ETHHDR, ethhdr) |
+                   SET_VAL(EC, csum_enable) |
+                   SET_VAL(IS, proto) |
+                   SET_BIT(IC) |
+                   SET_BIT(TYPE_ETH_WORK_MESSAGE);
+
+       return 0;
 }
 
 static u16 xgene_enet_encode_len(u16 len)
@@ -282,20 +333,22 @@ static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring,
        dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr;
        skb_frag_t *frag;
        u16 tail = tx_ring->tail;
-       u64 hopinfo;
+       u64 hopinfo = 0;
        u32 len, hw_len;
        u8 ll = 0, nv = 0, idx = 0;
        bool split = false;
        u32 size, offset, ell_bytes = 0;
        u32 i, fidx, nr_frags, count = 1;
+       int ret;
 
        raw_desc = &tx_ring->raw_desc[tail];
        tail = (tail + 1) & (tx_ring->slots - 1);
        memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc));
 
-       hopinfo = xgene_enet_work_msg(skb);
-       if (!hopinfo)
-               return -EINVAL;
+       ret = xgene_enet_work_msg(skb, &hopinfo);
+       if (ret)
+               return ret;
+
        raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) |
                                   hopinfo);
 
@@ -435,6 +488,9 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
                return NETDEV_TX_OK;
 
        count = xgene_enet_setup_tx_desc(tx_ring, skb);
+       if (count == -EBUSY)
+               return NETDEV_TX_BUSY;
+
        if (count <= 0) {
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
@@ -1669,7 +1725,7 @@ static int xgene_enet_probe(struct platform_device *pdev)
 
        if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
                ndev->features |= NETIF_F_TSO;
-               pdata->mss = XGENE_ENET_MSS;
+               spin_lock_init(&pdata->mss_lock);
        }
        ndev->hw_features = ndev->features;
 
index 7735371..0cda58f 100644 (file)
@@ -47,7 +47,7 @@
 #define NUM_PKT_BUF    64
 #define NUM_BUFPOOL    32
 #define MAX_EXP_BUFFS  256
-#define XGENE_ENET_MSS 1448
+#define NUM_MSS_REG    4
 #define XGENE_MIN_ENET_FRAME_SIZE      60
 
 #define XGENE_MAX_ENET_IRQ     16
@@ -143,7 +143,7 @@ struct xgene_mac_ops {
        void (*rx_disable)(struct xgene_enet_pdata *pdata);
        void (*set_speed)(struct xgene_enet_pdata *pdata);
        void (*set_mac_addr)(struct xgene_enet_pdata *pdata);
-       void (*set_mss)(struct xgene_enet_pdata *pdata);
+       void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index);
        void (*link_state)(struct work_struct *work);
 };
 
@@ -212,7 +212,9 @@ struct xgene_enet_pdata {
        u8 eth_bufnum;
        u8 bp_bufnum;
        u16 ring_num;
-       u32 mss;
+       u32 mss[NUM_MSS_REG];
+       u32 mss_refcnt[NUM_MSS_REG];
+       spinlock_t mss_lock;  /* mss lock */
        u8 tx_delay;
        u8 rx_delay;
        bool mdio_driver;
index 279ee27..6475f38 100644 (file)
@@ -232,9 +232,22 @@ static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata)
        xgene_enet_wr_mac(pdata, HSTMACADR_MSW_ADDR, addr1);
 }
 
-static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata)
+static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata,
+                               u16 mss, u8 index)
 {
-       xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR, pdata->mss);
+       u8 offset;
+       u32 data;
+
+       offset = (index < 2) ? 0 : 4;
+       xgene_enet_rd_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, &data);
+
+       if (!(index & 0x1))
+               data = SET_VAL(TSO_MSS1, data >> TSO_MSS1_POS) |
+                       SET_VAL(TSO_MSS0, mss);
+       else
+               data = SET_VAL(TSO_MSS1, mss) | SET_VAL(TSO_MSS0, data);
+
+       xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data);
 }
 
 static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata)
@@ -258,7 +271,6 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata)
        xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data);
 
        xgene_xgmac_set_mac_addr(pdata);
-       xgene_xgmac_set_mss(pdata);
 
        xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data);
        data |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
index ecd357d..27f11a5 100644 (file)
@@ -6356,10 +6356,6 @@ bnx2_open(struct net_device *dev)
        struct bnx2 *bp = netdev_priv(dev);
        int rc;
 
-       rc = bnx2_request_firmware(bp);
-       if (rc < 0)
-               goto out;
-
        netif_carrier_off(dev);
 
        bnx2_disable_int(bp);
@@ -6428,7 +6424,6 @@ open_err:
        bnx2_free_irq(bp);
        bnx2_free_mem(bp);
        bnx2_del_napi(bp);
-       bnx2_release_firmware(bp);
        goto out;
 }
 
@@ -8575,6 +8570,12 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_set_drvdata(pdev, dev);
 
+       rc = bnx2_request_firmware(bp);
+       if (rc < 0)
+               goto error;
+
+
+       bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
        memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
 
        dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@ -8607,6 +8608,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        return 0;
 
 error:
+       bnx2_release_firmware(bp);
        pci_iounmap(pdev, bp->regview);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
index 0e68fad..243cb97 100644 (file)
@@ -492,7 +492,8 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
 int bnx2x_get_vf_config(struct net_device *dev, int vf,
                        struct ifla_vf_info *ivi);
 int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
-int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos);
+int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+                     __be16 vlan_proto);
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
index 6c586b0..3f77d08 100644 (file)
@@ -2521,7 +2521,8 @@ void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp)
        for_each_vf(bp, vfidx) {
                bulletin = BP_VF_BULLETIN(bp, vfidx);
                if (bulletin->valid_bitmap & (1 << VLAN_VALID))
-                       bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0);
+                       bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0,
+                                         htons(ETH_P_8021Q));
        }
 }
 
@@ -2781,7 +2782,8 @@ static int bnx2x_set_vf_vlan_filter(struct bnx2x *bp, struct bnx2x_virtf *vf,
        return 0;
 }
 
-int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos)
+int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos,
+                     __be16 vlan_proto)
 {
        struct pf_vf_bulletin_content *bulletin = NULL;
        struct bnx2x *bp = netdev_priv(dev);
@@ -2796,6 +2798,9 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos)
                return -EINVAL;
        }
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n",
           vfidx, vlan, 0);
 
index 8be7185..ec6cd18 100644 (file)
@@ -174,7 +174,8 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
        return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
-int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos)
+int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
+                    __be16 vlan_proto)
 {
        struct hwrm_func_cfg_input req = {0};
        struct bnxt *bp = netdev_priv(dev);
@@ -185,6 +186,9 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos)
        if (bp->hwrm_spec_code < 0x10201)
                return -ENOTSUPP;
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        rc = bnxt_vf_ndo_prep(bp, vf_id);
        if (rc)
                return rc;
index 0392670..1ab72e4 100644 (file)
@@ -12,7 +12,7 @@
 
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
-int bnxt_set_vf_vlan(struct net_device *, int, u16, u8);
+int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int bnxt_set_vf_bw(struct net_device *, int, int, int);
 int bnxt_set_vf_link_state(struct net_device *, int, int);
 int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
index 0e4fdc3..31f61a7 100644 (file)
 #define BNAD_NUM_TXF_COUNTERS 12
 #define BNAD_NUM_RXF_COUNTERS 10
 #define BNAD_NUM_CQ_COUNTERS (3 + 5)
-#define BNAD_NUM_RXQ_COUNTERS 6
+#define BNAD_NUM_RXQ_COUNTERS 7
 #define BNAD_NUM_TXQ_COUNTERS 5
 
-#define BNAD_ETHTOOL_STATS_NUM                                         \
-       (sizeof(struct rtnl_link_stats64) / sizeof(u64) +       \
-       sizeof(struct bnad_drv_stats) / sizeof(u64) +           \
-       offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64))
-
-static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = {
+static const char *bnad_net_stats_strings[] = {
        "rx_packets",
        "tx_packets",
        "rx_bytes",
@@ -50,22 +45,10 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = {
        "tx_dropped",
        "multicast",
        "collisions",
-
        "rx_length_errors",
-       "rx_over_errors",
        "rx_crc_errors",
        "rx_frame_errors",
-       "rx_fifo_errors",
-       "rx_missed_errors",
-
-       "tx_aborted_errors",
-       "tx_carrier_errors",
        "tx_fifo_errors",
-       "tx_heartbeat_errors",
-       "tx_window_errors",
-
-       "rx_compressed",
-       "tx_compressed",
 
        "netif_queue_stop",
        "netif_queue_wakeup",
@@ -254,6 +237,8 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = {
        "fc_tx_fid_parity_errors",
 };
 
+#define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings)
+
 static int
 bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
 {
@@ -658,6 +643,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
                                string += ETH_GSTRING_LEN;
                                sprintf(string, "rxq%d_allocbuf_failed", q_num);
                                string += ETH_GSTRING_LEN;
+                               sprintf(string, "rxq%d_mapbuf_failed", q_num);
+                               string += ETH_GSTRING_LEN;
                                sprintf(string, "rxq%d_producer_index", q_num);
                                string += ETH_GSTRING_LEN;
                                sprintf(string, "rxq%d_consumer_index", q_num);
@@ -678,6 +665,9 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
                                        sprintf(string, "rxq%d_allocbuf_failed",
                                                                q_num);
                                        string += ETH_GSTRING_LEN;
+                                       sprintf(string, "rxq%d_mapbuf_failed",
+                                               q_num);
+                                       string += ETH_GSTRING_LEN;
                                        sprintf(string, "rxq%d_producer_index",
                                                                q_num);
                                        string += ETH_GSTRING_LEN;
@@ -854,9 +844,9 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats,
                       u64 *buf)
 {
        struct bnad *bnad = netdev_priv(netdev);
-       int i, j, bi;
+       int i, j, bi = 0;
        unsigned long flags;
-       struct rtnl_link_stats64 *net_stats64;
+       struct rtnl_link_stats64 net_stats64;
        u64 *stats64;
        u32 bmap;
 
@@ -871,14 +861,25 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats,
         * under the same lock
         */
        spin_lock_irqsave(&bnad->bna_lock, flags);
-       bi = 0;
-       memset(buf, 0, stats->n_stats * sizeof(u64));
-
-       net_stats64 = (struct rtnl_link_stats64 *)buf;
-       bnad_netdev_qstats_fill(bnad, net_stats64);
-       bnad_netdev_hwstats_fill(bnad, net_stats64);
 
-       bi = sizeof(*net_stats64) / sizeof(u64);
+       memset(&net_stats64, 0, sizeof(net_stats64));
+       bnad_netdev_qstats_fill(bnad, &net_stats64);
+       bnad_netdev_hwstats_fill(bnad, &net_stats64);
+
+       buf[bi++] = net_stats64.rx_packets;
+       buf[bi++] = net_stats64.tx_packets;
+       buf[bi++] = net_stats64.rx_bytes;
+       buf[bi++] = net_stats64.tx_bytes;
+       buf[bi++] = net_stats64.rx_errors;
+       buf[bi++] = net_stats64.tx_errors;
+       buf[bi++] = net_stats64.rx_dropped;
+       buf[bi++] = net_stats64.tx_dropped;
+       buf[bi++] = net_stats64.multicast;
+       buf[bi++] = net_stats64.collisions;
+       buf[bi++] = net_stats64.rx_length_errors;
+       buf[bi++] = net_stats64.rx_crc_errors;
+       buf[bi++] = net_stats64.rx_frame_errors;
+       buf[bi++] = net_stats64.tx_fifo_errors;
 
        /* Get netif_queue_stopped from stack */
        bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev);
index 18d12d3..3042610 100644 (file)
@@ -305,7 +305,7 @@ struct nicvf {
        bool                    msix_enabled;
        u8                      num_vec;
        struct msix_entry       msix_entries[NIC_VF_MSIX_VECTORS];
-       char                    irq_name[NIC_VF_MSIX_VECTORS][20];
+       char                    irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15];
        bool                    irq_allocated[NIC_VF_MSIX_VECTORS];
        cpumask_var_t           affinity_mask[NIC_VF_MSIX_VECTORS];
 
index 7d00162..45a13f7 100644 (file)
@@ -516,7 +516,8 @@ static int nicvf_init_resources(struct nicvf *nic)
 static void nicvf_snd_pkt_handler(struct net_device *netdev,
                                  struct cmp_queue *cq,
                                  struct cqe_send_t *cqe_tx,
-                                 int cqe_type, int budget)
+                                 int cqe_type, int budget,
+                                 unsigned int *tx_pkts, unsigned int *tx_bytes)
 {
        struct sk_buff *skb = NULL;
        struct nicvf *nic = netdev_priv(netdev);
@@ -547,6 +548,8 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
                }
                nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
                prefetch(skb);
+               (*tx_pkts)++;
+               *tx_bytes += skb->len;
                napi_consume_skb(skb, budget);
                sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
        } else {
@@ -662,6 +665,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
        struct cmp_queue *cq = &qs->cq[cq_idx];
        struct cqe_rx_t *cq_desc;
        struct netdev_queue *txq;
+       unsigned int tx_pkts = 0, tx_bytes = 0;
 
        spin_lock_bh(&cq->lock);
 loop:
@@ -701,7 +705,7 @@ loop:
                case CQE_TYPE_SEND:
                        nicvf_snd_pkt_handler(netdev, cq,
                                              (void *)cq_desc, CQE_TYPE_SEND,
-                                             budget);
+                                             budget, &tx_pkts, &tx_bytes);
                        tx_done++;
                break;
                case CQE_TYPE_INVALID:
@@ -730,6 +734,9 @@ done:
                netdev = nic->pnicvf->netdev;
                txq = netdev_get_tx_queue(netdev,
                                          nicvf_netdev_qidx(nic, cq_idx));
+               if (tx_pkts)
+                       netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
                nic = nic->pnicvf;
                if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
                        netif_tx_start_queue(txq);
@@ -1160,6 +1167,9 @@ int nicvf_stop(struct net_device *netdev)
 
        netif_tx_disable(netdev);
 
+       for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
+               netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));
+
        /* Free resources */
        nicvf_config_data_transfer(nic, false);
 
index 178c5c7..a4fc501 100644 (file)
@@ -1082,6 +1082,24 @@ static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
        imm->len = 1;
 }
 
+static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
+                                    int sq_num, int desc_cnt)
+{
+       struct netdev_queue *txq;
+
+       txq = netdev_get_tx_queue(nic->pnicvf->netdev,
+                                 skb_get_queue_mapping(skb));
+
+       netdev_tx_sent_queue(txq, skb->len);
+
+       /* make sure all memory stores are done before ringing doorbell */
+       smp_wmb();
+
+       /* Inform HW to xmit all TSO segments */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
+                             sq_num, desc_cnt);
+}
+
 /* Segment a TSO packet into 'gso_size' segments and append
  * them to SQ for transfer
  */
@@ -1141,12 +1159,8 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
        /* Save SKB in the last segment for freeing */
        sq->skbuff[hdr_qentry] = (u64)skb;
 
-       /* make sure all memory stores are done before ringing doorbell */
-       smp_wmb();
+       nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
 
-       /* Inform HW to xmit all TSO segments */
-       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
-                             sq_num, desc_cnt);
        nic->drv_stats.tx_tso++;
        return 1;
 }
@@ -1219,12 +1233,8 @@ doorbell:
                nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
        }
 
-       /* make sure all memory stores are done before ringing doorbell */
-       smp_wmb();
+       nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
 
-       /* Inform HW to xmit new packet */
-       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
-                             sq_num, subdesc_cnt);
        return 1;
 
 append_fail:
index ea0d1f1..28e653e 100644 (file)
@@ -422,8 +422,8 @@ struct link_config {
        unsigned short supported;        /* link capabilities */
        unsigned short advertising;      /* advertised capabilities */
        unsigned short lp_advertising;   /* peer advertised capabilities */
-       unsigned short requested_speed;  /* speed user has requested */
-       unsigned short speed;            /* actual link speed */
+       unsigned int   requested_speed;  /* speed user has requested */
+       unsigned int   speed;            /* actual link speed */
        unsigned char  requested_fc;     /* flow control user has requested */
        unsigned char  fc;               /* actual link flow control */
        unsigned char  autoneg;          /* autonegotiating? */
index 2a61617..1073673 100644 (file)
@@ -117,11 +117,11 @@ static int validate_filter(struct net_device *dev,
        return 0;
 }
 
-static unsigned int get_filter_steerq(struct net_device *dev,
-                                     struct ch_filter_specification *fs)
+static int get_filter_steerq(struct net_device *dev,
+                            struct ch_filter_specification *fs)
 {
        struct adapter *adapter = netdev2adap(dev);
-       unsigned int iq;
+       int iq;
 
        /* If the user has requested steering matching Ingress Packets
         * to a specific Queue Set, we need to make sure it's in range
@@ -443,10 +443,10 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
                       struct filter_ctx *ctx)
 {
        struct adapter *adapter = netdev2adap(dev);
-       unsigned int max_fidx, fidx, iq;
+       unsigned int max_fidx, fidx;
        struct filter_entry *f;
        u32 iconf;
-       int ret;
+       int iq, ret;
 
        max_fidx = adapter->tids.nftids;
        if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
index 1be4d23..eaa7fa9 100644 (file)
@@ -3971,10 +3971,17 @@ static const struct pci_error_handlers cxgb4_eeh = {
        .resume         = eeh_resume,
 };
 
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
 static inline bool is_x_10g_port(const struct link_config *lc)
 {
-       return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
-              (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+       unsigned int speeds, high_speeds;
+
+       speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+       high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+       return high_speeds != 0;
 }
 
 /*
@@ -4408,8 +4415,12 @@ static void print_port_info(const struct net_device *dev)
                bufp += sprintf(bufp, "1000/");
        if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
                bufp += sprintf(bufp, "10G/");
+       if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
+               bufp += sprintf(bufp, "25G/");
        if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
                bufp += sprintf(bufp, "40G/");
+       if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
+               bufp += sprintf(bufp, "100G/");
        if (bufp != buf)
                --bufp;
        sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
index d12a73e..f13b593 100644 (file)
@@ -367,7 +367,7 @@ int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
        }
        return 0;
 unwind:
-       while (--idx >= 0) {
+       while (idx-- > 0) {
                bmap_idx = rxq_info->msix_tbl[idx];
                free_msix_idx_in_bmap(adap, bmap_idx);
                free_irq(adap->msix_info_ulds[bmap_idx].vec,
index 15be543..20dec85 100644 (file)
@@ -3627,7 +3627,8 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf)
 }
 
 #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
-                    FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
+                    FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+                    FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
                     FW_PORT_CAP_ANEG)
 
 /**
@@ -7196,8 +7197,12 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
                speed = 1000;
        else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
                speed = 10000;
+       else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+               speed = 25000;
        else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
                speed = 40000;
+       else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+               speed = 100000;
 
        lc = &pi->link_cfg;
 
index ffe4bf4..4b58b32 100644 (file)
@@ -2267,6 +2267,12 @@ enum fw_port_cap {
        FW_PORT_CAP_802_3_ASM_DIR       = 0x8000,
 };
 
+#define FW_PORT_CAP_SPEED_S     0
+#define FW_PORT_CAP_SPEED_M     0x3f
+#define FW_PORT_CAP_SPEED_V(x)  ((x) << FW_PORT_CAP_SPEED_S)
+#define FW_PORT_CAP_SPEED_G(x) \
+       (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M)
+
 enum fw_port_mdi {
        FW_PORT_CAP_MDI_UNCHANGED,
        FW_PORT_CAP_MDI_AUTO,
index 8067424..b3903fe 100644 (file)
@@ -108,8 +108,8 @@ struct link_config {
        unsigned int   supported;        /* link capabilities */
        unsigned int   advertising;      /* advertised capabilities */
        unsigned short lp_advertising;   /* peer advertised capabilities */
-       unsigned short requested_speed;  /* speed user has requested */
-       unsigned short speed;            /* actual link speed */
+       unsigned int   requested_speed;  /* speed user has requested */
+       unsigned int   speed;            /* actual link speed */
        unsigned char  requested_fc;     /* flow control user has requested */
        unsigned char  fc;               /* actual link flow control */
        unsigned char  autoneg;          /* autonegotiating? */
@@ -271,10 +271,17 @@ static inline bool is_10g_port(const struct link_config *lc)
        return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
 }
 
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
 static inline bool is_x_10g_port(const struct link_config *lc)
 {
-       return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
-               (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+       unsigned int speeds, high_speeds;
+
+       speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+       high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+       return high_speeds != 0;
 }
 
 static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
index 879f4c5..e98248f 100644 (file)
@@ -314,8 +314,9 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
 }
 
 #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
-                    FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
-                    FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
+                    FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+                    FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
+                    FW_PORT_CAP_ANEG)
 
 /**
  *     init_link_config - initialize a link's SW state
@@ -1716,8 +1717,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl)
                        speed = 1000;
                else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
                        speed = 10000;
+               else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+                       speed = 25000;
                else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
                        speed = 40000;
+               else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+                       speed = 100000;
 
                /*
                 * Scan all of our "ports" (Virtual Interfaces) looking for
index 9a94840..dcb930a 100644 (file)
@@ -44,7 +44,7 @@ MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
 /* Per-module error detection/recovery workq shared across all functions.
  * Each function schedules its own work request on this shared workq.
  */
-struct workqueue_struct *be_err_recovery_workq;
+static struct workqueue_struct *be_err_recovery_workq;
 
 static const struct pci_device_id be_dev_ids[] = {
        { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
@@ -60,7 +60,7 @@ static const struct pci_device_id be_dev_ids[] = {
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
 
 /* Workqueue used by all functions for defering cmd calls to the adapter */
-struct workqueue_struct *be_wq;
+static struct workqueue_struct *be_wq;
 
 /* UE Status Low CSR */
 static const char * const ue_status_low_desc[] = {
@@ -1895,7 +1895,8 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
        return 0;
 }
 
-static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
+static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+                         __be16 vlan_proto)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
@@ -1907,6 +1908,9 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
        if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
                return -EINVAL;
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        if (vlan || qos) {
                vlan |= qos << VLAN_PRIO_SHIFT;
                status = be_set_vf_tvt(adapter, vf, vlan);
index ec4d0f3..8f13919 100644 (file)
@@ -977,7 +977,37 @@ static void emac_set_multicast_list(struct net_device *ndev)
                dev->mcast_pending = 1;
                return;
        }
+
+       mutex_lock(&dev->link_lock);
        __emac_set_multicast_list(dev);
+       mutex_unlock(&dev->link_lock);
+}
+
+static int emac_set_mac_address(struct net_device *ndev, void *sa)
+{
+       struct emac_instance *dev = netdev_priv(ndev);
+       struct sockaddr *addr = sa;
+       struct emac_regs __iomem *p = dev->emacp;
+
+       if (!is_valid_ether_addr(addr->sa_data))
+              return -EADDRNOTAVAIL;
+
+       mutex_lock(&dev->link_lock);
+
+       memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
+
+       emac_rx_disable(dev);
+       emac_tx_disable(dev);
+       out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]);
+       out_be32(&p->ialr, (ndev->dev_addr[2] << 24) |
+               (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) |
+               ndev->dev_addr[5]);
+       emac_tx_enable(dev);
+       emac_rx_enable(dev);
+
+       mutex_unlock(&dev->link_lock);
+
+       return 0;
 }
 
 static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
@@ -2686,7 +2716,7 @@ static const struct net_device_ops emac_netdev_ops = {
        .ndo_do_ioctl           = emac_ioctl,
        .ndo_tx_timeout         = emac_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_set_mac_address    = emac_set_mac_address,
        .ndo_start_xmit         = emac_start_xmit,
        .ndo_change_mtu         = eth_change_mtu,
 };
@@ -2699,7 +2729,7 @@ static const struct net_device_ops emac_gige_netdev_ops = {
        .ndo_do_ioctl           = emac_ioctl,
        .ndo_tx_timeout         = emac_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_set_mac_address    = eth_mac_addr,
+       .ndo_set_mac_address    = emac_set_mac_address,
        .ndo_start_xmit         = emac_start_xmit_sg,
        .ndo_change_mtu         = emac_change_mtu,
 };
index 67ff01a..4d19e46 100644 (file)
@@ -507,7 +507,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs);
 s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
-                         int vf_idx, u16 vid, u8 qos);
+                         int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
 int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
                        int unused);
 int fm10k_ndo_get_vf_config(struct net_device *netdev,
index d9dec81..5f4dac0 100644 (file)
@@ -445,7 +445,7 @@ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac)
 }
 
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
-                         u8 qos)
+                         u8 qos, __be16 vlan_proto)
 {
        struct fm10k_intfc *interface = netdev_priv(netdev);
        struct fm10k_iov_data *iov_data = interface->iov_data;
@@ -460,6 +460,10 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
        if (qos || (vid > (VLAN_VID_MASK - 1)))
                return -EINVAL;
 
+       /* VF VLAN Protocol part to default is unsupported */
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        vf_info = &iov_data->vf_info[vf_idx];
 
        /* exit if there is nothing to do */
index 19103a6..2030d7c 100644 (file)
@@ -74,7 +74,7 @@
 #define I40E_MIN_NUM_DESCRIPTORS       64
 #define I40E_MIN_MSIX                  2
 #define I40E_DEFAULT_NUM_VMDQ_VSI      8 /* max 256 VSIs */
-#define I40E_MIN_VSI_ALLOC             51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */
+#define I40E_MIN_VSI_ALLOC             83 /* LAN, ATR, FCOE, 64 VF */
 /* max 16 qps */
 #define i40e_default_queues_per_vmdq(pf) \
                (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1)
@@ -701,6 +701,8 @@ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags);
 void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags);
 int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+                      u16 rss_table_size, u16 rss_size);
 struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
 void i40e_update_stats(struct i40e_vsi *vsi);
 void i40e_update_eth_stats(struct i40e_vsi *vsi);
@@ -708,8 +710,6 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
 int i40e_fetch_switch_configuration(struct i40e_pf *pf,
                                    bool printconfig);
 
-int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
-                            struct i40e_pf *pf, bool add);
 int i40e_add_del_fdir(struct i40e_vsi *vsi,
                      struct i40e_fdir_filter *input, bool add);
 void i40e_fdir_check_and_reenable(struct i40e_pf *pf);
index 05cf9a7..0c1875b 100644 (file)
@@ -1054,6 +1054,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                        struct i40e_dcbx_config *r_cfg =
                                                &pf->hw.remote_dcbx_config;
                        int i, ret;
+                       u32 switch_id;
 
                        bw_data = kzalloc(sizeof(
                                    struct i40e_aqc_query_port_ets_config_resp),
@@ -1063,8 +1064,12 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                                goto command_write_done;
                        }
 
+                       vsi = pf->vsi[pf->lan_vsi];
+                       switch_id =
+                               vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK;
+
                        ret = i40e_aq_query_port_ets_config(&pf->hw,
-                                                           pf->mac_seid,
+                                                           switch_id,
                                                            bw_data, NULL);
                        if (ret) {
                                dev_info(&pf->pdev->dev,
@@ -1425,84 +1430,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                buff = NULL;
                kfree(desc);
                desc = NULL;
-       } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) ||
-                  (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) {
-               struct i40e_fdir_filter fd_data;
-               u16 packet_len, i, j = 0;
-               char *asc_packet;
-               u8 *raw_packet;
-               bool add = false;
-               int ret;
-
-               if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
-                       goto command_write_done;
-
-               if (strncmp(cmd_buf, "add", 3) == 0)
-                       add = true;
-
-               if (add && (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED))
-                       goto command_write_done;
-
-               asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
-                                    GFP_KERNEL);
-               if (!asc_packet)
-                       goto command_write_done;
-
-               raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
-                                    GFP_KERNEL);
-
-               if (!raw_packet) {
-                       kfree(asc_packet);
-                       asc_packet = NULL;
-                       goto command_write_done;
-               }
-
-               cnt = sscanf(&cmd_buf[13],
-                            "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %511s",
-                            &fd_data.q_index,
-                            &fd_data.flex_off, &fd_data.pctype,
-                            &fd_data.dest_vsi, &fd_data.dest_ctl,
-                            &fd_data.fd_status, &fd_data.cnt_index,
-                            &fd_data.fd_id, &packet_len, asc_packet);
-               if (cnt != 10) {
-                       dev_info(&pf->pdev->dev,
-                                "program fd_filter: bad command string, cnt=%d\n",
-                                cnt);
-                       kfree(asc_packet);
-                       asc_packet = NULL;
-                       kfree(raw_packet);
-                       goto command_write_done;
-               }
-
-               /* fix packet length if user entered 0 */
-               if (packet_len == 0)
-                       packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE;
-
-               /* make sure to check the max as well */
-               packet_len = min_t(u16,
-                                  packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
-
-               for (i = 0; i < packet_len; i++) {
-                       cnt = sscanf(&asc_packet[j], "%2hhx ", &raw_packet[i]);
-                       if (!cnt)
-                               break;
-                       j += 3;
-               }
-               dev_info(&pf->pdev->dev, "FD raw packet dump\n");
-               print_hex_dump(KERN_INFO, "FD raw packet: ",
-                              DUMP_PREFIX_OFFSET, 16, 1,
-                              raw_packet, packet_len, true);
-               ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add);
-               if (!ret) {
-                       dev_info(&pf->pdev->dev, "Filter command send Status : Success\n");
-               } else {
-                       dev_info(&pf->pdev->dev,
-                                "Filter command send failed %d\n", ret);
-               }
-               kfree(raw_packet);
-               raw_packet = NULL;
-               kfree(asc_packet);
-               asc_packet = NULL;
        } else if (strncmp(cmd_buf, "fd current cnt", 14) == 0) {
                dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n",
                         i40e_get_current_fd_count(pf));
@@ -1727,8 +1654,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                dev_info(&pf->pdev->dev, "  globr\n");
                dev_info(&pf->pdev->dev, "  send aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3>\n");
                dev_info(&pf->pdev->dev, "  send indirect aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3> <buffer_len>\n");
-               dev_info(&pf->pdev->dev, "  add fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
-               dev_info(&pf->pdev->dev, "  rem fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
                dev_info(&pf->pdev->dev, "  fd current cnt");
                dev_info(&pf->pdev->dev, "  lldp start\n");
                dev_info(&pf->pdev->dev, "  lldp stop\n");
index 1835186..92bc884 100644 (file)
@@ -1970,11 +1970,22 @@ static int i40e_set_phys_id(struct net_device *netdev,
  * 125us (8000 interrupts per second) == ITR(62)
  */
 
+/**
+ * __i40e_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
+ *
+ * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs
+ * are per queue. If queue is <0 then we default to queue 0 as the
+ * representative value.
+ **/
 static int __i40e_get_coalesce(struct net_device *netdev,
                               struct ethtool_coalesce *ec,
                               int queue)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_ring *rx_ring, *tx_ring;
        struct i40e_vsi *vsi = np->vsi;
 
        ec->tx_max_coalesced_frames_irq = vsi->work_limit;
@@ -1989,14 +2000,18 @@ static int __i40e_get_coalesce(struct net_device *netdev,
                return -EINVAL;
        }
 
-       if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting))
+       rx_ring = vsi->rx_rings[queue];
+       tx_ring = vsi->tx_rings[queue];
+
+       if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
                ec->use_adaptive_rx_coalesce = 1;
 
-       if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting))
+       if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
                ec->use_adaptive_tx_coalesce = 1;
 
-       ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-       ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+
 
        /* we use the _usecs_high to store/set the interrupt rate limit
         * that the hardware supports, that almost but not quite
@@ -2010,18 +2025,44 @@ static int __i40e_get_coalesce(struct net_device *netdev,
        return 0;
 }
 
+/**
+ * i40e_get_coalesce - get a netdev's coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ *
+ * Gets the coalesce settings for a particular netdev. Note that if user has
+ * modified per-queue settings, this only guarantees to represent queue 0. See
+ * __i40e_get_coalesce for more details.
+ **/
 static int i40e_get_coalesce(struct net_device *netdev,
                             struct ethtool_coalesce *ec)
 {
        return __i40e_get_coalesce(netdev, ec, -1);
 }
 
+/**
+ * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue
+ * @netdev: netdev structure
+ * @ec: ethtool's coalesce settings
+ * @queue: the particular queue to read
+ *
+ * Will read a specific queue's coalesce settings
+ **/
 static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
                                       struct ethtool_coalesce *ec)
 {
        return __i40e_get_coalesce(netdev, ec, queue);
 }
 
+/**
+ * i40e_set_itr_per_queue - set ITR values for specific queue
+ * @vsi: the VSI to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+
 static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
                                   struct ethtool_coalesce *ec,
                                   int queue)
@@ -2060,6 +2101,14 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
        i40e_flush(hw);
 }
 
+/**
+ * __i40e_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
 static int __i40e_set_coalesce(struct net_device *netdev,
                               struct ethtool_coalesce *ec,
                               int queue)
@@ -2120,12 +2169,27 @@ static int __i40e_set_coalesce(struct net_device *netdev,
        return 0;
 }
 
+/**
+ * i40e_set_coalesce - set coalesce settings for every queue on the netdev
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ *
+ * This will set each queue to the same coalesce settings.
+ **/
 static int i40e_set_coalesce(struct net_device *netdev,
                             struct ethtool_coalesce *ec)
 {
        return __i40e_set_coalesce(netdev, ec, -1);
 }
 
+/**
+ * i40e_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the specified queue's coalesce settings.
+ **/
 static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
                                       struct ethtool_coalesce *ec)
 {
@@ -2922,15 +2986,13 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
        u8 *seed = NULL;
        u16 i;
 
        if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
                return -EOPNOTSUPP;
 
-       if (!indir)
-               return 0;
-
        if (key) {
                if (!vsi->rss_hkey_user) {
                        vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE,
@@ -2948,8 +3010,12 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
        }
 
        /* Each 32 bits pointed by 'indir' is stored with a lut entry */
-       for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
-               vsi->rss_lut_user[i] = (u8)(indir[i]);
+       if (indir)
+               for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
+                       vsi->rss_lut_user[i] = (u8)(indir[i]);
+       else
+               i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE,
+                                 vsi->rss_size);
 
        return i40e_config_rss(vsi, seed, vsi->rss_lut_user,
                               I40E_HLUT_ARRAY_SIZE);
@@ -3019,6 +3085,9 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
        } else {
                pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+
+               /* flush current ATR settings */
+               set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
        }
 
        if ((flags & I40E_PRIV_FLAGS_VEB_STATS) &&
index 61b0fc4..8176596 100644 (file)
@@ -41,7 +41,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 12
+#define DRV_VERSION_BUILD 16
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -57,8 +57,6 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit);
 static int i40e_setup_misc_vector(struct i40e_pf *pf);
 static void i40e_determine_queue_usage(struct i40e_pf *pf);
 static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
-static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
-                             u16 rss_table_size, u16 rss_size);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 
@@ -1317,7 +1315,7 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
        element.vlan_tag = 0;
        /* ...and some firmware does it this way. */
        element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
-                       I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+                       I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
        i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
 }
 
@@ -1910,7 +1908,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                        ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
                        if (f->vlan == I40E_VLAN_ANY) {
                                del_list[num_del].vlan_tag = 0;
-                               cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+                               cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
                        } else {
                                del_list[num_del].vlan_tag =
                                        cpu_to_le16((u16)(f->vlan));
@@ -5244,7 +5242,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
                /* reset fd counters */
                pf->fd_add_err = pf->fd_atr_cnt = 0;
                if (pf->fd_tcp_rule > 0) {
-                       pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+                       pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
                        if (I40E_DEBUG_FD & pf->hw.debug_mask)
                                dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n");
                        pf->fd_tcp_rule = 0;
@@ -5941,13 +5939,17 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
                                dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
                }
        }
-       /* Wait for some more space to be available to turn on ATR */
+
+       /* Wait for some more space to be available to turn on ATR. We also
+        * must check that no existing ntuple rules for TCP are in effect
+        */
        if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) {
                if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                   (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) {
+                   (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) &&
+                   (pf->fd_tcp_rule == 0)) {
                        pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                        if (I40E_DEBUG_FD & pf->hw.debug_mask)
-                               dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n");
+                               dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
                }
        }
 
@@ -5978,9 +5980,6 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
        int fd_room;
        int reg;
 
-       if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)))
-               return;
-
        if (!time_after(jiffies, pf->fd_flush_timestamp +
                                 (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
                return;
@@ -6000,7 +5999,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
        }
 
        pf->fd_flush_timestamp = jiffies;
-       pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+       pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
        /* flush all filters */
        wr32(&pf->hw, I40E_PFQF_CTL_1,
             I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
@@ -6020,7 +6019,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
                /* replay sideband filters */
                i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
                if (!disable_atr)
-                       pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
+                       pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
                if (I40E_DEBUG_FD & pf->hw.debug_mask)
                        dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
@@ -6054,9 +6053,6 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
        if (test_bit(__I40E_DOWN, &pf->state))
                return;
 
-       if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)))
-               return;
-
        if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
                i40e_fdir_flush_and_replay(pf);
 
@@ -7156,9 +7152,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
                        pf->pending_udp_bitmap &= ~BIT_ULL(i);
                        port = pf->udp_ports[i].index;
                        if (port)
-                               ret = i40e_aq_add_udp_tunnel(hw, ntohs(port),
-                                                    pf->udp_ports[i].type,
-                                                    NULL, NULL);
+                               ret = i40e_aq_add_udp_tunnel(hw, port,
+                                                       pf->udp_ports[i].type,
+                                                       NULL, NULL);
                        else
                                ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
 
@@ -8244,8 +8240,8 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
  * @rss_table_size: Lookup table size
  * @rss_size: Range of queue number for hashing
  */
-static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
-                             u16 rss_table_size, u16 rss_size)
+void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+                      u16 rss_table_size, u16 rss_size)
 {
        u16 i;
 
@@ -8286,6 +8282,8 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
        if (!vsi->rss_size)
                vsi->rss_size = min_t(int, pf->alloc_rss_size,
                                      vsi->num_queue_pairs);
+       if (!vsi->rss_size)
+               return -EINVAL;
 
        lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
        if (!lut)
@@ -8610,7 +8608,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
                             I40E_FLAG_WB_ON_ITR_CAPABLE |
                             I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE |
                             I40E_FLAG_NO_PCI_LINK_CHECK |
-                            I40E_FLAG_100M_SGMII_CAPABLE |
                             I40E_FLAG_USE_SET_LLDP_MIB |
                             I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
        } else if ((pf->hw.aq.api_maj_ver > 1) ||
@@ -8685,13 +8682,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
                /* reset fd counters */
                pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0;
                pf->fdir_pf_active_filters = 0;
-               pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
-               if (I40E_DEBUG_FD & pf->hw.debug_mask)
-                       dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
                /* if ATR was auto disabled it can be re-enabled. */
                if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-                   (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+                   (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) {
                        pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+                       if (I40E_DEBUG_FD & pf->hw.debug_mask)
+                               dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
+               }
        }
        return need_reset;
 }
@@ -11338,11 +11335,7 @@ static void i40e_remove(struct pci_dev *pdev)
        }
 
        /* shutdown the adminq */
-       ret_code = i40e_shutdown_adminq(hw);
-       if (ret_code)
-               dev_warn(&pdev->dev,
-                        "Failed to destroy the Admin Queue resources: %d\n",
-                        ret_code);
+       i40e_shutdown_adminq(hw);
 
        /* destroy the locks only once, here */
        mutex_destroy(&hw->aq.arq_mutex);
index f8d6623..6287bf6 100644 (file)
@@ -40,6 +40,69 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 }
 
 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+/**
+ * i40e_fdir - Generate a Flow Director descriptor based on fdata
+ * @tx_ring: Tx ring to send buffer on
+ * @fdata: Flow director filter data
+ * @add: Indicate if we are adding a rule or deleting one
+ *
+ **/
+static void i40e_fdir(struct i40e_ring *tx_ring,
+                     struct i40e_fdir_filter *fdata, bool add)
+{
+       struct i40e_filter_program_desc *fdir_desc;
+       struct i40e_pf *pf = tx_ring->vsi->back;
+       u32 flex_ptype, dtype_cmd;
+       u16 i;
+
+       /* grab the next descriptor */
+       i = tx_ring->next_to_use;
+       fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
+
+       i++;
+       tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+       flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
+                    (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);
+
+       flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
+                     (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
+
+       flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
+                     (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
+
+       /* Use LAN VSI Id if not programmed by user */
+       flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
+                     ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
+                      I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
+
+       dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
+
+       dtype_cmd |= add ?
+                    I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+                    I40E_TXD_FLTR_QW1_PCMD_SHIFT :
+                    I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+                    I40E_TXD_FLTR_QW1_PCMD_SHIFT;
+
+       dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
+                    (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);
+
+       dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
+                    (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);
+
+       if (fdata->cnt_index) {
+               dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+               dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
+                            ((u32)fdata->cnt_index <<
+                             I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
+       }
+
+       fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
+       fdir_desc->rsvd = cpu_to_le32(0);
+       fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
+       fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
+}
+
 #define I40E_FD_CLEAN_DELAY 10
 /**
  * i40e_program_fdir_filter - Program a Flow Director filter
@@ -48,14 +111,13 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
  * @pf: The PF pointer
  * @add: True for add/update, False for remove
  **/
-int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
-                            struct i40e_pf *pf, bool add)
+static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
+                                   u8 *raw_packet, struct i40e_pf *pf,
+                                   bool add)
 {
-       struct i40e_filter_program_desc *fdir_desc;
        struct i40e_tx_buffer *tx_buf, *first;
        struct i40e_tx_desc *tx_desc;
        struct i40e_ring *tx_ring;
-       unsigned int fpt, dcc;
        struct i40e_vsi *vsi;
        struct device *dev;
        dma_addr_t dma;
@@ -92,56 +154,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
 
        /* grab the next descriptor */
        i = tx_ring->next_to_use;
-       fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
        first = &tx_ring->tx_bi[i];
-       memset(first, 0, sizeof(struct i40e_tx_buffer));
-
-       tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
-
-       fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
-             I40E_TXD_FLTR_QW0_QINDEX_MASK;
-
-       fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
-              I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
-
-       fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
-              I40E_TXD_FLTR_QW0_PCTYPE_MASK;
-
-       /* Use LAN VSI Id if not programmed by user */
-       if (fdir_data->dest_vsi == 0)
-               fpt |= (pf->vsi[pf->lan_vsi]->id) <<
-                      I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
-       else
-               fpt |= ((u32)fdir_data->dest_vsi <<
-                       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
-                      I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
-
-       dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
-
-       if (add)
-               dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
-                      I40E_TXD_FLTR_QW1_PCMD_SHIFT;
-       else
-               dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
-                      I40E_TXD_FLTR_QW1_PCMD_SHIFT;
-
-       dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
-              I40E_TXD_FLTR_QW1_DEST_MASK;
-
-       dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
-              I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
-
-       if (fdir_data->cnt_index != 0) {
-               dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
-               dcc |= ((u32)fdir_data->cnt_index <<
-                       I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
-                       I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
-       }
-
-       fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
-       fdir_desc->rsvd = cpu_to_le32(0);
-       fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
-       fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
+       i40e_fdir(tx_ring, fdir_data, add);
 
        /* Now program a dummy descriptor */
        i = tx_ring->next_to_use;
@@ -282,18 +296,18 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
 
        if (add) {
                pf->fd_tcp_rule++;
-               if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
-                       if (I40E_DEBUG_FD & pf->hw.debug_mask)
-                               dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
-                       pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
-               }
+               if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+                   I40E_DEBUG_FD & pf->hw.debug_mask)
+                       dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
+               pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
        } else {
                pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
                                  (pf->fd_tcp_rule - 1) : 0;
                if (pf->fd_tcp_rule == 0) {
-                       pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
-                       if (I40E_DEBUG_FD & pf->hw.debug_mask)
+                       if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+                           I40E_DEBUG_FD & pf->hw.debug_mask)
                                dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
+                       pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
                }
        }
 
@@ -532,7 +546,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
                                            struct i40e_tx_buffer *tx_buffer)
 {
        if (tx_buffer->skb) {
-               dev_kfree_skb_any(tx_buffer->skb);
+               if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+                       kfree(tx_buffer->raw_buf);
+               else
+                       dev_kfree_skb_any(tx_buffer->skb);
                if (dma_unmap_len(tx_buffer, len))
                        dma_unmap_single(ring->dev,
                                         dma_unmap_addr(tx_buffer, dma),
@@ -545,9 +562,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
                               DMA_TO_DEVICE);
        }
 
-       if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-               kfree(tx_buffer->raw_buf);
-
        tx_buffer->next_to_watch = NULL;
        tx_buffer->skb = NULL;
        dma_unmap_len_set(tx_buffer, len, 0);
@@ -584,8 +598,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
                return;
 
        /* cleanup Tx queue statistics */
-       netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                 tx_ring->queue_index));
+       netdev_tx_reset_queue(txring_txq(tx_ring));
 }
 
 /**
@@ -754,8 +767,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                        tx_ring->arm_wb = true;
        }
 
-       netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                     tx_ring->queue_index),
+       /* notify netdev of completed buffers */
+       netdev_tx_completed_queue(txring_txq(tx_ring),
                                  total_packets, total_bytes);
 
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
@@ -1864,6 +1877,15 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_PFINT_DYN_CTLN
+static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+       return !!(vsi->rx_rings[idx]->rx_itr_setting);
+}
+
+static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+       return !!(vsi->tx_rings[idx]->tx_itr_setting);
+}
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1879,6 +1901,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
        u32 rxval, txval;
        int vector;
        int idx = q_vector->v_idx;
+       int rx_itr_setting, tx_itr_setting;
 
        vector = (q_vector->v_idx + vsi->base_vector);
 
@@ -1887,18 +1910,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
         */
        rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
+       rx_itr_setting = get_rx_itr_enabled(vsi, idx);
+       tx_itr_setting = get_tx_itr_enabled(vsi, idx);
+
        if (q_vector->itr_countdown > 0 ||
-           (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
-            !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
+           (!ITR_IS_DYNAMIC(rx_itr_setting) &&
+            !ITR_IS_DYNAMIC(tx_itr_setting))) {
                goto enable_int;
        }
 
-       if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
+       if (ITR_IS_DYNAMIC(tx_itr_setting)) {
                rx = i40e_set_new_dynamic_itr(&q_vector->rx);
                rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
        }
 
-       if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
+       if (ITR_IS_DYNAMIC(tx_itr_setting)) {
                tx = i40e_set_new_dynamic_itr(&q_vector->tx);
                txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
        }
@@ -2621,9 +2647,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
                return false;
 
        /* We need to walk through the list and validate that each group
-        * of 6 fragments totals at least gso_size.  However we don't need
-        * to perform such validation on the last 6 since the last 6 cannot
-        * inherit any data from a descriptor after them.
+        * of 6 fragments totals at least gso_size.
         */
        nr_frags -= I40E_MAX_BUFFER_TXD - 2;
        frag = &skb_shinfo(skb)->frags[0];
@@ -2654,8 +2678,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
                if (sum < 0)
                        return true;
 
-               /* use pre-decrement to avoid processing last fragment */
-               if (!--nr_frags)
+               if (!nr_frags--)
                        break;
 
                sum -= skb_frag_size(stale++);
@@ -2787,9 +2810,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
        tx_ring->next_to_use = i;
 
-       netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                tx_ring->queue_index),
-                                                first->bytecount);
+       netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
        i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
        /* Algorithm to optimize tail and RS bit setting:
@@ -2814,13 +2835,11 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
         * trigger a force WB.
         */
        if (skb->xmit_more  &&
-           !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-                                                   tx_ring->queue_index))) {
+           !netif_xmit_stopped(txring_txq(tx_ring))) {
                tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
                tail_bump = false;
        } else if (!skb->xmit_more &&
-                  !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-                                                      tx_ring->queue_index)) &&
+                  !netif_xmit_stopped(txring_txq(tx_ring)) &&
                   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
                   (tx_ring->packet_stride < WB_STRIDE) &&
                   (desc_count < WB_STRIDE)) {
index b78c810..5088405 100644 (file)
@@ -463,4 +463,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype)
        return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
               (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
 }
+
+/**
+ * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
+{
+       return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
 #endif /* _I40E_TXRX_H_ */
index c92a3bd..f861d31 100644 (file)
@@ -163,6 +163,7 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING    0x00020000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF                0X00080000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM    0X00100000
 
 struct i40e_virtchnl_vf_resource {
        u16 num_vsis;
index da34235..54b8ee2 100644 (file)
@@ -502,8 +502,16 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
        u32 qtx_ctl;
        int ret = 0;
 
+       if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+               ret = -ENOENT;
+               goto error_context;
+       }
        pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
        vsi = i40e_find_vsi_from_id(pf, vsi_id);
+       if (!vsi) {
+               ret = -ENOENT;
+               goto error_context;
+       }
 
        /* clear the context structure first */
        memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq));
@@ -1476,7 +1484,8 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 
        vsi = i40e_find_vsi_from_id(pf, info->vsi_id);
        if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
-           !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+           !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) ||
+           !vsi) {
                aq_ret = I40E_ERR_PARAM;
                goto error_param;
        }
@@ -2217,8 +2226,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
 error_param:
        /* send the response to the VF */
        return i40e_vc_send_resp_to_vf(vf,
-                              config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP :
-                              I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+                              config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP :
+                              I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
                               aq_ret);
 }
 
@@ -2747,11 +2756,12 @@ error_param:
  * @vf_id: VF identifier
  * @vlan_id: mac address
  * @qos: priority setting
+ * @vlan_proto: vlan protocol
  *
  * program VF vlan id and/or qos
  **/
-int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
-                             int vf_id, u16 vlan_id, u8 qos)
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+                             u16 vlan_id, u8 qos, __be16 vlan_proto)
 {
        u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT);
        struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -2774,6 +2784,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
                goto error_pvid;
        }
 
+       if (vlan_proto != htons(ETH_P_8021Q)) {
+               dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+               ret = -EPROTONOSUPPORT;
+               goto error_pvid;
+       }
+
        vf = &(pf->vf[vf_id]);
        vsi = pf->vsi[vf->lan_vsi_idx];
        if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
index 8751741..4012d06 100644 (file)
@@ -129,8 +129,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
 
 /* VF configuration related iplink handlers */
 int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
-int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
-                             int vf_id, u16 vlan_id, u8 qos);
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+                             u16 vlan_id, u8 qos, __be16 vlan_proto);
 int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
                       int max_tx_rate);
 int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting);
index 4db0c03..7953c13 100644 (file)
@@ -302,7 +302,6 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
                   void *buffer, u16 buf_len)
 {
        struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
-       u16 len = le16_to_cpu(aq_desc->datalen);
        u8 *buf = (u8 *)buffer;
        u16 i = 0;
 
@@ -326,6 +325,8 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
                   le32_to_cpu(aq_desc->params.external.addr_low));
 
        if ((buffer != NULL) && (aq_desc->datalen != 0)) {
+               u16 len = le16_to_cpu(aq_desc->datalen);
+
                i40e_debug(hw, mask, "AQ CMD Buffer:\n");
                if (buf_len < len)
                        len = buf_len;
index 0130458..75f2a2c 100644 (file)
@@ -51,7 +51,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
                                            struct i40e_tx_buffer *tx_buffer)
 {
        if (tx_buffer->skb) {
-               dev_kfree_skb_any(tx_buffer->skb);
+               if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+                       kfree(tx_buffer->raw_buf);
+               else
+                       dev_kfree_skb_any(tx_buffer->skb);
                if (dma_unmap_len(tx_buffer, len))
                        dma_unmap_single(ring->dev,
                                         dma_unmap_addr(tx_buffer, dma),
@@ -64,9 +67,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
                               DMA_TO_DEVICE);
        }
 
-       if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-               kfree(tx_buffer->raw_buf);
-
        tx_buffer->next_to_watch = NULL;
        tx_buffer->skb = NULL;
        dma_unmap_len_set(tx_buffer, len, 0);
@@ -103,8 +103,7 @@ void i40evf_clean_tx_ring(struct i40e_ring *tx_ring)
                return;
 
        /* cleanup Tx queue statistics */
-       netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                 tx_ring->queue_index));
+       netdev_tx_reset_queue(txring_txq(tx_ring));
 }
 
 /**
@@ -273,8 +272,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                        tx_ring->arm_wb = true;
        }
 
-       netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                     tx_ring->queue_index),
+       /* notify netdev of completed buffers */
+       netdev_tx_completed_queue(txring_txq(tx_ring),
                                  total_packets, total_bytes);
 
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
@@ -1312,6 +1311,19 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_VFINT_DYN_CTLN1
+static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+       struct i40evf_adapter *adapter = vsi->back;
+
+       return !!(adapter->rx_rings[idx].rx_itr_setting);
+}
+
+static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+       struct i40evf_adapter *adapter = vsi->back;
+
+       return !!(adapter->tx_rings[idx].tx_itr_setting);
+}
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1326,6 +1338,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
        bool rx = false, tx = false;
        u32 rxval, txval;
        int vector;
+       int idx = q_vector->v_idx;
+       int rx_itr_setting, tx_itr_setting;
 
        vector = (q_vector->v_idx + vsi->base_vector);
 
@@ -1334,18 +1348,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
         */
        rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
+       rx_itr_setting = get_rx_itr_enabled(vsi, idx);
+       tx_itr_setting = get_tx_itr_enabled(vsi, idx);
+
        if (q_vector->itr_countdown > 0 ||
-           (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
-            !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
+           (!ITR_IS_DYNAMIC(rx_itr_setting) &&
+            !ITR_IS_DYNAMIC(tx_itr_setting))) {
                goto enable_int;
        }
 
-       if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
+       if (ITR_IS_DYNAMIC(rx_itr_setting)) {
                rx = i40e_set_new_dynamic_itr(&q_vector->rx);
                rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
        }
 
-       if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
+       if (ITR_IS_DYNAMIC(tx_itr_setting)) {
                tx = i40e_set_new_dynamic_itr(&q_vector->tx);
                txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
        }
@@ -1832,9 +1849,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
                return false;
 
        /* We need to walk through the list and validate that each group
-        * of 6 fragments totals at least gso_size.  However we don't need
-        * to perform such validation on the last 6 since the last 6 cannot
-        * inherit any data from a descriptor after them.
+        * of 6 fragments totals at least gso_size.
         */
        nr_frags -= I40E_MAX_BUFFER_TXD - 2;
        frag = &skb_shinfo(skb)->frags[0];
@@ -1865,8 +1880,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
                if (sum < 0)
                        return true;
 
-               /* use pre-decrement to avoid processing last fragment */
-               if (!--nr_frags)
+               if (!nr_frags--)
                        break;
 
                sum -= skb_frag_size(stale++);
@@ -2015,9 +2029,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
        tx_ring->next_to_use = i;
 
-       netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                tx_ring->queue_index),
-                                                first->bytecount);
+       netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
        i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
        /* Algorithm to optimize tail and RS bit setting:
@@ -2042,13 +2054,11 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
         * trigger a force WB.
         */
        if (skb->xmit_more  &&
-           !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-                                                   tx_ring->queue_index))) {
+           !netif_xmit_stopped(txring_txq(tx_ring))) {
                tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
                tail_bump = false;
        } else if (!skb->xmit_more &&
-                  !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-                                                      tx_ring->queue_index)) &&
+                  !netif_xmit_stopped(txring_txq(tx_ring)) &&
                   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
                   (tx_ring->packet_stride < WB_STRIDE) &&
                   (desc_count < WB_STRIDE)) {
index 0112277..abcdeca 100644 (file)
@@ -287,6 +287,14 @@ struct i40e_ring {
        u8 dcb_tc;                      /* Traffic class of ring */
        u8 __iomem *tail;
 
+       /* high bit set means dynamic, use accessors routines to read/write.
+        * hardware only supports 2us resolution for the ITR registers.
+        * these values always store the USER setting, and must be converted
+        * before programming to a register.
+        */
+       u16 rx_itr_setting;
+       u16 tx_itr_setting;
+
        u16 count;                      /* Number of descriptors */
        u16 reg_idx;                    /* HW register index of the ring */
        u16 rx_buf_len;
@@ -445,4 +453,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype)
        return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
               (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
 }
+
+/**
+ * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
+{
+       return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
 #endif /* _I40E_TXRX_H_ */
index f04ce6c..bd691ad 100644 (file)
@@ -160,6 +160,7 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING    0x00020000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF                0X00080000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM    0X00100000
 
 struct i40e_virtchnl_vf_resource {
        u16 num_vsis;
index dc00aaf..c5fd724 100644 (file)
@@ -59,13 +59,6 @@ struct i40e_vsi {
        unsigned long state;
        int base_vector;
        u16 work_limit;
-       /* high bit set means dynamic, use accessor routines to read/write.
-        * hardware only supports 2us resolution for the ITR registers.
-        * these values always store the USER setting, and must be converted
-        * before programming to a register.
-        */
-       u16 rx_itr_setting;
-       u16 tx_itr_setting;
        u16 qs_handle;
 };
 
index e17a154..a994015 100644 (file)
@@ -296,92 +296,206 @@ static int i40evf_set_ringparam(struct net_device *netdev,
 }
 
 /**
- * i40evf_get_coalesce - Get interrupt coalescing settings
- * @netdev: network interface device structure
- * @ec: ethtool coalesce structure
+ * __i40evf_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
  *
- * Returns current coalescing settings. This is referred to elsewhere in the
- * driver as Interrupt Throttle Rate, as this is how the hardware describes
- * this functionality.
+ * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs
+ * are per queue. If queue is <0 then we default to queue 0 as the
+ * representative value.
  **/
-static int i40evf_get_coalesce(struct net_device *netdev,
-                              struct ethtool_coalesce *ec)
+static int __i40evf_get_coalesce(struct net_device *netdev,
+                                struct ethtool_coalesce *ec,
+                                int queue)
 {
        struct i40evf_adapter *adapter = netdev_priv(netdev);
        struct i40e_vsi *vsi = &adapter->vsi;
+       struct i40e_ring *rx_ring, *tx_ring;
 
        ec->tx_max_coalesced_frames = vsi->work_limit;
        ec->rx_max_coalesced_frames = vsi->work_limit;
 
-       if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
+       /* Rx and Tx usecs per queue value. If user doesn't specify the
+        * queue, return queue 0's value to represent.
+        */
+       if (queue < 0)
+               queue = 0;
+       else if (queue >= adapter->num_active_queues)
+               return -EINVAL;
+
+       rx_ring = &adapter->rx_rings[queue];
+       tx_ring = &adapter->tx_rings[queue];
+
+       if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
                ec->use_adaptive_rx_coalesce = 1;
 
-       if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
+       if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
                ec->use_adaptive_tx_coalesce = 1;
 
-       ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-       ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+       ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
 
        return 0;
 }
 
 /**
- * i40evf_set_coalesce - Set interrupt coalescing settings
+ * i40evf_get_coalesce - Get interrupt coalescing settings
  * @netdev: network interface device structure
  * @ec: ethtool coalesce structure
  *
- * Change current coalescing settings.
+ * Returns current coalescing settings. This is referred to elsewhere in the
+ * driver as Interrupt Throttle Rate, as this is how the hardware describes
+ * this functionality. Note that if per-queue settings have been modified this
+ * only represents the settings of queue 0.
  **/
-static int i40evf_set_coalesce(struct net_device *netdev,
+static int i40evf_get_coalesce(struct net_device *netdev,
                               struct ethtool_coalesce *ec)
 {
-       struct i40evf_adapter *adapter = netdev_priv(netdev);
-       struct i40e_hw *hw = &adapter->hw;
+       return __i40evf_get_coalesce(netdev, ec, -1);
+}
+
+/**
+ * i40evf_get_per_queue_coalesce - get coalesce values for specific queue
+ * @netdev: netdev to read
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to read
+ *
+ * Read specific queue's coalesce settings.
+ **/
+static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
+                                        u32 queue,
+                                        struct ethtool_coalesce *ec)
+{
+       return __i40evf_get_coalesce(netdev, ec, queue);
+}
+
+/**
+ * i40evf_set_itr_per_queue - set ITR values for specific queue
+ * @vsi: the VSI to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
+                                    struct ethtool_coalesce *ec,
+                                    int queue)
+{
        struct i40e_vsi *vsi = &adapter->vsi;
+       struct i40e_hw *hw = &adapter->hw;
        struct i40e_q_vector *q_vector;
+       u16 vector;
+
+       adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs;
+       adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs;
+
+       if (ec->use_adaptive_rx_coalesce)
+               adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC;
+       else
+               adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+       if (ec->use_adaptive_tx_coalesce)
+               adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC;
+       else
+               adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+       q_vector = adapter->rx_rings[queue].q_vector;
+       q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting);
+       vector = vsi->base_vector + q_vector->v_idx;
+       wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+
+       q_vector = adapter->tx_rings[queue].q_vector;
+       q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting);
+       vector = vsi->base_vector + q_vector->v_idx;
+       wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+
+       i40e_flush(hw);
+}
+
+/**
+ * __i40evf_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
+static int __i40evf_set_coalesce(struct net_device *netdev,
+                                struct ethtool_coalesce *ec,
+                                int queue)
+{
+       struct i40evf_adapter *adapter = netdev_priv(netdev);
+       struct i40e_vsi *vsi = &adapter->vsi;
        int i;
 
        if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
                vsi->work_limit = ec->tx_max_coalesced_frames_irq;
 
-       if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
-           (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
-               vsi->rx_itr_setting = ec->rx_coalesce_usecs;
-
-       else
+       if (ec->rx_coalesce_usecs == 0) {
+               if (ec->use_adaptive_rx_coalesce)
+                       netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
+       } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
+                  (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+               netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
                return -EINVAL;
+       }
 
-       if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
-           (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
-               vsi->tx_itr_setting = ec->tx_coalesce_usecs;
-       else if (ec->use_adaptive_tx_coalesce)
-               vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
-                                      ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
        else
+       if (ec->tx_coalesce_usecs == 0) {
+               if (ec->use_adaptive_tx_coalesce)
+                       netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
+       } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
+                  (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+               netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
                return -EINVAL;
+       }
 
-       if (ec->use_adaptive_rx_coalesce)
-               vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
-       else
-               vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
-
-       if (ec->use_adaptive_tx_coalesce)
-               vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
-       else
-               vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
-
-       for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) {
-               q_vector = &adapter->q_vectors[i];
-               q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
-               wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr);
-               q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
-               wr32(hw, I40E_VFINT_ITRN1(1, i), q_vector->tx.itr);
-               i40e_flush(hw);
+       /* Rx and Tx usecs has per queue value. If user doesn't specify the
+        * queue, apply to all queues.
+        */
+       if (queue < 0) {
+               for (i = 0; i < adapter->num_active_queues; i++)
+                       i40evf_set_itr_per_queue(adapter, ec, i);
+       } else if (queue < adapter->num_active_queues) {
+               i40evf_set_itr_per_queue(adapter, ec, queue);
+       } else {
+               netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+                          adapter->num_active_queues - 1);
+               return -EINVAL;
        }
 
        return 0;
 }
 
+/**
+ * i40evf_set_coalesce - Set interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure
+ *
+ * Change current coalescing settings for every queue.
+ **/
+static int i40evf_set_coalesce(struct net_device *netdev,
+                              struct ethtool_coalesce *ec)
+{
+       return __i40evf_set_coalesce(netdev, ec, -1);
+}
+
+/**
+ * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to modify
+ *
+ * Modifies a specific queue's coalesce settings.
+ */
+static int i40evf_set_per_queue_coalesce(struct net_device *netdev,
+                                        u32 queue,
+                                        struct ethtool_coalesce *ec)
+{
+       return __i40evf_set_coalesce(netdev, ec, queue);
+}
+
 /**
  * i40evf_get_rxnfc - command to get RX flow classification rules
  * @netdev: network interface device structure
@@ -533,6 +647,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = {
        .set_msglevel           = i40evf_set_msglevel,
        .get_coalesce           = i40evf_get_coalesce,
        .set_coalesce           = i40evf_set_coalesce,
+       .get_per_queue_coalesce = i40evf_get_per_queue_coalesce,
+       .set_per_queue_coalesce = i40evf_set_per_queue_coalesce,
        .get_rxnfc              = i40evf_get_rxnfc,
        .get_rxfh_indir_size    = i40evf_get_rxfh_indir_size,
        .get_rxfh               = i40evf_get_rxfh,
index f751f7b..1437281 100644 (file)
@@ -38,7 +38,7 @@ static const char i40evf_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 12
+#define DRV_VERSION_BUILD 16
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD) \
@@ -370,6 +370,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 {
        struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
        struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx];
+       struct i40e_hw *hw = &adapter->hw;
 
        rx_ring->q_vector = q_vector;
        rx_ring->next = q_vector->rx.ring;
@@ -377,7 +378,10 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
        q_vector->rx.ring = rx_ring;
        q_vector->rx.count++;
        q_vector->rx.latency_range = I40E_LOW_LATENCY;
+       q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+       q_vector->ring_mask |= BIT(r_idx);
        q_vector->itr_countdown = ITR_COUNTDOWN_START;
+       wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
 }
 
 /**
@@ -391,6 +395,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 {
        struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
        struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx];
+       struct i40e_hw *hw = &adapter->hw;
 
        tx_ring->q_vector = q_vector;
        tx_ring->next = q_vector->tx.ring;
@@ -398,9 +403,10 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
        q_vector->tx.ring = tx_ring;
        q_vector->tx.count++;
        q_vector->tx.latency_range = I40E_LOW_LATENCY;
+       q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
        q_vector->itr_countdown = ITR_COUNTDOWN_START;
        q_vector->num_ringpairs++;
-       q_vector->ring_mask |= BIT(t_idx);
+       wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
 }
 
 /**
@@ -1007,7 +1013,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter)
  * i40evf_up_complete - Finish the last steps of bringing up a connection
  * @adapter: board private structure
  **/
-static int i40evf_up_complete(struct i40evf_adapter *adapter)
+static void i40evf_up_complete(struct i40evf_adapter *adapter)
 {
        adapter->state = __I40EVF_RUNNING;
        clear_bit(__I40E_DOWN, &adapter->vsi.state);
@@ -1016,7 +1022,6 @@ static int i40evf_up_complete(struct i40evf_adapter *adapter)
 
        adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES;
        mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-       return 0;
 }
 
 /**
@@ -1037,6 +1042,7 @@ void i40evf_down(struct i40evf_adapter *adapter)
 
        netif_carrier_off(netdev);
        netif_tx_disable(netdev);
+       adapter->link_up = false;
        i40evf_napi_disable_all(adapter);
        i40evf_irq_disable(adapter);
 
@@ -1154,6 +1160,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
                tx_ring->netdev = adapter->netdev;
                tx_ring->dev = &adapter->pdev->dev;
                tx_ring->count = adapter->tx_desc_count;
+               tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF);
                if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
                        tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
 
@@ -1162,6 +1169,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
                rx_ring->netdev = adapter->netdev;
                rx_ring->dev = &adapter->pdev->dev;
                rx_ring->count = adapter->rx_desc_count;
+               rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF);
        }
 
        return 0;
@@ -1731,6 +1739,7 @@ static void i40evf_reset_task(struct work_struct *work)
                        set_bit(__I40E_DOWN, &adapter->vsi.state);
                        netif_carrier_off(netdev);
                        netif_tx_disable(netdev);
+                       adapter->link_up = false;
                        i40evf_napi_disable_all(adapter);
                        i40evf_irq_disable(adapter);
                        i40evf_free_traffic_irqs(adapter);
@@ -1769,6 +1778,7 @@ continue_reset:
        if (netif_running(adapter->netdev)) {
                netif_carrier_off(netdev);
                netif_tx_stop_all_queues(netdev);
+               adapter->link_up = false;
                i40evf_napi_disable_all(adapter);
        }
        i40evf_irq_disable(adapter);
@@ -1783,8 +1793,7 @@ continue_reset:
        i40evf_free_all_tx_resources(adapter);
 
        /* kill and reinit the admin queue */
-       if (i40evf_shutdown_adminq(hw))
-               dev_warn(&adapter->pdev->dev, "Failed to shut down adminq\n");
+       i40evf_shutdown_adminq(hw);
        adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
        err = i40evf_init_adminq(hw);
        if (err)
@@ -1824,9 +1833,7 @@ continue_reset:
 
                i40evf_configure(adapter);
 
-               err = i40evf_up_complete(adapter);
-               if (err)
-                       goto reset_err;
+               i40evf_up_complete(adapter);
 
                i40evf_irq_enable(adapter, true);
        } else {
@@ -2056,9 +2063,7 @@ static int i40evf_open(struct net_device *netdev)
        i40evf_add_filter(adapter, adapter->hw.mac.addr);
        i40evf_configure(adapter);
 
-       err = i40evf_up_complete(adapter);
-       if (err)
-               goto err_req_irq;
+       i40evf_up_complete(adapter);
 
        i40evf_irq_enable(adapter, true);
 
@@ -2272,10 +2277,6 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
        adapter->vsi.back = adapter;
        adapter->vsi.base_vector = 1;
        adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
-       adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC |
-                                      ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
-       adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC |
-                                      ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
        vsi->netdev = adapter->netdev;
        vsi->qs_handle = adapter->vsi_res->qset_handle;
        if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -2457,6 +2458,7 @@ static void i40evf_init_task(struct work_struct *work)
                goto err_sw_init;
 
        netif_carrier_off(netdev);
+       adapter->link_up = false;
 
        if (!adapter->netdev_registered) {
                err = register_netdev(netdev);
index cc6cb30..ddf478d 100644 (file)
@@ -898,8 +898,14 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
                            vpe->event_data.link_event.link_status) {
                                adapter->link_up =
                                        vpe->event_data.link_event.link_status;
+                               if (adapter->link_up) {
+                                       netif_tx_start_all_queues(netdev);
+                                       netif_carrier_on(netdev);
+                               } else {
+                                       netif_tx_stop_all_queues(netdev);
+                                       netif_carrier_off(netdev);
+                               }
                                i40evf_print_link_message(adapter);
-                               netif_tx_stop_all_queues(netdev);
                        }
                        break;
                case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
@@ -974,8 +980,6 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
        case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
                /* enable transmits */
                i40evf_irq_enable(adapter, true);
-               netif_tx_start_all_queues(adapter->netdev);
-               netif_carrier_on(adapter->netdev);
                break;
        case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
                i40evf_free_all_tx_resources(adapter);
index af75eac..a83aa13 100644 (file)
@@ -169,7 +169,7 @@ static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-                              int vf, u16 vlan, u8 qos);
+                              int vf, u16 vlan, u8 qos, __be16 vlan_proto);
 static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
                                   bool setting);
@@ -6222,14 +6222,17 @@ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf)
        return 0;
 }
 
-static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-                              int vf, u16 vlan, u8 qos)
+static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,
+                              u16 vlan, u8 qos, __be16 vlan_proto)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
 
        if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
                return -EINVAL;
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) :
                               igb_disable_port_vlan(adapter, vf);
 }
index 9547191..f49f803 100644 (file)
@@ -313,6 +313,25 @@ static int ixgbe_get_settings(struct net_device *netdev,
                break;
        }
 
+       /* Indicate pause support */
+       ecmd->supported |= SUPPORTED_Pause;
+
+       switch (hw->fc.requested_mode) {
+       case ixgbe_fc_full:
+               ecmd->advertising |= ADVERTISED_Pause;
+               break;
+       case ixgbe_fc_rx_pause:
+               ecmd->advertising |= ADVERTISED_Pause |
+                                    ADVERTISED_Asym_Pause;
+               break;
+       case ixgbe_fc_tx_pause:
+               ecmd->advertising |= ADVERTISED_Asym_Pause;
+               break;
+       default:
+               ecmd->advertising &= ~(ADVERTISED_Pause |
+                                      ADVERTISED_Asym_Pause);
+       }
+
        if (netif_carrier_ok(netdev)) {
                switch (adapter->link_speed) {
                case IXGBE_LINK_SPEED_10GB_FULL:
@@ -2928,9 +2947,13 @@ static u32 ixgbe_rss_indir_size(struct net_device *netdev)
 static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir)
 {
        int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter);
+       u16 rss_m = adapter->ring_feature[RING_F_RSS].mask;
+
+       if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+               rss_m = adapter->ring_feature[RING_F_RSS].indices - 1;
 
        for (i = 0; i < reta_size; i++)
-               indir[i] = adapter->rss_indir_tbl[i];
+               indir[i] = adapter->rss_indir_tbl[i] & rss_m;
 }
 
 static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
@@ -3041,8 +3064,8 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter)
                /* We only support one q_vector without MSI-X */
                max_combined = 1;
        } else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
-               /* SR-IOV currently only allows one queue on the PF */
-               max_combined = 1;
+               /* Limit value based on the queue mask */
+               max_combined = adapter->ring_feature[RING_F_RSS].mask + 1;
        } else if (tcs > 1) {
                /* For DCB report channels per traffic class */
                if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
index bcdc884..15ab337 100644 (file)
@@ -515,15 +515,16 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
        vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);
 
        /* 64 pool mode with 2 queues per pool */
-       if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) {
+       if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) {
                vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
                rss_m = IXGBE_RSS_2Q_MASK;
                rss_i = min_t(u16, rss_i, 2);
-       /* 32 pool mode with 4 queues per pool */
+       /* 32 pool mode with up to 4 queues per pool */
        } else {
                vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
                rss_m = IXGBE_RSS_4Q_MASK;
-               rss_i = 4;
+               /* We can support 4, 2, or 1 queues */
+               rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 2 : 1;
        }
 
 #ifdef IXGBE_FCOE
index d76bc1a..a244d9a 100644 (file)
@@ -3248,7 +3248,8 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
                        mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
                else if (tcs > 1)
                        mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
-               else if (adapter->ring_feature[RING_F_RSS].indices == 4)
+               else if (adapter->ring_feature[RING_F_VMDQ].mask ==
+                        IXGBE_82599_VMDQ_4Q_MASK)
                        mtqc |= IXGBE_MTQC_32VF;
                else
                        mtqc |= IXGBE_MTQC_64VF;
@@ -3475,12 +3476,12 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter)
        u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
        u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
 
-       /* Program table for at least 2 queues w/ SR-IOV so that VFs can
+       /* Program table for at least 4 queues w/ SR-IOV so that VFs can
         * make full use of any rings they may have.  We will use the
         * PSRTYPE register to control how many rings we use within the PF.
         */
-       if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2))
-               rss_i = 2;
+       if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4))
+               rss_i = 4;
 
        /* Fill out hash function seeds */
        for (i = 0; i < 10; i++)
@@ -3544,7 +3545,8 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
                                mrqc = IXGBE_MRQC_VMDQRT8TCEN;  /* 8 TCs */
                        else if (tcs > 1)
                                mrqc = IXGBE_MRQC_VMDQRT4TCEN;  /* 4 TCs */
-                       else if (adapter->ring_feature[RING_F_RSS].indices == 4)
+                       else if (adapter->ring_feature[RING_F_VMDQ].mask ==
+                                IXGBE_82599_VMDQ_4Q_MASK)
                                mrqc = IXGBE_MRQC_VMDQRSS32EN;
                        else
                                mrqc = IXGBE_MRQC_VMDQRSS64EN;
@@ -4105,23 +4107,20 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
 
        vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
 
-       switch (hw->mac.type) {
-       case ixgbe_mac_82599EB:
-       case ixgbe_mac_X540:
-       case ixgbe_mac_X550:
-       case ixgbe_mac_X550EM_x:
-       case ixgbe_mac_x550em_a:
-       default:
-               if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
-                       break;
-               /* fall through */
-       case ixgbe_mac_82598EB:
-               /* legacy case, we can just disable VLAN filtering */
+       if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+       /* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+               vlnctrl |= IXGBE_VLNCTRL_VFE;
+               IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+       } else {
                vlnctrl &= ~IXGBE_VLNCTRL_VFE;
                IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
                return;
        }
 
+       /* Nothing to do for 82598 */
+       if (hw->mac.type == ixgbe_mac_82598EB)
+               return;
+
        /* We are already in VLAN promisc, nothing to do */
        if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)
                return;
@@ -4129,10 +4128,6 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
        /* Set flag so we don't redo unnecessary work */
        adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
 
-       /* For VMDq and SR-IOV we must leave VLAN filtering enabled */
-       vlnctrl |= IXGBE_VLNCTRL_VFE;
-       IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
-
        /* Add PF to all active pools */
        for (i = IXGBE_VLVF_ENTRIES; --i;) {
                u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
@@ -4204,19 +4199,9 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
        vlnctrl |= IXGBE_VLNCTRL_VFE;
        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 
-       switch (hw->mac.type) {
-       case ixgbe_mac_82599EB:
-       case ixgbe_mac_X540:
-       case ixgbe_mac_X550:
-       case ixgbe_mac_X550EM_x:
-       case ixgbe_mac_x550em_a:
-       default:
-               if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
-                       break;
-               /* fall through */
-       case ixgbe_mac_82598EB:
+       if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) ||
+           hw->mac.type == ixgbe_mac_82598EB)
                return;
-       }
 
        /* We are not in VLAN promisc, nothing to do */
        if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
index db0731e..021ab9b 100644 (file)
@@ -346,8 +346,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
                                return 0;
                        }
                }
-               /* clear value if nothing found */
-               hw->phy.mdio.prtad = 0;
+               /* indicate no PHY found */
+               hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
                return IXGBE_ERR_PHY_ADDR_INVALID;
        }
        return 0;
index 8618599..7e5d985 100644 (file)
@@ -329,13 +329,15 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
        for (i = 0; i < adapter->num_vfs; i++)
                ixgbe_vf_configuration(dev, (i | 0x10000000));
 
+       /* reset before enabling SRIOV to avoid mailbox issues */
+       ixgbe_sriov_reinit(adapter);
+
        err = pci_enable_sriov(dev, num_vfs);
        if (err) {
                e_dev_warn("Failed to enable PCI sriov: %d\n", err);
                return err;
        }
        ixgbe_get_vfs(adapter);
-       ixgbe_sriov_reinit(adapter);
 
        return num_vfs;
 #else
@@ -1354,13 +1356,16 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
        return err;
 }
 
-int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+                         u8 qos, __be16 vlan_proto)
 {
        int err = 0;
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
        if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
                return -EINVAL;
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
        if (vlan || qos) {
                /* Check if there is already a port VLAN set, if so
                 * we have to delete the old one first before we
index 47e65e2..0c7977d 100644 (file)
@@ -43,7 +43,7 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
 void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
-                          u8 qos);
+                          u8 qos, __be16 vlan_proto);
 int ixgbe_link_mbps(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
                        int max_tx_rate);
index e092a89..7e6b926 100644 (file)
@@ -1459,7 +1459,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
        /* Configure internal PHY for KR/KX. */
        ixgbe_setup_kr_speed_x550em(hw, speed);
 
-       if (!hw->phy.mdio.prtad || hw->phy.mdio.prtad == 0xFFFF)
+       if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE)
                return IXGBE_ERR_PHY_ADDR_INVALID;
 
        /* Get external PHY device id */
@@ -2125,7 +2125,7 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
  *  @hw: pointer to hardware structure
  *  @led_idx: led number to turn on
  **/
-s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
 {
        u16 phy_data;
 
@@ -2147,7 +2147,7 @@ s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
  *  @hw: pointer to hardware structure
  *  @led_idx: led number to turn off
  **/
-s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
 {
        u16 phy_data;
 
@@ -3049,6 +3049,8 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = {
        .identify               = &ixgbe_identify_phy_x550em,
        .read_reg               = &ixgbe_read_phy_reg_x550a,
        .write_reg              = &ixgbe_write_phy_reg_x550a,
+       .read_reg_mdi           = &ixgbe_read_phy_reg_mdi,
+       .write_reg_mdi          = &ixgbe_write_phy_reg_mdi,
 };
 
 static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = {
index 4044608..7eaac32 100644 (file)
@@ -1810,8 +1810,10 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
        if (hw->mac.type >= ixgbe_mac_X550_vf)
                ixgbevf_setup_vfmrqc(adapter);
 
+       spin_lock_bh(&adapter->mbx_lock);
        /* notify the PF of our intent to use this size of frame */
        ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
+       spin_unlock_bh(&adapter->mbx_lock);
        if (ret)
                dev_err(&adapter->pdev->dev,
                        "Failed to set MTU at %d\n", netdev->mtu);
@@ -3758,8 +3760,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
        if ((new_mtu < 68) || (max_frame > max_possible_frame))
                return -EINVAL;
 
+       spin_lock_bh(&adapter->mbx_lock);
        /* notify the PF of our intent to use this size of frame */
        ret = hw->mac.ops.set_rlpml(hw, max_frame);
+       spin_unlock_bh(&adapter->mbx_lock);
        if (ret)
                return -EINVAL;
 
index 2909372..4cc50c0 100644 (file)
@@ -52,7 +52,7 @@ static const struct mtk_ethtool_stats {
 };
 
 static const char * const mtk_clks_source_name[] = {
-       "ethif", "esw", "gp1", "gp2"
+       "ethif", "esw", "gp1", "gp2", "trgpll"
 };
 
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -135,6 +135,33 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
        return _mtk_mdio_read(eth, phy_addr, phy_reg);
 }
 
+static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
+{
+       u32 val;
+       int ret;
+
+       val = (speed == SPEED_1000) ?
+               INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100;
+       mtk_w32(eth, val, INTF_MODE);
+
+       regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
+                          ETHSYS_TRGMII_CLK_SEL362_5,
+                          ETHSYS_TRGMII_CLK_SEL362_5);
+
+       val = (speed == SPEED_1000) ? 250000000 : 500000000;
+       ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val);
+       if (ret)
+               dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
+
+       val = (speed == SPEED_1000) ?
+               RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100;
+       mtk_w32(eth, val, TRGMII_RCK_CTRL);
+
+       val = (speed == SPEED_1000) ?
+               TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100;
+       mtk_w32(eth, val, TRGMII_TCK_CTRL);
+}
+
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
        struct mtk_mac *mac = netdev_priv(dev);
@@ -148,7 +175,7 @@ static void mtk_phy_link_adjust(struct net_device *dev)
        if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
                return;
 
-       switch (mac->phy_dev->speed) {
+       switch (dev->phydev->speed) {
        case SPEED_1000:
                mcr |= MAC_MCR_SPEED_1000;
                break;
@@ -157,20 +184,23 @@ static void mtk_phy_link_adjust(struct net_device *dev)
                break;
        };
 
-       if (mac->phy_dev->link)
+       if (mac->id == 0 && !mac->trgmii)
+               mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
+
+       if (dev->phydev->link)
                mcr |= MAC_MCR_FORCE_LINK;
 
-       if (mac->phy_dev->duplex) {
+       if (dev->phydev->duplex) {
                mcr |= MAC_MCR_FORCE_DPX;
 
-               if (mac->phy_dev->pause)
+               if (dev->phydev->pause)
                        rmt_adv = LPA_PAUSE_CAP;
-               if (mac->phy_dev->asym_pause)
+               if (dev->phydev->asym_pause)
                        rmt_adv |= LPA_PAUSE_ASYM;
 
-               if (mac->phy_dev->advertising & ADVERTISED_Pause)
+               if (dev->phydev->advertising & ADVERTISED_Pause)
                        lcl_adv |= ADVERTISE_PAUSE_CAP;
-               if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause)
+               if (dev->phydev->advertising & ADVERTISED_Asym_Pause)
                        lcl_adv |= ADVERTISE_PAUSE_ASYM;
 
                flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
@@ -187,7 +217,7 @@ static void mtk_phy_link_adjust(struct net_device *dev)
 
        mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
 
-       if (mac->phy_dev->link)
+       if (dev->phydev->link)
                netif_carrier_on(dev);
        else
                netif_carrier_off(dev);
@@ -196,17 +226,9 @@ static void mtk_phy_link_adjust(struct net_device *dev)
 static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
                                struct device_node *phy_node)
 {
-       const __be32 *_addr = NULL;
        struct phy_device *phydev;
-       int phy_mode, addr;
-
-       _addr = of_get_property(phy_node, "reg", NULL);
+       int phy_mode;
 
-       if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) {
-               pr_err("%s: invalid phy address\n", phy_node->name);
-               return -EINVAL;
-       }
-       addr = be32_to_cpu(*_addr);
        phy_mode = of_get_phy_mode(phy_node);
        if (phy_mode < 0) {
                dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
@@ -225,17 +247,17 @@ static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
                 mac->id, phydev_name(phydev), phydev->phy_id,
                 phydev->drv->name);
 
-       mac->phy_dev = phydev;
-
        return 0;
 }
 
-static int mtk_phy_connect(struct mtk_mac *mac)
+static int mtk_phy_connect(struct net_device *dev)
 {
-       struct mtk_eth *eth = mac->hw;
+       struct mtk_mac *mac = netdev_priv(dev);
+       struct mtk_eth *eth;
        struct device_node *np;
        u32 val;
 
+       eth = mac->hw;
        np = of_parse_phandle(mac->of_node, "phy-handle", 0);
        if (!np && of_phy_is_fixed_link(mac->of_node))
                if (!of_phy_register_fixed_link(mac->of_node))
@@ -244,6 +266,8 @@ static int mtk_phy_connect(struct mtk_mac *mac)
                return -ENODEV;
 
        switch (of_get_phy_mode(np)) {
+       case PHY_INTERFACE_MODE_TRGMII:
+               mac->trgmii = true;
        case PHY_INTERFACE_MODE_RGMII_TXID:
        case PHY_INTERFACE_MODE_RGMII_RXID:
        case PHY_INTERFACE_MODE_RGMII_ID:
@@ -271,20 +295,23 @@ static int mtk_phy_connect(struct mtk_mac *mac)
        val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
        regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
-       mtk_phy_connect_node(eth, mac, np);
-       mac->phy_dev->autoneg = AUTONEG_ENABLE;
-       mac->phy_dev->speed = 0;
-       mac->phy_dev->duplex = 0;
+       /* couple phydev to net_device */
+       if (mtk_phy_connect_node(eth, mac, np))
+               goto err_phy;
+
+       dev->phydev->autoneg = AUTONEG_ENABLE;
+       dev->phydev->speed = 0;
+       dev->phydev->duplex = 0;
 
        if (of_phy_is_fixed_link(mac->of_node))
-               mac->phy_dev->supported |=
+               dev->phydev->supported |=
                SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 
-       mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
+       dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
                                   SUPPORTED_Asym_Pause;
-       mac->phy_dev->advertising = mac->phy_dev->supported |
+       dev->phydev->advertising = dev->phydev->supported |
                                    ADVERTISED_Autoneg;
-       phy_start_aneg(mac->phy_dev);
+       phy_start_aneg(dev->phydev);
 
        of_node_put(np);
 
@@ -292,7 +319,7 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 
 err_phy:
        of_node_put(np);
-       dev_err(eth->dev, "invalid phy_mode\n");
+       dev_err(eth->dev, "%s: invalid phy\n", __func__);
        return -EINVAL;
 }
 
@@ -1710,7 +1737,7 @@ static int mtk_open(struct net_device *dev)
        }
        atomic_inc(&eth->dma_refcnt);
 
-       phy_start(mac->phy_dev);
+       phy_start(dev->phydev);
        netif_start_queue(dev);
 
        return 0;
@@ -1745,7 +1772,7 @@ static int mtk_stop(struct net_device *dev)
        struct mtk_eth *eth = mac->hw;
 
        netif_tx_disable(dev);
-       phy_stop(mac->phy_dev);
+       phy_stop(dev->phydev);
 
        /* only shutdown DMA if this is the last user */
        if (!atomic_dec_and_test(&eth->dma_refcnt))
@@ -1885,7 +1912,7 @@ static int __init mtk_init(struct net_device *dev)
                dev->addr_assign_type = NET_ADDR_RANDOM;
        }
 
-       return mtk_phy_connect(mac);
+       return mtk_phy_connect(dev);
 }
 
 static void mtk_uninit(struct net_device *dev)
@@ -1893,20 +1920,18 @@ static void mtk_uninit(struct net_device *dev)
        struct mtk_mac *mac = netdev_priv(dev);
        struct mtk_eth *eth = mac->hw;
 
-       phy_disconnect(mac->phy_dev);
+       phy_disconnect(dev->phydev);
        mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
        mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
 }
 
 static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-       struct mtk_mac *mac = netdev_priv(dev);
-
        switch (cmd) {
        case SIOCGMIIPHY:
        case SIOCGMIIREG:
        case SIOCSMIIREG:
-               return phy_mii_ioctl(mac->phy_dev, ifr, cmd);
+               return phy_mii_ioctl(dev->phydev, ifr, cmd);
        default:
                break;
        }
@@ -1951,7 +1976,7 @@ static void mtk_pending_work(struct work_struct *work)
                if (!eth->mac[i] ||
                    of_phy_is_fixed_link(eth->mac[i]->of_node))
                        continue;
-               err = phy_init_hw(eth->mac[i]->phy_dev);
+               err = phy_init_hw(eth->netdev[i]->phydev);
                if (err)
                        dev_err(eth->dev, "%s: PHY init failed.\n",
                                eth->netdev[i]->name);
@@ -2011,35 +2036,26 @@ static int mtk_cleanup(struct mtk_eth *eth)
        return 0;
 }
 
-static int mtk_get_settings(struct net_device *dev,
-                           struct ethtool_cmd *cmd)
+int mtk_get_link_ksettings(struct net_device *ndev,
+                          struct ethtool_link_ksettings *cmd)
 {
-       struct mtk_mac *mac = netdev_priv(dev);
-       int err;
+       struct mtk_mac *mac = netdev_priv(ndev);
 
        if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
                return -EBUSY;
 
-       err = phy_read_status(mac->phy_dev);
-       if (err)
-               return -ENODEV;
-
-       return phy_ethtool_gset(mac->phy_dev, cmd);
+       return phy_ethtool_ksettings_get(ndev->phydev, cmd);
 }
 
-static int mtk_set_settings(struct net_device *dev,
-                           struct ethtool_cmd *cmd)
+int mtk_set_link_ksettings(struct net_device *ndev,
+                          const struct ethtool_link_ksettings *cmd)
 {
-       struct mtk_mac *mac = netdev_priv(dev);
+       struct mtk_mac *mac = netdev_priv(ndev);
 
-       if (cmd->phy_address != mac->phy_dev->mdio.addr) {
-               mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus,
-                                              cmd->phy_address);
-               if (!mac->phy_dev)
-                       return -ENODEV;
-       }
+       if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+               return -EBUSY;
 
-       return phy_ethtool_sset(mac->phy_dev, cmd);
+       return phy_ethtool_ksettings_set(ndev->phydev, cmd);
 }
 
 static void mtk_get_drvinfo(struct net_device *dev,
@@ -2073,7 +2089,7 @@ static int mtk_nway_reset(struct net_device *dev)
        if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
                return -EBUSY;
 
-       return genphy_restart_aneg(mac->phy_dev);
+       return genphy_restart_aneg(dev->phydev);
 }
 
 static u32 mtk_get_link(struct net_device *dev)
@@ -2084,11 +2100,11 @@ static u32 mtk_get_link(struct net_device *dev)
        if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
                return -EBUSY;
 
-       err = genphy_update_link(mac->phy_dev);
+       err = genphy_update_link(dev->phydev);
        if (err)
                return ethtool_op_get_link(dev);
 
-       return mac->phy_dev->link;
+       return dev->phydev->link;
 }
 
 static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -2202,8 +2218,8 @@ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 }
 
 static const struct ethtool_ops mtk_ethtool_ops = {
-       .get_settings           = mtk_get_settings,
-       .set_settings           = mtk_set_settings,
+       .get_link_ksettings     = mtk_get_link_ksettings,
+       .set_link_ksettings     = mtk_set_link_ksettings,
        .get_drvinfo            = mtk_get_drvinfo,
        .get_msglevel           = mtk_get_msglevel,
        .set_msglevel           = mtk_set_msglevel,
@@ -2460,6 +2476,7 @@ const struct of_device_id of_mtk_match[] = {
        { .compatible = "mediatek,mt7623-eth" },
        {},
 };
+MODULE_DEVICE_TABLE(of, of_mtk_match);
 
 static struct platform_driver mtk_driver = {
        .probe = mtk_probe,
index 7c5e534..7e194f7 100644 (file)
                                 MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \
                                 MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK)
 
+/* TRGMII RXC control register */
+#define TRGMII_RCK_CTRL                0x10300
+#define DQSI0(x)               ((x << 0) & GENMASK(6, 0))
+#define DQSI1(x)               ((x << 8) & GENMASK(14, 8))
+#define RXCTL_DMWTLAT(x)       ((x << 16) & GENMASK(18, 16))
+#define RXC_DQSISEL            BIT(30)
+#define RCK_CTRL_RGMII_1000    (RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16))
+#define RCK_CTRL_RGMII_10_100  RXCTL_DMWTLAT(2)
+
+/* TRGMII RXC control register */
+#define TRGMII_TCK_CTRL                0x10340
+#define TXCTL_DMWTLAT(x)       ((x << 16) & GENMASK(18, 16))
+#define TXC_INV                        BIT(30)
+#define TCK_CTRL_RGMII_1000    TXCTL_DMWTLAT(2)
+#define TCK_CTRL_RGMII_10_100  (TXC_INV | TXCTL_DMWTLAT(2))
+
+/* TRGMII Interface mode register */
+#define INTF_MODE              0x10390
+#define TRGMII_INTF_DIS                BIT(0)
+#define TRGMII_MODE            BIT(1)
+#define TRGMII_CENTRAL_ALIGNED BIT(2)
+#define INTF_MODE_RGMII_1000    (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED)
+#define INTF_MODE_RGMII_10_100  0
+
 /* GPIO port control registers for GMAC 2*/
 #define GPIO_OD33_CTRL8                0x4c0
 #define GPIO_BIAS_CTRL         0xed0
 #define SYSCFG0_GE_MASK                0x3
 #define SYSCFG0_GE_MODE(x, y)  (x << (12 + (y * 2)))
 
-/*ethernet reset control register*/
+/* ethernet subsystem clock register */
+#define ETHSYS_CLKCFG0         0x2c
+#define ETHSYS_TRGMII_CLK_SEL362_5     BIT(11)
+
+/* ethernet reset control register */
 #define ETHSYS_RSTCTRL         0x34
 #define RSTCTRL_FE             BIT(6)
 #define RSTCTRL_PPE            BIT(31)
@@ -389,6 +417,7 @@ enum mtk_clks_map {
        MTK_CLK_ESW,
        MTK_CLK_GP1,
        MTK_CLK_GP2,
+       MTK_CLK_TRGPLL,
        MTK_CLK_MAX
 };
 
@@ -528,7 +557,8 @@ struct mtk_eth {
  * @of_node:           Our devicetree node
  * @hw:                        Backpointer to our main datastruture
  * @hw_stats:          Packet statistics counter
- * @phy_dev:           The attached PHY if available
+ * @trgmii             Indicate if the MAC uses TRGMII connected to internal
+                       switch
  */
 struct mtk_mac {
        int                             id;
@@ -536,9 +566,9 @@ struct mtk_mac {
        struct device_node              *of_node;
        struct mtk_eth                  *hw;
        struct mtk_hw_stats             *hw_stats;
-       struct phy_device               *phy_dev;
        __be32                          hwlro_ip[MTK_MAX_LRO_IP_CNT];
        int                             hwlro_ip_cnt;
+       bool                            trgmii;
 };
 
 /* the struct describing the SoC. these are declared in the soc_xyz.c files */
index a58d96c..b1cef7a 100644 (file)
@@ -1851,6 +1851,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
 
        if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
            vp_oper->state.default_qos == vp_admin->default_qos &&
+           vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
            vp_oper->state.link_state == vp_admin->link_state &&
            vp_oper->state.qos_vport == vp_admin->qos_vport)
                return 0;
@@ -1909,6 +1910,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
 
        vp_oper->state.default_vlan = vp_admin->default_vlan;
        vp_oper->state.default_qos = vp_admin->default_qos;
+       vp_oper->state.vlan_proto = vp_admin->vlan_proto;
        vp_oper->state.link_state = vp_admin->link_state;
        vp_oper->state.qos_vport = vp_admin->qos_vport;
 
@@ -1922,6 +1924,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
        work->qos_vport = vp_oper->state.qos_vport;
        work->vlan_id = vp_oper->state.default_vlan;
        work->vlan_ix = vp_oper->vlan_idx;
+       work->vlan_proto = vp_oper->state.vlan_proto;
        work->priv = priv;
        INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler);
        queue_work(priv->mfunc.master.comm_wq, &work->work);
@@ -1992,6 +1995,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
        int port, err;
        struct mlx4_vport_state *vp_admin;
        struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_slave_state *slave_state =
+               &priv->mfunc.master.slave_state[slave];
        struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
                        &priv->dev, slave);
        int min_port = find_first_bit(actv_ports.ports,
@@ -2006,12 +2011,26 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
                        priv->mfunc.master.vf_admin[slave].enable_smi[port];
                vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
                vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
-               vp_oper->state = *vp_admin;
+               if (vp_admin->vlan_proto != htons(ETH_P_8021AD) ||
+                   slave_state->vst_qinq_supported) {
+                       vp_oper->state.vlan_proto   = vp_admin->vlan_proto;
+                       vp_oper->state.default_vlan = vp_admin->default_vlan;
+                       vp_oper->state.default_qos  = vp_admin->default_qos;
+               }
+               vp_oper->state.link_state = vp_admin->link_state;
+               vp_oper->state.mac        = vp_admin->mac;
+               vp_oper->state.spoofchk   = vp_admin->spoofchk;
+               vp_oper->state.tx_rate    = vp_admin->tx_rate;
+               vp_oper->state.qos_vport  = vp_admin->qos_vport;
+               vp_oper->state.guid       = vp_admin->guid;
+
                if (MLX4_VGT != vp_admin->default_vlan) {
                        err = __mlx4_register_vlan(&priv->dev, port,
                                                   vp_admin->default_vlan, &(vp_oper->vlan_idx));
                        if (err) {
                                vp_oper->vlan_idx = NO_INDX;
+                               vp_oper->state.default_vlan = MLX4_VGT;
+                               vp_oper->state.vlan_proto = htons(ETH_P_8021Q);
                                mlx4_warn(&priv->dev,
                                          "No vlan resources slave %d, port %d\n",
                                          slave, port);
@@ -2092,6 +2111,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
                mlx4_warn(dev, "Received reset from slave:%d\n", slave);
                slave_state[slave].active = false;
                slave_state[slave].old_vlan_api = false;
+               slave_state[slave].vst_qinq_supported = false;
                mlx4_master_deactivate_admin_state(priv, slave);
                for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) {
                                slave_state[slave].event_eq[i].eqn = -1;
@@ -2359,6 +2379,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                        vf_oper = &priv->mfunc.master.vf_oper[i];
                        s_state = &priv->mfunc.master.slave_state[i];
                        s_state->last_cmd = MLX4_COMM_CMD_RESET;
+                       s_state->vst_qinq_supported = false;
                        mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]);
                        for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
                                s_state->event_eq[j].eqn = -1;
@@ -2388,6 +2409,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                                admin_vport->qos_vport =
                                                MLX4_VPP_DEFAULT_VPORT;
                                oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
+                               admin_vport->vlan_proto = htons(ETH_P_8021Q);
+                               oper_vport->vlan_proto = htons(ETH_P_8021Q);
                                vf_oper->vport[port].vlan_idx = NO_INDX;
                                vf_oper->vport[port].mac_idx = NO_INDX;
                                mlx4_set_random_admin_guid(dev, i, port);
@@ -2948,10 +2971,13 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac)
 EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
 
 
-int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos,
+                    __be16 proto)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_vport_state *vf_admin;
+       struct mlx4_slave_state *slave_state;
+       struct mlx4_vport_oper_state *vf_oper;
        int slave;
 
        if ((!mlx4_is_master(dev)) ||
@@ -2961,12 +2987,31 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
        if ((vlan > 4095) || (qos > 7))
                return -EINVAL;
 
+       if (proto == htons(ETH_P_8021AD) &&
+           !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP))
+               return -EPROTONOSUPPORT;
+
+       if (proto != htons(ETH_P_8021Q) &&
+           proto != htons(ETH_P_8021AD))
+               return -EINVAL;
+
+       if ((proto == htons(ETH_P_8021AD)) &&
+           ((vlan == 0) || (vlan == MLX4_VGT)))
+               return -EINVAL;
+
        slave = mlx4_get_slave_indx(dev, vf);
        if (slave < 0)
                return -EINVAL;
 
+       slave_state = &priv->mfunc.master.slave_state[slave];
+       if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) &&
+           (!slave_state->vst_qinq_supported)) {
+               mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf);
+               return -EPROTONOSUPPORT;
+       }
        port = mlx4_slaves_closest_port(dev, slave, port);
        vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+       vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 
        if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos))
                return -EPERM;
@@ -2976,6 +3021,7 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
        else
                vf_admin->default_vlan = vlan;
        vf_admin->default_qos = qos;
+       vf_admin->vlan_proto = proto;
 
        /* If rate was configured prior to VST, we saved the configured rate
         * in vf_admin->rate and now, if priority supported we enforce the QoS
@@ -2984,7 +3030,12 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
            vf_admin->tx_rate)
                vf_admin->qos_vport = slave;
 
-       if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
+       /* Try to activate new vf state without restart,
+        * this option is not supported while moving to VST QinQ mode.
+        */
+       if ((proto == htons(ETH_P_8021AD) &&
+            vf_oper->state.vlan_proto != proto) ||
+           mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
                mlx4_info(dev,
                          "updating vf %d port %d config will take effect on next VF restart\n",
                          vf, port);
@@ -3128,6 +3179,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in
 
        ivf->vlan               = s_info->default_vlan;
        ivf->qos                = s_info->default_qos;
+       ivf->vlan_proto         = s_info->vlan_proto;
 
        if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info))
                ivf->max_tx_rate = s_info->tx_rate;
index 62516f8..7e703be 100644 (file)
@@ -2400,12 +2400,14 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac)
        return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64);
 }
 
-static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+                              __be16 vlan_proto)
 {
        struct mlx4_en_priv *en_priv = netdev_priv(dev);
        struct mlx4_en_dev *mdev = en_priv->mdev;
 
-       return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos);
+       return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos,
+                               vlan_proto);
 }
 
 static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
@@ -3224,6 +3226,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        }
 
        if (mlx4_is_slave(mdev->dev)) {
+               bool vlan_offload_disabled;
                int phv;
 
                err = get_phv_bit(mdev->dev, port, &phv);
@@ -3231,6 +3234,18 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
                        dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
                        priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV;
                }
+               err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port,
+                                                       &vlan_offload_disabled);
+               if (!err && vlan_offload_disabled) {
+                       dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+                                             NETIF_F_HW_VLAN_CTAG_RX |
+                                             NETIF_F_HW_VLAN_STAG_TX |
+                                             NETIF_F_HW_VLAN_STAG_RX);
+                       dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+                                          NETIF_F_HW_VLAN_CTAG_RX |
+                                          NETIF_F_HW_VLAN_STAG_TX |
+                                          NETIF_F_HW_VLAN_STAG_RX);
+               }
        } else {
                if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN &&
                    !(mdev->dev->caps.flags2 &
index f613977..cf8f8a7 100644 (file)
@@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
        return 0;
 
 err_out_unmap:
-       while (i >= 0)
-               mlx4_free_eq(dev, &priv->eq_table.eq[i--]);
+       while (i > 0)
+               mlx4_free_eq(dev, &priv->eq_table.eq[--i]);
 #ifdef CONFIG_RFS_ACCEL
        for (i = 1; i <= dev->caps.num_ports; i++) {
                if (mlx4_priv(dev)->port[i].rmap) {
index d728704..090bf81 100644 (file)
@@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                [31] = "Modifying loopback source checks using UPDATE_QP support",
                [32] = "Loopback source checks support",
                [33] = "RoCEv2 support",
-               [34] = "DMFS Sniffer support (UC & MC)"
+               [34] = "DMFS Sniffer support (UC & MC)",
+               [35] = "QinQ VST mode support",
        };
        int i;
 
@@ -248,6 +249,72 @@ out:
        return err;
 }
 
+static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port)
+{
+       struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_vport_state *vp_admin;
+       int err;
+
+       vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+       vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+       if (vp_admin->default_vlan != vp_oper->state.default_vlan) {
+               err = __mlx4_register_vlan(&priv->dev, port,
+                                          vp_admin->default_vlan,
+                                          &vp_oper->vlan_idx);
+               if (err) {
+                       vp_oper->vlan_idx = NO_INDX;
+                       mlx4_warn(&priv->dev,
+                                 "No vlan resources slave %d, port %d\n",
+                                 slave, port);
+                       return err;
+               }
+               mlx4_dbg(&priv->dev, "alloc vlan %d idx  %d slave %d port %d\n",
+                        (int)(vp_oper->state.default_vlan),
+                        vp_oper->vlan_idx, slave, port);
+       }
+       vp_oper->state.vlan_proto   = vp_admin->vlan_proto;
+       vp_oper->state.default_vlan = vp_admin->default_vlan;
+       vp_oper->state.default_qos  = vp_admin->default_qos;
+
+       return 0;
+}
+
+static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port)
+{
+       struct mlx4_vport_oper_state *vp_oper;
+       struct mlx4_slave_state *slave_state;
+       struct mlx4_vport_state *vp_admin;
+       int err;
+
+       vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+       vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+       slave_state = &priv->mfunc.master.slave_state[slave];
+
+       if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) ||
+           (!slave_state->active))
+               return 0;
+
+       if (vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
+           vp_oper->state.default_vlan == vp_admin->default_vlan &&
+           vp_oper->state.default_qos == vp_admin->default_qos)
+               return 0;
+
+       if (!slave_state->vst_qinq_supported) {
+               /* Warn and revert the request to set vst QinQ mode */
+               vp_admin->vlan_proto   = vp_oper->state.vlan_proto;
+               vp_admin->default_vlan = vp_oper->state.default_vlan;
+               vp_admin->default_qos  = vp_oper->state.default_qos;
+
+               mlx4_warn(&priv->dev,
+                         "Slave %d does not support VST QinQ mode\n", slave);
+               return 0;
+       }
+
+       err = mlx4_activate_vst_qinq(priv, slave, port);
+       return err;
+}
+
 int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
                                struct mlx4_vhcr *vhcr,
                                struct mlx4_cmd_mailbox *inbox,
@@ -311,14 +378,18 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_VF_ENABLE_QP0           0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
-#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31)
 #define QUERY_FUNC_CAP_PHV_BIT                 0x40
+#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE    0x20
+
+#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ       BIT(30)
+#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31)
 
        if (vhcr->op_modifier == 1) {
                struct mlx4_active_ports actv_ports =
                        mlx4_get_active_ports(dev, slave);
                int converted_port = mlx4_slave_convert_port(
                                dev, slave, vhcr->in_modifier);
+               struct mlx4_vport_oper_state *vp_oper;
 
                if (converted_port < 0)
                        return -EINVAL;
@@ -357,15 +428,24 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
                MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
                         QUERY_FUNC_CAP_PHYS_PORT_ID);
 
-               if (dev->caps.phv_bit[port]) {
-                       field = QUERY_FUNC_CAP_PHV_BIT;
-                       MLX4_PUT(outbox->buf, field,
-                                QUERY_FUNC_CAP_FLAGS0_OFFSET);
-               }
+               vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+               err = mlx4_handle_vst_qinq(priv, slave, port);
+               if (err)
+                       return err;
+
+               field = 0;
+               if (dev->caps.phv_bit[port])
+                       field |= QUERY_FUNC_CAP_PHV_BIT;
+               if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
+                       field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE;
+               MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET);
 
        } else if (vhcr->op_modifier == 0) {
                struct mlx4_active_ports actv_ports =
                        mlx4_get_active_ports(dev, slave);
+               struct mlx4_slave_state *slave_state =
+                       &priv->mfunc.master.slave_state[slave];
+
                /* enable rdma and ethernet interfaces, new quota locations,
                 * and reserved lkey
                 */
@@ -439,6 +519,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 
                size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00);
                MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET);
+
+               if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ)
+                       slave_state->vst_qinq_supported = true;
+
        } else
                err = -EINVAL;
 
@@ -454,10 +538,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
        u32                     size, qkey;
        int                     err = 0, quotas = 0;
        u32                     in_modifier;
+       u32                     slave_caps;
 
        op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
-       in_modifier = op_modifier ? gen_or_port :
+       slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ |
                QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS;
+       in_modifier = op_modifier ? gen_or_port : slave_caps;
 
        mailbox = mlx4_alloc_cmd_mailbox(dev);
        if (IS_ERR(mailbox))
@@ -612,8 +698,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,
                MLX4_GET(func_cap->phys_port_id, outbox,
                         QUERY_FUNC_CAP_PHYS_PORT_ID);
 
-       MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
-       func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT);
+       MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
 
        /* All other resources are allocated by the master, but we still report
         * 'num' and 'reserved' capabilities as follows:
@@ -690,6 +775,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET    0x52
 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET         0x55
 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET    0x56
+#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET       0x5D
 #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET                0x61
 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET          0x62
 #define QUERY_DEV_CAP_MAX_MCG_OFFSET           0x63
@@ -857,6 +943,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
        dev_cap->max_sq_desc_sz = size;
 
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET);
+       if (field & 0x1)
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET);
        dev_cap->max_qp_per_mcg = 1 << field;
        MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET);
@@ -2914,7 +3003,7 @@ int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv)
        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
        if (!err)
-               *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT;
+               *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT;
        return err;
 }
 EXPORT_SYMBOL(get_phv_bit);
@@ -2938,6 +3027,22 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val)
 }
 EXPORT_SYMBOL(set_phv_bit);
 
+int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
+                                     bool *vlan_offload_disabled)
+{
+       struct mlx4_func_cap func_cap;
+       int err;
+
+       memset(&func_cap, 0, sizeof(func_cap));
+       err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
+       if (!err)
+               *vlan_offload_disabled =
+                       !!(func_cap.flags0 &
+                          QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE);
+       return err;
+}
+EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled);
+
 void mlx4_replace_zero_macs(struct mlx4_dev *dev)
 {
        int i;
index cdbd76f..f11614f 100644 (file)
@@ -152,7 +152,7 @@ struct mlx4_func_cap {
        u32     qp1_proxy_qpn;
        u32     reserved_lkey;
        u8      physical_port;
-       u8      port_flags;
+       u8      flags0;
        u8      flags1;
        u64     phys_port_id;
        u32     extra_flags;
index 75dd2e3..7183ac4 100644 (file)
@@ -2970,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
                mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
                device_remove_file(&info->dev->persist->pdev->dev,
                                   &info->port_attr);
+               devlink_port_unregister(&info->devlink_port);
                info->port = -1;
        }
 
@@ -2984,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
        device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
        device_remove_file(&info->dev->persist->pdev->dev,
                           &info->port_mtu_attr);
+       devlink_port_unregister(&info->devlink_port);
+
 #ifdef CONFIG_RFS_ACCEL
        free_irq_cpu_rmap(info->rmap);
        info->rmap = NULL;
index c128ba3..e4878f3 100644 (file)
@@ -483,6 +483,7 @@ struct mlx4_slave_state {
        u8 init_port_mask;
        bool active;
        bool old_vlan_api;
+       bool vst_qinq_supported;
        u8 function;
        dma_addr_t vhcr_dma;
        u16 mtu[MLX4_MAX_PORTS + 1];
@@ -508,6 +509,7 @@ struct mlx4_vport_state {
        u64 mac;
        u16 default_vlan;
        u8  default_qos;
+       __be16 vlan_proto;
        u32 tx_rate;
        bool spoofchk;
        u32 link_state;
@@ -657,6 +659,7 @@ struct mlx4_vf_immed_vlan_work {
        u8                      qos_vport;
        u16                     vlan_id;
        u16                     orig_vlan_id;
+       __be16                  vlan_proto;
 };
 
 
index 8b81114..84d7857 100644 (file)
@@ -790,10 +790,22 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
                } else if (0 != vp_oper->state.default_vlan) {
-                       qpc->pri_path.vlan_control |=
-                               MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
-                               MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
-                               MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+                       if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) {
+                               /* vst QinQ should block untagged on TX,
+                                * but cvlan is in payload and phv is set so
+                                * hw see it as untagged. Block tagged instead.
+                                */
+                               qpc->pri_path.vlan_control |=
+                                       MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+                                       MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+                                       MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+                                       MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+                       } else { /* vst 802.1Q */
+                               qpc->pri_path.vlan_control |=
+                                       MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+                                       MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+                                       MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+                       }
                } else { /* priority tagged */
                        qpc->pri_path.vlan_control |=
                                MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
@@ -802,7 +814,11 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
 
                qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN;
                qpc->pri_path.vlan_index = vp_oper->vlan_idx;
-               qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+               qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN;
+               if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
+                       qpc->pri_path.fl |= MLX4_FL_SV;
+               else
+                       qpc->pri_path.fl |= MLX4_FL_CV;
                qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
                qpc->pri_path.sched_queue &= 0xC7;
                qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
@@ -5238,6 +5254,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
        u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) |
                       (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) |
                       (1ULL << MLX4_UPD_QP_PATH_MASK_CV) |
+                      (1ULL << MLX4_UPD_QP_PATH_MASK_SV) |
                       (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) |
                       (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) |
                       (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) |
@@ -5266,7 +5283,12 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
        else if (!work->vlan_id)
                vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
                        MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
-       else
+       else if (work->vlan_proto == htons(ETH_P_8021AD))
+               vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+                       MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+                       MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+                       MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+       else  /* vst 802.1Q */
                vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
                        MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
                        MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
@@ -5311,7 +5333,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work)
                                upd_context->qp_context.pri_path.fvl_rx =
                                        qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN;
                                upd_context->qp_context.pri_path.fl =
-                                       qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+                                       qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN;
+                               if (work->vlan_proto == htons(ETH_P_8021AD))
+                                       upd_context->qp_context.pri_path.fl |= MLX4_FL_SV;
+                               else
+                                       upd_context->qp_context.pri_path.fl |= MLX4_FL_CV;
                                upd_context->qp_context.pri_path.feup =
                                        qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
                                upd_context->qp_context.pri_path.sched_queue =
index 3460154..460363b 100644 (file)
@@ -869,6 +869,7 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
index a9fc9d4..b58cfe3 100644 (file)
@@ -446,6 +446,16 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
        kfree(rq->mpwqe.info);
 }
 
+static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+       struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+
+       if (rep && rep->vport != FDB_UPLINK_VPORT)
+               return true;
+
+       return false;
+}
+
 static int mlx5e_create_rq(struct mlx5e_channel *c,
                           struct mlx5e_rq_param *param,
                           struct mlx5e_rq *rq)
@@ -487,6 +497,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 
        switch (priv->params.rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+               if (mlx5e_is_vf_vport_rep(priv)) {
+                       err = -EINVAL;
+                       goto err_rq_wq_destroy;
+               }
+
                rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
@@ -512,7 +527,11 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                        goto err_rq_wq_destroy;
                }
 
-               rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+               if (mlx5e_is_vf_vport_rep(priv))
+                       rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep;
+               else
+                       rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+
                rq->alloc_wqe = mlx5e_alloc_rx_wqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
 
@@ -2898,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
        return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
 }
 
-static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+                            __be16 vlan_proto)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5_core_dev *mdev = priv->mdev;
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
                                           vlan, qos);
 }
@@ -3726,9 +3749,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
                mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
                rep.load = mlx5e_nic_rep_load;
                rep.unload = mlx5e_nic_rep_unload;
-               rep.vport = 0;
+               rep.vport = FDB_UPLINK_VPORT;
                rep.priv_data = priv;
-               mlx5_eswitch_register_vport_rep(esw, &rep);
+               mlx5_eswitch_register_vport_rep(esw, 0, &rep);
        }
 }
 
@@ -3867,7 +3890,7 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
                rep.unload = mlx5e_vport_rep_unload;
                rep.vport = vport;
                ether_addr_copy(rep.hw_id, mac);
-               mlx5_eswitch_register_vport_rep(esw, &rep);
+               mlx5_eswitch_register_vport_rep(esw, vport, &rep);
        }
 }
 
index 0a81bd3..c6de6fb 100644 (file)
@@ -36,6 +36,7 @@
 #include <net/busy_poll.h>
 #include "en.h"
 #include "en_tc.h"
+#include "eswitch.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -629,7 +630,6 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
        rq->stats.packets++;
        rq->stats.bytes += cqe_bcnt;
        mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
-       napi_gro_receive(rq->cq.napi, skb);
 }
 
 static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
@@ -733,20 +733,15 @@ static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
        }
 }
 
-void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+static inline
+struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+                            u16 wqe_counter, u32 cqe_bcnt)
 {
        struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
        struct mlx5e_dma_info *di;
-       struct mlx5e_rx_wqe *wqe;
-       __be16 wqe_counter_be;
        struct sk_buff *skb;
-       u16 wqe_counter;
        void *va, *data;
-       u32 cqe_bcnt;
 
-       wqe_counter_be = cqe->wqe_counter;
-       wqe_counter    = be16_to_cpu(wqe_counter_be);
-       wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
        di             = &rq->dma_info[wqe_counter];
        va             = page_address(di->page);
        data           = va + MLX5_RX_HEADROOM;
@@ -757,22 +752,21 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
                                      rq->buff.wqe_sz,
                                      DMA_FROM_DEVICE);
        prefetch(data);
-       cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
 
        if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
                rq->stats.wqe_err++;
                mlx5e_page_release(rq, di, true);
-               goto wq_ll_pop;
+               return NULL;
        }
 
        if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
-               goto wq_ll_pop; /* page/packet was consumed by XDP */
+               return NULL; /* page/packet was consumed by XDP */
 
        skb = build_skb(va, RQ_PAGE_SIZE(rq));
        if (unlikely(!skb)) {
                rq->stats.buff_alloc_err++;
                mlx5e_page_release(rq, di, true);
-               goto wq_ll_pop;
+               return NULL;
        }
 
        /* queue up for recycling ..*/
@@ -782,7 +776,60 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
        skb_reserve(skb, MLX5_RX_HEADROOM);
        skb_put(skb, cqe_bcnt);
 
+       return skb;
+}
+
+void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+       struct mlx5e_rx_wqe *wqe;
+       __be16 wqe_counter_be;
+       struct sk_buff *skb;
+       u16 wqe_counter;
+       u32 cqe_bcnt;
+
+       wqe_counter_be = cqe->wqe_counter;
+       wqe_counter    = be16_to_cpu(wqe_counter_be);
+       wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+       cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+
+       skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
+       if (!skb)
+               goto wq_ll_pop;
+
        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+       napi_gro_receive(rq->cq.napi, skb);
+
+wq_ll_pop:
+       mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
+                      &wqe->next.next_wqe_index);
+}
+
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+       struct net_device *netdev = rq->netdev;
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
+       struct mlx5e_rx_wqe *wqe;
+       struct sk_buff *skb;
+       __be16 wqe_counter_be;
+       u16 wqe_counter;
+       u32 cqe_bcnt;
+
+       wqe_counter_be = cqe->wqe_counter;
+       wqe_counter    = be16_to_cpu(wqe_counter_be);
+       wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+       cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+
+       skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
+       if (!skb)
+               goto wq_ll_pop;
+
+       mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+
+       if (rep->vlan && skb_vlan_tag_present(skb))
+               skb_vlan_pop(skb);
+
+       napi_gro_receive(rq->cq.napi, skb);
 
 wq_ll_pop:
        mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
@@ -861,6 +908,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 
        mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
        mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+       napi_gro_receive(rq->cq.napi, skb);
 
 mpwrq_cqe_out:
        if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
index 22cfc4a..a350b71 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/rhashtable.h>
 #include <net/switchdev.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -47,6 +48,7 @@ struct mlx5e_tc_flow {
        struct rhash_head       node;
        u64                     cookie;
        struct mlx5_flow_rule   *rule;
+       struct mlx5_esw_flow_attr *attr;
 };
 
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
@@ -114,27 +116,30 @@ err_create_ft:
 
 static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                                                    struct mlx5_flow_spec *spec,
-                                                   u32 action, u32 dst_vport)
+                                                   struct mlx5_esw_flow_attr *attr)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-       struct mlx5_eswitch_rep *rep = priv->ppriv;
-       u32 src_vport;
+       int err;
 
-       if (rep->vport) /* set source vport for the flow */
-               src_vport = rep->vport;
-       else
-               src_vport = FDB_UPLINK_VPORT;
+       err = mlx5_eswitch_add_vlan_action(esw, attr);
+       if (err)
+               return ERR_PTR(err);
 
-       return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport);
+       return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
-                             struct mlx5_flow_rule *rule)
+                             struct mlx5_flow_rule *rule,
+                             struct mlx5_esw_flow_attr *attr)
 {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_fc *counter = NULL;
 
        counter = mlx5_flow_rule_counter(rule);
 
+       if (esw && esw->mode == SRIOV_OFFLOADS)
+               mlx5_eswitch_del_vlan_action(esw, attr);
+
        mlx5_del_flow_rule(rule);
 
        mlx5_fc_destroy(priv->mdev, counter);
@@ -159,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_BASIC) |
              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN) |
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_PORTS))) {
@@ -222,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
                                key->src);
        }
 
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_dissector_key_vlan *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->key);
+               struct flow_dissector_key_vlan *mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_VLAN,
+                                                 f->mask);
+               if (mask->vlan_id) {
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
+
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+               }
+       }
+
        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_dissector_key_ipv4_addrs *key =
                        skb_flow_dissector_target(f->dissector,
@@ -361,7 +385,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 }
 
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-                               u32 *action, u32 *dest_vport)
+                               struct mlx5_esw_flow_attr *attr)
 {
        const struct tc_action *a;
        LIST_HEAD(actions);
@@ -369,17 +393,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
        if (tc_no_actions(exts))
                return -EINVAL;
 
-       *action = 0;
+       memset(attr, 0, sizeof(*attr));
+       attr->in_rep = priv->ppriv;
 
        tcf_exts_to_list(exts, &actions);
        list_for_each_entry(a, &actions, list) {
-               /* Only support a single action per rule */
-               if (*action)
-                       return -EINVAL;
-
                if (is_tcf_gact_shot(a)) {
-                       *action = MLX5_FLOW_CONTEXT_ACTION_DROP |
-                                 MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        continue;
                }
 
@@ -387,7 +408,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        int ifindex = tcf_mirred_ifindex(a);
                        struct net_device *out_dev;
                        struct mlx5e_priv *out_priv;
-                       struct mlx5_eswitch_rep *out_rep;
 
                        out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
 
@@ -397,13 +417,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                return -EINVAL;
                        }
 
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                        out_priv = netdev_priv(out_dev);
-                       out_rep  = out_priv->ppriv;
-                       if (out_rep->vport == 0)
-                               *dest_vport = FDB_UPLINK_VPORT;
-                       else
-                               *dest_vport = out_rep->vport;
-                       *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+                       attr->out_rep = out_priv->ppriv;
+                       continue;
+               }
+
+               if (is_tcf_vlan(a)) {
+                       if (tcf_vlan_action(a) == VLAN_F_POP) {
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+                       } else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
+                               if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
+                                       return -EOPNOTSUPP;
+
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+                               attr->vlan = tcf_vlan_push_vid(a);
+                       }
                        continue;
                }
 
@@ -417,18 +446,29 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 {
        struct mlx5e_tc_table *tc = &priv->fs.tc;
        int err = 0;
-       u32 flow_tag, action, dest_vport = 0;
+       bool fdb_flow = false;
+       u32 flow_tag, action;
        struct mlx5e_tc_flow *flow;
        struct mlx5_flow_spec *spec;
        struct mlx5_flow_rule *old = NULL;
+       struct mlx5_esw_flow_attr *old_attr;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
+       if (esw && esw->mode == SRIOV_OFFLOADS)
+               fdb_flow = true;
+
        flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
                                      tc->ht_params);
-       if (flow)
+       if (flow) {
                old = flow->rule;
-       else
-               flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+               old_attr = flow->attr;
+       } else {
+               if (fdb_flow)
+                       flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr),
+                                      GFP_KERNEL);
+               else
+                       flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+       }
 
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec || !flow) {
@@ -442,11 +482,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
        if (err < 0)
                goto err_free;
 
-       if (esw && esw->mode == SRIOV_OFFLOADS) {
-               err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport);
+       if (fdb_flow) {
+               flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
+               err = parse_tc_fdb_actions(priv, f->exts, flow->attr);
                if (err < 0)
                        goto err_free;
-               flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport);
+               flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
        } else {
                err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
                if (err < 0)
@@ -465,7 +506,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
                goto err_del_rule;
 
        if (old)
-               mlx5e_tc_del_flow(priv, old);
+               mlx5e_tc_del_flow(priv, old, old_attr);
 
        goto out;
 
@@ -493,7 +534,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 
        rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
 
-       mlx5e_tc_del_flow(priv, flow->rule);
+       mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
 
        kfree(flow);
 
@@ -550,7 +591,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
        struct mlx5e_tc_flow *flow = ptr;
        struct mlx5e_priv *priv = arg;
 
-       mlx5e_tc_del_flow(priv, flow->rule);
+       mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
        kfree(flow);
 }
 
index 4927494..abbf2c3 100644 (file)
@@ -127,7 +127,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
 }
 
 static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
-                                 u16 vlan, u8 qos, bool set)
+                                 u16 vlan, u8 qos, u8 set_flags)
 {
        u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
 
@@ -135,14 +135,18 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
            !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
                return -ENOTSUPP;
 
-       esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
-                 vport, vlan, qos, set);
-       if (set) {
+       esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
+                 vport, vlan, qos, set_flags);
+
+       if (set_flags & SET_VLAN_STRIP)
                MLX5_SET(modify_esw_vport_context_in, in,
                         esw_vport_context.vport_cvlan_strip, 1);
+
+       if (set_flags & SET_VLAN_INSERT) {
                /* insert only if no vlan in packet */
                MLX5_SET(modify_esw_vport_context_in, in,
                         esw_vport_context.vport_cvlan_insert, 1);
+
                MLX5_SET(modify_esw_vport_context_in, in,
                         esw_vport_context.cvlan_pcp, qos);
                MLX5_SET(modify_esw_vport_context_in, in,
@@ -1489,6 +1493,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 
 abort:
        esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+       esw->mode = SRIOV_NONE;
        return err;
 }
 
@@ -1777,25 +1782,21 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
        return 0;
 }
 
-int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-                               int vport, u16 vlan, u8 qos)
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+                                 int vport, u16 vlan, u8 qos, u8 set_flags)
 {
        struct mlx5_vport *evport;
        int err = 0;
-       int set = 0;
 
        if (!ESW_ALLOWED(esw))
                return -EPERM;
        if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
                return -EINVAL;
 
-       if (vlan || qos)
-               set = 1;
-
        mutex_lock(&esw->state_lock);
        evport = &esw->vports[vport];
 
-       err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
+       err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
        if (err)
                goto unlock;
 
@@ -1813,6 +1814,17 @@ unlock:
        return err;
 }
 
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+                               int vport, u16 vlan, u8 qos)
+{
+       u8 set_flags = 0;
+
+       if (vlan || qos)
+               set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+       return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+}
+
 int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
                                    int vport, bool spoofchk)
 {
index b96e8c9..2e2938e 100644 (file)
@@ -157,6 +157,7 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *miss_grp;
                        struct mlx5_flow_rule  *miss_rule;
+                       int vlan_push_pop_refcount;
                } offloads;
        };
 };
@@ -178,11 +179,14 @@ struct mlx5_eswitch_rep {
        void                   (*unload)(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch_rep *rep);
        u16                    vport;
-       struct mlx5_flow_rule *vport_rx_rule;
+       u8                     hw_id[ETH_ALEN];
        void                  *priv_data;
+
+       struct mlx5_flow_rule *vport_rx_rule;
        struct list_head       vport_sqs_list;
+       u16                    vlan;
+       u32                    vlan_refcount;
        bool                   valid;
-       u8                     hw_id[ETH_ALEN];
 };
 
 struct mlx5_esw_offload {
@@ -237,14 +241,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 struct ifla_vf_stats *vf_stats);
 
 struct mlx5_flow_spec;
+struct mlx5_esw_flow_attr;
 
 struct mlx5_flow_rule *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                struct mlx5_flow_spec *spec,
-                               u32 action, u32 src_vport, u32 dst_vport);
+                               struct mlx5_esw_flow_attr *attr);
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
+enum {
+       SET_VLAN_STRIP  = BIT(0),
+       SET_VLAN_INSERT = BIT(1)
+};
+
+#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x40
+#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80
+
+struct mlx5_esw_flow_attr {
+       struct mlx5_eswitch_rep *in_rep;
+       struct mlx5_eswitch_rep *out_rep;
+
+       int     action;
+       u16     vlan;
+       bool    vlan_handled;
+};
+
 int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u16 *sqns_array, int sqns_num);
@@ -254,9 +276,17 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+                                    int vport_index,
                                     struct mlx5_eswitch_rep *rep);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport);
+                                      int vport_index);
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+                                struct mlx5_esw_flow_attr *attr);
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+                                struct mlx5_esw_flow_attr *attr);
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+                                 int vport, u16 vlan, u8 qos, u8 set_flags);
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
index 3dc83a9..c55ad8d 100644 (file)
@@ -46,19 +46,22 @@ enum {
 struct mlx5_flow_rule *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                struct mlx5_flow_spec *spec,
-                               u32 action, u32 src_vport, u32 dst_vport)
+                               struct mlx5_esw_flow_attr *attr)
 {
        struct mlx5_flow_destination dest = { 0 };
        struct mlx5_fc *counter = NULL;
        struct mlx5_flow_rule *rule;
        void *misc;
+       int action;
 
        if (esw->mode != SRIOV_OFFLOADS)
                return ERR_PTR(-EOPNOTSUPP);
 
+       action = attr->action;
+
        if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-               dest.vport_num = dst_vport;
+               dest.vport_num = attr->out_rep->vport;
                action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(esw->dev, true);
@@ -69,7 +72,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        }
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-       MLX5_SET(fte_match_set_misc, misc, source_port, src_vport);
+       MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
@@ -86,6 +89,186 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        return rule;
 }
 
+static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+{
+       struct mlx5_eswitch_rep *rep;
+       int vf_vport, err = 0;
+
+       esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
+       for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
+               rep = &esw->offloads.vport_reps[vf_vport];
+               if (!rep->valid)
+                       continue;
+
+               err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
+               if (err)
+                       goto out;
+       }
+
+out:
+       return err;
+}
+
+static struct mlx5_eswitch_rep *
+esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
+{
+       struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
+
+       in_rep  = attr->in_rep;
+       out_rep = attr->out_rep;
+
+       if (push)
+               vport = in_rep;
+       else if (pop)
+               vport = out_rep;
+       else
+               vport = in_rep;
+
+       return vport;
+}
+
+static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
+                                    bool push, bool pop, bool fwd)
+{
+       struct mlx5_eswitch_rep *in_rep, *out_rep;
+
+       if ((push || pop) && !fwd)
+               goto out_notsupp;
+
+       in_rep  = attr->in_rep;
+       out_rep = attr->out_rep;
+
+       if (push && in_rep->vport == FDB_UPLINK_VPORT)
+               goto out_notsupp;
+
+       if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+               goto out_notsupp;
+
+       /* vport has vlan push configured, can't offload VF --> wire rules w.o it */
+       if (!push && !pop && fwd)
+               if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+                       goto out_notsupp;
+
+       /* protects against (1) setting rules with different vlans to push and
+        * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
+        */
+       if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan))
+               goto out_notsupp;
+
+       return 0;
+
+out_notsupp:
+       return -ENOTSUPP;
+}
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+                                struct mlx5_esw_flow_attr *attr)
+{
+       struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+       struct mlx5_eswitch_rep *vport = NULL;
+       bool push, pop, fwd;
+       int err = 0;
+
+       push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+       pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+       fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+       err = esw_add_vlan_action_check(attr, push, pop, fwd);
+       if (err)
+               return err;
+
+       attr->vlan_handled = false;
+
+       vport = esw_vlan_action_get_vport(attr, push, pop);
+
+       if (!push && !pop && fwd) {
+               /* tracks VF --> wire rules without vlan push action */
+               if (attr->out_rep->vport == FDB_UPLINK_VPORT) {
+                       vport->vlan_refcount++;
+                       attr->vlan_handled = true;
+               }
+
+               return 0;
+       }
+
+       if (!push && !pop)
+               return 0;
+
+       if (!(offloads->vlan_push_pop_refcount)) {
+               /* it's the 1st vlan rule, apply global vlan pop policy */
+               err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
+               if (err)
+                       goto out;
+       }
+       offloads->vlan_push_pop_refcount++;
+
+       if (push) {
+               if (vport->vlan_refcount)
+                       goto skip_set_push;
+
+               err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0,
+                                                   SET_VLAN_INSERT | SET_VLAN_STRIP);
+               if (err)
+                       goto out;
+               vport->vlan = attr->vlan;
+skip_set_push:
+               vport->vlan_refcount++;
+       }
+out:
+       if (!err)
+               attr->vlan_handled = true;
+       return err;
+}
+
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+                                struct mlx5_esw_flow_attr *attr)
+{
+       struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+       struct mlx5_eswitch_rep *vport = NULL;
+       bool push, pop, fwd;
+       int err = 0;
+
+       if (!attr->vlan_handled)
+               return 0;
+
+       push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+       pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+       fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+       vport = esw_vlan_action_get_vport(attr, push, pop);
+
+       if (!push && !pop && fwd) {
+               /* tracks VF --> wire rules without vlan push action */
+               if (attr->out_rep->vport == FDB_UPLINK_VPORT)
+                       vport->vlan_refcount--;
+
+               return 0;
+       }
+
+       if (push) {
+               vport->vlan_refcount--;
+               if (vport->vlan_refcount)
+                       goto skip_unset_push;
+
+               vport->vlan = 0;
+               err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
+                                                   0, 0, SET_VLAN_STRIP);
+               if (err)
+                       goto out;
+       }
+
+skip_unset_push:
+       offloads->vlan_push_pop_refcount--;
+       if (offloads->vlan_push_pop_refcount)
+               return 0;
+
+       /* no more vlan rules, stop global vlan pop policy */
+       err = esw_set_global_vlan_pop(esw, 0);
+
+out:
+       return err;
+}
+
 static struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
@@ -144,16 +327,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 {
        struct mlx5_flow_rule *flow_rule;
        struct mlx5_esw_sq *esw_sq;
-       int vport;
        int err;
        int i;
 
        if (esw->mode != SRIOV_OFFLOADS)
                return 0;
 
-       vport = rep->vport == 0 ?
-               FDB_UPLINK_VPORT : rep->vport;
-
        for (i = 0; i < sqns_num; i++) {
                esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL);
                if (!esw_sq) {
@@ -163,7 +342,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 
                /* Add re-inject rule to the PF/representor sqs */
                flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
-                                                               vport,
+                                                               rep->vport,
                                                                sqns_array[i]);
                if (IS_ERR(flow_rule)) {
                        err = PTR_ERR(flow_rule);
@@ -446,7 +625,7 @@ out:
 
 static int esw_offloads_start(struct mlx5_eswitch *esw)
 {
-       int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+       int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
        if (esw->mode != SRIOV_LEGACY) {
                esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
@@ -455,8 +634,12 @@ static int esw_offloads_start(struct mlx5_eswitch *esw)
 
        mlx5_eswitch_disable_sriov(esw);
        err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
-       if (err)
-               esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err);
+       if (err) {
+               esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
+               err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+               if (err1)
+                       esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
+       }
        return err;
 }
 
@@ -508,12 +691,16 @@ create_ft_err:
 
 static int esw_offloads_stop(struct mlx5_eswitch *esw)
 {
-       int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+       int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
        mlx5_eswitch_disable_sriov(esw);
        err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
-       if (err)
-               esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err);
+       if (err) {
+               esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
+               err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+               if (err1)
+                       esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
+       }
 
        return err;
 }
@@ -612,27 +799,36 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 }
 
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-                                    struct mlx5_eswitch_rep *rep)
+                                    int vport_index,
+                                    struct mlx5_eswitch_rep *__rep)
 {
        struct mlx5_esw_offload *offloads = &esw->offloads;
+       struct mlx5_eswitch_rep *rep;
+
+       rep = &offloads->vport_reps[vport_index];
+
+       memset(rep, 0, sizeof(*rep));
 
-       memcpy(&offloads->vport_reps[rep->vport], rep,
-              sizeof(struct mlx5_eswitch_rep));
+       rep->load   = __rep->load;
+       rep->unload = __rep->unload;
+       rep->vport  = __rep->vport;
+       rep->priv_data = __rep->priv_data;
+       ether_addr_copy(rep->hw_id, __rep->hw_id);
 
-       INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list);
-       offloads->vport_reps[rep->vport].valid = true;
+       INIT_LIST_HEAD(&rep->vport_sqs_list);
+       rep->valid = true;
 }
 
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-                                      int vport)
+                                      int vport_index)
 {
        struct mlx5_esw_offload *offloads = &esw->offloads;
        struct mlx5_eswitch_rep *rep;
 
-       rep = &offloads->vport_reps[vport];
+       rep = &offloads->vport_reps[vport_index];
 
-       if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled)
+       if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
                rep->unload(esw, rep);
 
-       offloads->vport_reps[vport].valid = false;
+       rep->valid = false;
 }
index 7a0415e..113c323 100644 (file)
@@ -401,11 +401,11 @@ struct mlx5_cmd_fc_bulk *
 mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
 {
        struct mlx5_cmd_fc_bulk *b;
-       int outlen = sizeof(*b) +
+       int outlen =
                MLX5_ST_SZ_BYTES(query_flow_counter_out) +
                MLX5_ST_SZ_BYTES(traffic_counter) * num;
 
-       b = kzalloc(outlen, GFP_KERNEL);
+       b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
        if (!b)
                return NULL;
 
index 80f27b5..fd74d10 100644 (file)
@@ -1664,7 +1664,7 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev,
                return;
        mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl);
        for (i = 0; i < len; i++)
-               data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0;
+               data[data_index + i] = hw_stats[i].getter(ppcnt_pl);
 }
 
 static void mlxsw_sp_port_get_stats(struct net_device *dev,
index 415691e..aee3fd2 100644 (file)
@@ -2094,12 +2094,16 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 
        nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
                               GFP_KERNEL);
-       if (!nn->rx_rings)
+       if (!nn->rx_rings) {
+               err = -ENOMEM;
                goto err_free_lsc;
+       }
        nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
                               GFP_KERNEL);
-       if (!nn->tx_rings)
+       if (!nn->tx_rings) {
+               err = -ENOMEM;
                goto err_free_rx_rings;
+       }
 
        for (r = 0; r < nn->num_r_vecs; r++) {
                err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
index 7d39cb9..bdc9ba9 100644 (file)
@@ -1181,8 +1181,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
        p_drv_version = &union_data.drv_version;
        p_drv_version->version = p_ver->version;
 
-       for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) {
-               val = cpu_to_be32(p_ver->name[i]);
+       for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) {
+               val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)]));
                *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val;
        }
 
index cd23a29..0e198fe 100644 (file)
@@ -100,7 +100,8 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev,
 static void qede_link_update(void *dev, struct qed_link_output *link);
 
 #ifdef CONFIG_QED_SRIOV
-static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos)
+static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
+                           __be16 vlan_proto)
 {
        struct qede_dev *edev = netdev_priv(ndev);
 
@@ -109,6 +110,9 @@ static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos)
                return -EINVAL;
        }
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n",
                   vlan, vf);
 
index 24061b9..5f32765 100644 (file)
@@ -238,7 +238,7 @@ int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *);
 int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int);
 int qlcnic_sriov_get_vf_config(struct net_device *, int ,
                               struct ifla_vf_info *);
-int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8);
+int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool);
 #else
 static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {}
index afd687e..50eaafa 100644 (file)
@@ -1915,7 +1915,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf,
 }
 
 int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf,
-                            u16 vlan, u8 qos)
+                            u16 vlan, u8 qos, __be16 vlan_proto)
 {
        struct qlcnic_adapter *adapter = netdev_priv(netdev);
        struct qlcnic_sriov *sriov = adapter->ahw->sriov;
@@ -1928,6 +1928,9 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf,
        if (vf >= sriov->num_vfs || qos > 7)
                return -EINVAL;
 
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
        if (vlan > MAX_VLAN_ID) {
                netdev_err(netdev,
                           "Invalid VLAN ID, allowed range is [0 - %d]\n",
index 9fbc12a..2415209 100644 (file)
@@ -1156,7 +1156,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx)
         * acquired locks in the wrong order.
         */
        list_for_each_entry_safe(async, next, &mcdi->async_list, list) {
-               async->complete(efx, async->cookie, -ENETDOWN, NULL, 0);
+               if (async->complete)
+                       async->complete(efx, async->cookie, -ENETDOWN, NULL, 0);
                list_del(&async->list);
                kfree(async);
        }
index 816c446..9abcf4a 100644 (file)
@@ -22,7 +22,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
 }
 
 int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
-                         u8 qos)
+                         u8 qos, __be16 vlan_proto)
 {
        struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -31,6 +31,9 @@ int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
                    (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT)))
                        return -EINVAL;
 
+               if (vlan_proto != htons(ETH_P_8021Q))
+                       return -EPROTONOSUPPORT;
+
                return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos);
        } else {
                return -EOPNOTSUPP;
index 400df52..ba1762e 100644 (file)
@@ -16,7 +16,7 @@
 
 int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac);
 int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
-                         u8 qos);
+                         u8 qos, __be16 vlan_proto);
 int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
                              bool spoofchk);
 int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
index 503a3b6..7321259 100644 (file)
@@ -2323,6 +2323,9 @@ static int smc_drv_probe(struct platform_device *pdev)
                } else {
                        lp->cfg.flags |= SMC91X_USE_16BIT;
                }
+               if (!device_property_read_u32(&pdev->dev, "reg-shift",
+                                             &val))
+                       lp->io_shift = val;
        }
 #endif
 
index 6f6bbc5..7df4ff1 100644 (file)
@@ -261,7 +261,7 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode)
        }
        if (mode & WAKE_UCAST) {
                pr_debug("GMAC: WOL on global unicast\n");
-               pmt |= global_unicast;
+               pmt |= power_down | global_unicast | wake_up_frame_en;
        }
 
        writel(pmt, ioaddr + GMAC_PMT);
index df5580d..51019b7 100644 (file)
@@ -102,7 +102,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode)
        }
        if (mode & WAKE_UCAST) {
                pr_debug("GMAC: WOL on global unicast\n");
-               pmt |= global_unicast;
+               pmt |= power_down | global_unicast | wake_up_frame_en;
        }
 
        writel(pmt, ioaddr + GMAC_PMT);
index 284b97b..f4fbcb5 100644 (file)
@@ -433,7 +433,7 @@ struct nvsp_1_message_revoke_send_buffer {
  */
 struct nvsp_1_message_send_rndis_packet {
        /*
-        * This field is specified by RNIDS. They assume there's two different
+        * This field is specified by RNDIS. They assume there's two different
         * channels of communication. However, the Network VSP only has one.
         * Therefore, the channel travels with the RNDIS packet.
         */
@@ -578,7 +578,7 @@ struct nvsp_5_send_indirect_table {
        /* The number of entries in the send indirection table */
        u32 count;
 
-       /* The offset of the send indireciton table from top of this struct.
+       /* The offset of the send indirection table from top of this struct.
         * The send indirection table tells which channel to put the send
         * traffic on. Each entry is a channel number.
         */
@@ -649,6 +649,8 @@ struct multi_recv_comp {
 struct netvsc_stats {
        u64 packets;
        u64 bytes;
+       u64 broadcast;
+       u64 multicast;
        struct u64_stats_sync syncp;
 };
 
@@ -695,9 +697,8 @@ struct net_device_context {
        bool start_remove;
 
        /* State to manage the associated VF interface. */
-       struct net_device *vf_netdev;
-       bool vf_inject;
-       atomic_t vf_use_cnt;
+       struct net_device __rcu *vf_netdev;
+
        /* 1: allocated, serial number is valid. 0: not allocated */
        u32 vf_alloc;
        /* Serial number of the VF to team with */
@@ -733,7 +734,6 @@ struct netvsc_device {
        struct nvsp_message channel_init_pkt;
 
        struct nvsp_message revoke_packet;
-       /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */
 
        struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX];
        u32 send_table[VRSS_SEND_TAB_SIZE];
@@ -1238,7 +1238,7 @@ struct rndis_message {
        u32 ndis_msg_type;
 
        /* Total length of this message, from the beginning */
-       /* of the sruct rndis_message, in bytes. */
+       /* of the struct rndis_message, in bytes. */
        u32 msg_len;
 
        /* Actual message */
index ff05b9b..720b5fa 100644 (file)
@@ -635,7 +635,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
                q_idx = nvsc_packet->q_idx;
                channel = incoming_channel;
 
-               dev_kfree_skb_any(skb);
+               dev_consume_skb_any(skb);
        }
 
        num_outstanding_sends =
@@ -944,7 +944,7 @@ int netvsc_send(struct hv_device *device,
                }
 
                if (msdp->skb)
-                       dev_kfree_skb_any(msdp->skb);
+                       dev_consume_skb_any(msdp->skb);
 
                if (xmit_more && !packet->cp_partial) {
                        msdp->skb = skb;
index 2360e70..52eeb2f 100644 (file)
@@ -667,51 +667,23 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 {
        struct net_device *net = hv_get_drvdata(device_obj);
        struct net_device_context *net_device_ctx = netdev_priv(net);
+       struct net_device *vf_netdev;
        struct sk_buff *skb;
-       struct sk_buff *vf_skb;
        struct netvsc_stats *rx_stats;
-       u32 bytes_recvd = packet->total_data_buflen;
-       int ret = 0;
 
-       if (!net || net->reg_state != NETREG_REGISTERED)
+       if (net->reg_state != NETREG_REGISTERED)
                return NVSP_STAT_FAIL;
 
-       if (READ_ONCE(net_device_ctx->vf_inject)) {
-               atomic_inc(&net_device_ctx->vf_use_cnt);
-               if (!READ_ONCE(net_device_ctx->vf_inject)) {
-                       /*
-                        * We raced; just move on.
-                        */
-                       atomic_dec(&net_device_ctx->vf_use_cnt);
-                       goto vf_injection_done;
-               }
-
-               /*
-                * Inject this packet into the VF inerface.
-                * On Hyper-V, multicast and brodcast packets
-                * are only delivered on the synthetic interface
-                * (after subjecting these to policy filters on
-                * the host). Deliver these via the VF interface
-                * in the guest.
-                */
-               vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
-                                              packet, csum_info, *data,
-                                              vlan_tci);
-               if (vf_skb != NULL) {
-                       ++net_device_ctx->vf_netdev->stats.rx_packets;
-                       net_device_ctx->vf_netdev->stats.rx_bytes +=
-                               bytes_recvd;
-                       netif_receive_skb(vf_skb);
-               } else {
-                       ++net->stats.rx_dropped;
-                       ret = NVSP_STAT_FAIL;
-               }
-               atomic_dec(&net_device_ctx->vf_use_cnt);
-               return ret;
-       }
-
-vf_injection_done:
-       rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
+       /*
+        * If necessary, inject this packet into the VF interface.
+        * On Hyper-V, multicast and brodcast packets are only delivered
+        * to the synthetic interface (after subjecting these to
+        * policy filters on the host). Deliver these via the VF
+        * interface in the guest.
+        */
+       vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
+       if (vf_netdev && (vf_netdev->flags & IFF_UP))
+               net = vf_netdev;
 
        /* Allocate a skb - TODO direct I/O to pages? */
        skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
@@ -719,12 +691,25 @@ vf_injection_done:
                ++net->stats.rx_dropped;
                return NVSP_STAT_FAIL;
        }
-       skb_record_rx_queue(skb, channel->
-                           offermsg.offer.sub_channel_index);
 
+       if (net != vf_netdev)
+               skb_record_rx_queue(skb,
+                                   channel->offermsg.offer.sub_channel_index);
+
+       /*
+        * Even if injecting the packet, record the statistics
+        * on the synthetic device because modifying the VF device
+        * statistics will not work correctly.
+        */
+       rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
        u64_stats_update_begin(&rx_stats->syncp);
        rx_stats->packets++;
        rx_stats->bytes += packet->total_data_buflen;
+
+       if (skb->pkt_type == PACKET_BROADCAST)
+               ++rx_stats->broadcast;
+       else if (skb->pkt_type == PACKET_MULTICAST)
+               ++rx_stats->multicast;
        u64_stats_update_end(&rx_stats->syncp);
 
        /*
@@ -967,7 +952,7 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
                                                            cpu);
                struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
                                                            cpu);
-               u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+               u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast;
                unsigned int start;
 
                do {
@@ -980,12 +965,14 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
                        start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
                        rx_packets = rx_stats->packets;
                        rx_bytes = rx_stats->bytes;
+                       rx_multicast = rx_stats->multicast + rx_stats->broadcast;
                } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
 
                t->tx_bytes     += tx_bytes;
                t->tx_packets   += tx_packets;
                t->rx_bytes     += rx_bytes;
                t->rx_packets   += rx_packets;
+               t->multicast    += rx_multicast;
        }
 
        t->tx_dropped   = net->stats.tx_dropped;
@@ -1215,22 +1202,44 @@ static void netvsc_free_netdev(struct net_device *netdev)
        free_netdev(netdev);
 }
 
-static struct net_device *get_netvsc_net_device(char *mac)
+static struct net_device *get_netvsc_bymac(const u8 *mac)
 {
-       struct net_device *dev, *found = NULL;
+       struct net_device *dev;
 
        ASSERT_RTNL();
 
        for_each_netdev(&init_net, dev) {
-               if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) {
-                       if (dev->netdev_ops != &device_ops)
-                               continue;
-                       found = dev;
-                       break;
-               }
+               if (dev->netdev_ops != &device_ops)
+                       continue;       /* not a netvsc device */
+
+               if (ether_addr_equal(mac, dev->perm_addr))
+                       return dev;
+       }
+
+       return NULL;
+}
+
+static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
+{
+       struct net_device *dev;
+
+       ASSERT_RTNL();
+
+       for_each_netdev(&init_net, dev) {
+               struct net_device_context *net_device_ctx;
+
+               if (dev->netdev_ops != &device_ops)
+                       continue;       /* not a netvsc device */
+
+               net_device_ctx = netdev_priv(dev);
+               if (net_device_ctx->nvdev == NULL)
+                       continue;       /* device is removed */
+
+               if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
+                       return dev;     /* a match */
        }
 
-       return found;
+       return NULL;
 }
 
 static int netvsc_register_vf(struct net_device *vf_netdev)
@@ -1238,9 +1247,8 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
        struct net_device *ndev;
        struct net_device_context *net_device_ctx;
        struct netvsc_device *netvsc_dev;
-       const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
 
-       if (eth_ops == NULL || eth_ops == &ethtool_ops)
+       if (vf_netdev->addr_len != ETH_ALEN)
                return NOTIFY_DONE;
 
        /*
@@ -1248,13 +1256,13 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
         * associate with the VF interface. If we don't find a matching
         * synthetic interface, move on.
         */
-       ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+       ndev = get_netvsc_bymac(vf_netdev->perm_addr);
        if (!ndev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
-       if (!netvsc_dev || net_device_ctx->vf_netdev)
+       if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
                return NOTIFY_DONE;
 
        netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1262,46 +1270,26 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
         * Take a reference on the module.
         */
        try_module_get(THIS_MODULE);
-       net_device_ctx->vf_netdev = vf_netdev;
-       return NOTIFY_OK;
-}
 
-static void netvsc_inject_enable(struct net_device_context *net_device_ctx)
-{
-       net_device_ctx->vf_inject = true;
-}
-
-static void netvsc_inject_disable(struct net_device_context *net_device_ctx)
-{
-       net_device_ctx->vf_inject = false;
-
-       /* Wait for currently active users to drain out. */
-       while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
-               udelay(50);
+       dev_hold(vf_netdev);
+       rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
+       return NOTIFY_OK;
 }
 
 static int netvsc_vf_up(struct net_device *vf_netdev)
 {
        struct net_device *ndev;
        struct netvsc_device *netvsc_dev;
-       const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
        struct net_device_context *net_device_ctx;
 
-       if (eth_ops == &ethtool_ops)
-               return NOTIFY_DONE;
-
-       ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+       ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
 
-       if (!netvsc_dev || !net_device_ctx->vf_netdev)
-               return NOTIFY_DONE;
-
        netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-       netvsc_inject_enable(net_device_ctx);
 
        /*
         * Open the device before switching data path.
@@ -1327,23 +1315,15 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
        struct net_device *ndev;
        struct netvsc_device *netvsc_dev;
        struct net_device_context *net_device_ctx;
-       const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
-
-       if (eth_ops == &ethtool_ops)
-               return NOTIFY_DONE;
 
-       ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+       ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
 
-       if (!netvsc_dev || !net_device_ctx->vf_netdev)
-               return NOTIFY_DONE;
-
        netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-       netvsc_inject_disable(net_device_ctx);
        netvsc_switch_datapath(ndev, false);
        netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
        rndis_filter_close(netvsc_dev);
@@ -1359,23 +1339,19 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 {
        struct net_device *ndev;
        struct netvsc_device *netvsc_dev;
-       const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
        struct net_device_context *net_device_ctx;
 
-       if (eth_ops == &ethtool_ops)
-               return NOTIFY_DONE;
-
-       ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+       ndev = get_netvsc_byref(vf_netdev);
        if (!ndev)
                return NOTIFY_DONE;
 
        net_device_ctx = netdev_priv(ndev);
        netvsc_dev = net_device_ctx->nvdev;
-       if (!netvsc_dev || !net_device_ctx->vf_netdev)
-               return NOTIFY_DONE;
+
        netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
-       netvsc_inject_disable(net_device_ctx);
-       net_device_ctx->vf_netdev = NULL;
+
+       RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
+       dev_put(vf_netdev);
        module_put(THIS_MODULE);
        return NOTIFY_OK;
 }
@@ -1427,10 +1403,6 @@ static int netvsc_probe(struct hv_device *dev,
        spin_lock_init(&net_device_ctx->lock);
        INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
 
-       atomic_set(&net_device_ctx->vf_use_cnt, 0);
-       net_device_ctx->vf_netdev = NULL;
-       net_device_ctx->vf_inject = false;
-
        net->netdev_ops = &device_ops;
 
        net->hw_features = NETVSC_HW_FEATURES;
@@ -1539,13 +1511,21 @@ static int netvsc_netdev_event(struct notifier_block *this,
 {
        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 
+       /* Skip our own events */
+       if (event_dev->netdev_ops == &device_ops)
+               return NOTIFY_DONE;
+
+       /* Avoid non-Ethernet type devices */
+       if (event_dev->type != ARPHRD_ETHER)
+               return NOTIFY_DONE;
+
        /* Avoid Vlan dev with same MAC registering as VF */
        if (event_dev->priv_flags & IFF_802_1Q_VLAN)
                return NOTIFY_DONE;
 
        /* Avoid Bonding master dev with same MAC registering as VF */
-       if (event_dev->priv_flags & IFF_BONDING &&
-           event_dev->flags & IFF_MASTER)
+       if ((event_dev->priv_flags & IFF_BONDING) &&
+           (event_dev->flags & IFF_MASTER))
                return NOTIFY_DONE;
 
        switch (event) {
index 7756748..92af182 100644 (file)
@@ -424,10 +424,8 @@ static int xgene_mdio_remove(struct platform_device *pdev)
        mdiobus_unregister(mdio_bus);
        mdiobus_free(mdio_bus);
 
-       if (dev->of_node) {
-               if (IS_ERR(pdata->clk))
-                       clk_disable_unprepare(pdata->clk);
-       }
+       if (dev->of_node)
+               clk_disable_unprepare(pdata->clk);
 
        return 0;
 }
index 9338f58..44d439f 100644 (file)
@@ -32,7 +32,7 @@
 #define NETNEXT_VERSION                "08"
 
 /* Information for net */
-#define NET_VERSION            "5"
+#define NET_VERSION            "6"
 
 #define DRIVER_VERSION         "v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -2551,6 +2551,77 @@ static void r8152_aldps_en(struct r8152 *tp, bool enable)
        }
 }
 
+static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
+{
+       ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
+       ocp_reg_write(tp, OCP_EEE_DATA, reg);
+       ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
+}
+
+static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
+{
+       u16 data;
+
+       r8152_mmd_indirect(tp, dev, reg);
+       data = ocp_reg_read(tp, OCP_EEE_DATA);
+       ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+
+       return data;
+}
+
+static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
+{
+       r8152_mmd_indirect(tp, dev, reg);
+       ocp_reg_write(tp, OCP_EEE_DATA, data);
+       ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+}
+
+static void r8152_eee_en(struct r8152 *tp, bool enable)
+{
+       u16 config1, config2, config3;
+       u32 ocp_data;
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+       config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
+       config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
+       config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
+
+       if (enable) {
+               ocp_data |= EEE_RX_EN | EEE_TX_EN;
+               config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
+               config1 |= sd_rise_time(1);
+               config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
+               config3 |= fast_snr(42);
+       } else {
+               ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+               config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
+                            RX_QUIET_EN);
+               config1 |= sd_rise_time(7);
+               config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
+               config3 |= fast_snr(511);
+       }
+
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+       ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
+       ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
+       ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
+}
+
+static void r8152b_enable_eee(struct r8152 *tp)
+{
+       r8152_eee_en(tp, true);
+       r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
+}
+
+static void r8152b_enable_fc(struct r8152 *tp)
+{
+       u16 anar;
+
+       anar = r8152_mdio_read(tp, MII_ADVERTISE);
+       anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+       r8152_mdio_write(tp, MII_ADVERTISE, anar);
+}
+
 static void rtl8152_disable(struct r8152 *tp)
 {
        r8152_aldps_en(tp, false);
@@ -2560,13 +2631,9 @@ static void rtl8152_disable(struct r8152 *tp)
 
 static void r8152b_hw_phy_cfg(struct r8152 *tp)
 {
-       u16 data;
-
-       data = r8152_mdio_read(tp, MII_BMCR);
-       if (data & BMCR_PDOWN) {
-               data &= ~BMCR_PDOWN;
-               r8152_mdio_write(tp, MII_BMCR, data);
-       }
+       r8152b_enable_eee(tp);
+       r8152_aldps_en(tp, true);
+       r8152b_enable_fc(tp);
 
        set_bit(PHY_RESET, &tp->flags);
 }
@@ -2700,20 +2767,52 @@ static void r8152b_enter_oob(struct r8152 *tp)
        ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 }
 
+static void r8153_aldps_en(struct r8152 *tp, bool enable)
+{
+       u16 data;
+
+       data = ocp_reg_read(tp, OCP_POWER_CFG);
+       if (enable) {
+               data |= EN_ALDPS;
+               ocp_reg_write(tp, OCP_POWER_CFG, data);
+       } else {
+               data &= ~EN_ALDPS;
+               ocp_reg_write(tp, OCP_POWER_CFG, data);
+               msleep(20);
+       }
+}
+
+static void r8153_eee_en(struct r8152 *tp, bool enable)
+{
+       u32 ocp_data;
+       u16 config;
+
+       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+       config = ocp_reg_read(tp, OCP_EEE_CFG);
+
+       if (enable) {
+               ocp_data |= EEE_RX_EN | EEE_TX_EN;
+               config |= EEE10_EN;
+       } else {
+               ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+               config &= ~EEE10_EN;
+       }
+
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+       ocp_reg_write(tp, OCP_EEE_CFG, config);
+}
+
 static void r8153_hw_phy_cfg(struct r8152 *tp)
 {
        u32 ocp_data;
        u16 data;
 
-       if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
-           tp->version == RTL_VER_05)
-               ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+       /* disable ALDPS before updating the PHY parameters */
+       r8153_aldps_en(tp, false);
 
-       data = r8152_mdio_read(tp, MII_BMCR);
-       if (data & BMCR_PDOWN) {
-               data &= ~BMCR_PDOWN;
-               r8152_mdio_write(tp, MII_BMCR, data);
-       }
+       /* disable EEE before updating the PHY parameters */
+       r8153_eee_en(tp, false);
+       ocp_reg_write(tp, OCP_EEE_ADV, 0);
 
        if (tp->version == RTL_VER_03) {
                data = ocp_reg_read(tp, OCP_EEE_CFG);
@@ -2744,6 +2843,12 @@ static void r8153_hw_phy_cfg(struct r8152 *tp)
        sram_write(tp, SRAM_10M_AMP1, 0x00af);
        sram_write(tp, SRAM_10M_AMP2, 0x0208);
 
+       r8153_eee_en(tp, true);
+       ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
+
+       r8153_aldps_en(tp, true);
+       r8152b_enable_fc(tp);
+
        set_bit(PHY_RESET, &tp->flags);
 }
 
@@ -2865,21 +2970,6 @@ static void r8153_enter_oob(struct r8152 *tp)
        ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 }
 
-static void r8153_aldps_en(struct r8152 *tp, bool enable)
-{
-       u16 data;
-
-       data = ocp_reg_read(tp, OCP_POWER_CFG);
-       if (enable) {
-               data |= EN_ALDPS;
-               ocp_reg_write(tp, OCP_POWER_CFG, data);
-       } else {
-               data &= ~EN_ALDPS;
-               ocp_reg_write(tp, OCP_POWER_CFG, data);
-               msleep(20);
-       }
-}
-
 static void rtl8153_disable(struct r8152 *tp)
 {
        r8153_aldps_en(tp, false);
@@ -3245,103 +3335,6 @@ static int rtl8152_close(struct net_device *netdev)
        return res;
 }
 
-static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
-{
-       ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
-       ocp_reg_write(tp, OCP_EEE_DATA, reg);
-       ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
-}
-
-static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
-{
-       u16 data;
-
-       r8152_mmd_indirect(tp, dev, reg);
-       data = ocp_reg_read(tp, OCP_EEE_DATA);
-       ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-
-       return data;
-}
-
-static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
-{
-       r8152_mmd_indirect(tp, dev, reg);
-       ocp_reg_write(tp, OCP_EEE_DATA, data);
-       ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-}
-
-static void r8152_eee_en(struct r8152 *tp, bool enable)
-{
-       u16 config1, config2, config3;
-       u32 ocp_data;
-
-       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
-       config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
-       config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
-       config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
-
-       if (enable) {
-               ocp_data |= EEE_RX_EN | EEE_TX_EN;
-               config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
-               config1 |= sd_rise_time(1);
-               config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
-               config3 |= fast_snr(42);
-       } else {
-               ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
-               config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
-                            RX_QUIET_EN);
-               config1 |= sd_rise_time(7);
-               config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
-               config3 |= fast_snr(511);
-       }
-
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
-       ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
-       ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
-       ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
-}
-
-static void r8152b_enable_eee(struct r8152 *tp)
-{
-       r8152_eee_en(tp, true);
-       r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
-}
-
-static void r8153_eee_en(struct r8152 *tp, bool enable)
-{
-       u32 ocp_data;
-       u16 config;
-
-       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
-       config = ocp_reg_read(tp, OCP_EEE_CFG);
-
-       if (enable) {
-               ocp_data |= EEE_RX_EN | EEE_TX_EN;
-               config |= EEE10_EN;
-       } else {
-               ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
-               config &= ~EEE10_EN;
-       }
-
-       ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
-       ocp_reg_write(tp, OCP_EEE_CFG, config);
-}
-
-static void r8153_enable_eee(struct r8152 *tp)
-{
-       r8153_eee_en(tp, true);
-       ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
-}
-
-static void r8152b_enable_fc(struct r8152 *tp)
-{
-       u16 anar;
-
-       anar = r8152_mdio_read(tp, MII_ADVERTISE);
-       anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
-       r8152_mdio_write(tp, MII_ADVERTISE, anar);
-}
-
 static void rtl_tally_reset(struct r8152 *tp)
 {
        u32 ocp_data;
@@ -3354,10 +3347,17 @@ static void rtl_tally_reset(struct r8152 *tp)
 static void r8152b_init(struct r8152 *tp)
 {
        u32 ocp_data;
+       u16 data;
 
        if (test_bit(RTL8152_UNPLUG, &tp->flags))
                return;
 
+       data = r8152_mdio_read(tp, MII_BMCR);
+       if (data & BMCR_PDOWN) {
+               data &= ~BMCR_PDOWN;
+               r8152_mdio_write(tp, MII_BMCR, data);
+       }
+
        r8152_aldps_en(tp, false);
 
        if (tp->version == RTL_VER_01) {
@@ -3379,9 +3379,6 @@ static void r8152b_init(struct r8152 *tp)
                   SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
 
-       r8152b_enable_eee(tp);
-       r8152_aldps_en(tp, true);
-       r8152b_enable_fc(tp);
        rtl_tally_reset(tp);
 
        /* enable rx aggregation */
@@ -3393,12 +3390,12 @@ static void r8152b_init(struct r8152 *tp)
 static void r8153_init(struct r8152 *tp)
 {
        u32 ocp_data;
+       u16 data;
        int i;
 
        if (test_bit(RTL8152_UNPLUG, &tp->flags))
                return;
 
-       r8153_aldps_en(tp, false);
        r8153_u1u2en(tp, false);
 
        for (i = 0; i < 500; i++) {
@@ -3415,6 +3412,23 @@ static void r8153_init(struct r8152 *tp)
                msleep(20);
        }
 
+       if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
+           tp->version == RTL_VER_05)
+               ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+
+       data = r8152_mdio_read(tp, MII_BMCR);
+       if (data & BMCR_PDOWN) {
+               data &= ~BMCR_PDOWN;
+               r8152_mdio_write(tp, MII_BMCR, data);
+       }
+
+       for (i = 0; i < 500; i++) {
+               ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK;
+               if (ocp_data == PHY_STAT_LAN_ON)
+                       break;
+               msleep(20);
+       }
+
        usb_disable_lpm(tp->udev);
        r8153_u2p3en(tp, false);
 
@@ -3482,9 +3496,6 @@ static void r8153_init(struct r8152 *tp)
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
 
-       r8153_enable_eee(tp);
-       r8153_aldps_en(tp, true);
-       r8152b_enable_fc(tp);
        rtl_tally_reset(tp);
        r8153_u2p3en(tp, true);
 }
index 8b91544..f915024 100644 (file)
@@ -513,6 +513,15 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
        int hdrlen = ieee80211_hdrlen(hdr->frame_control);
        int queue;
 
+       /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
+        * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
+        * queue. STATION (HS2.0) uses the auxiliary context of the FW,
+        * and hence needs to be sent on the aux queue
+        */
+       if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
+           skb_info->control.vif->type == NL80211_IFTYPE_STATION)
+               IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
+
        memcpy(&info, skb->cb, sizeof(info));
 
        if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
@@ -526,16 +535,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
        /* This holds the amsdu headers length */
        skb_info->driver_data[0] = (void *)(uintptr_t)0;
 
-       /*
-        * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
-        * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
-        * queue. STATION (HS2.0) uses the auxiliary context of the FW,
-        * and hence needs to be sent on the aux queue
-        */
-       if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
-           info.control.vif->type == NL80211_IFTYPE_STATION)
-               IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
-
        queue = info.hw_queue;
 
        /*
index 3a56268..b38fb2c 100644 (file)
@@ -292,8 +292,6 @@ struct xenvif {
 #endif
 
        struct xen_netif_ctrl_back_ring ctrl;
-       struct task_struct *ctrl_task;
-       wait_queue_head_t ctrl_wq;
        unsigned int ctrl_irq;
 
        /* Miscellaneous private stuff. */
@@ -359,7 +357,7 @@ void xenvif_kick_thread(struct xenvif_queue *queue);
 
 int xenvif_dealloc_kthread(void *data);
 
-int xenvif_ctrl_kthread(void *data);
+irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
 
 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
 
@@ -412,8 +410,4 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len,
 
 void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb);
 
-#ifdef CONFIG_DEBUG_FS
-void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m);
-#endif
-
 #endif /* __XEN_NETBACK__COMMON_H__ */
index e8c5ddd..613bac0 100644 (file)
@@ -360,74 +360,6 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len,
        return XEN_NETIF_CTRL_STATUS_SUCCESS;
 }
 
-#ifdef CONFIG_DEBUG_FS
-void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m)
-{
-       unsigned int i;
-
-       switch (vif->hash.alg) {
-       case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ:
-               seq_puts(m, "Hash Algorithm: TOEPLITZ\n");
-               break;
-
-       case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE:
-               seq_puts(m, "Hash Algorithm: NONE\n");
-               /* FALLTHRU */
-       default:
-               return;
-       }
-
-       if (vif->hash.flags) {
-               seq_puts(m, "\nHash Flags:\n");
-
-               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4)
-                       seq_puts(m, "- IPv4\n");
-               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
-                       seq_puts(m, "- IPv4 + TCP\n");
-               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6)
-                       seq_puts(m, "- IPv6\n");
-               if (vif->hash.flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
-                       seq_puts(m, "- IPv6 + TCP\n");
-       }
-
-       seq_puts(m, "\nHash Key:\n");
-
-       for (i = 0; i < XEN_NETBK_MAX_HASH_KEY_SIZE; ) {
-               unsigned int j, n;
-
-               n = 8;
-               if (i + n >= XEN_NETBK_MAX_HASH_KEY_SIZE)
-                       n = XEN_NETBK_MAX_HASH_KEY_SIZE - i;
-
-               seq_printf(m, "[%2u - %2u]: ", i, i + n - 1);
-
-               for (j = 0; j < n; j++, i++)
-                       seq_printf(m, "%02x ", vif->hash.key[i]);
-
-               seq_puts(m, "\n");
-       }
-
-       if (vif->hash.size != 0) {
-               seq_puts(m, "\nHash Mapping:\n");
-
-               for (i = 0; i < vif->hash.size; ) {
-                       unsigned int j, n;
-
-                       n = 8;
-                       if (i + n >= vif->hash.size)
-                               n = vif->hash.size - i;
-
-                       seq_printf(m, "[%4u - %4u]: ", i, i + n - 1);
-
-                       for (j = 0; j < n; j++, i++)
-                               seq_printf(m, "%4u ", vif->hash.mapping[i]);
-
-                       seq_puts(m, "\n");
-               }
-       }
-}
-#endif /* CONFIG_DEBUG_FS */
-
 void xenvif_init_hash(struct xenvif *vif)
 {
        if (xenvif_hash_cache_size == 0)
index 83deeeb..fb50c6d 100644 (file)
@@ -128,15 +128,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id)
-{
-       struct xenvif *vif = dev_id;
-
-       wake_up(&vif->ctrl_wq);
-
-       return IRQ_HANDLED;
-}
-
 int xenvif_queue_stopped(struct xenvif_queue *queue)
 {
        struct net_device *dev = queue->vif->dev;
@@ -570,8 +561,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
        struct net_device *dev = vif->dev;
        void *addr;
        struct xen_netif_ctrl_sring *shared;
-       struct task_struct *task;
-       int err = -ENOMEM;
+       int err;
 
        err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
                                     &ring_ref, 1, &addr);
@@ -581,11 +571,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
        shared = (struct xen_netif_ctrl_sring *)addr;
        BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
 
-       init_waitqueue_head(&vif->ctrl_wq);
-
-       err = bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn,
-                                                   xenvif_ctrl_interrupt,
-                                                   0, dev->name, vif);
+       err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
        if (err < 0)
                goto err_unmap;
 
@@ -593,19 +579,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
 
        xenvif_init_hash(vif);
 
-       task = kthread_create(xenvif_ctrl_kthread, (void *)vif,
-                             "%s-control", dev->name);
-       if (IS_ERR(task)) {
-               pr_warn("Could not allocate kthread for %s\n", dev->name);
-               err = PTR_ERR(task);
+       err = request_threaded_irq(vif->ctrl_irq, NULL, xenvif_ctrl_irq_fn,
+                                  IRQF_ONESHOT, "xen-netback-ctrl", vif);
+       if (err) {
+               pr_warn("Could not setup irq handler for %s\n", dev->name);
                goto err_deinit;
        }
 
-       get_task_struct(task);
-       vif->ctrl_task = task;
-
-       wake_up_process(vif->ctrl_task);
-
        return 0;
 
 err_deinit:
@@ -774,12 +754,6 @@ void xenvif_disconnect_data(struct xenvif *vif)
 
 void xenvif_disconnect_ctrl(struct xenvif *vif)
 {
-       if (vif->ctrl_task) {
-               kthread_stop(vif->ctrl_task);
-               put_task_struct(vif->ctrl_task);
-               vif->ctrl_task = NULL;
-       }
-
        if (vif->ctrl_irq) {
                xenvif_deinit_hash(vif);
                unbind_from_irqhandler(vif->ctrl_irq, vif);
index edbae0b..3d0c989 100644 (file)
@@ -2359,24 +2359,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif)
        return 0;
 }
 
-int xenvif_ctrl_kthread(void *data)
+irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
 {
        struct xenvif *vif = data;
 
-       for (;;) {
-               wait_event_interruptible(vif->ctrl_wq,
-                                        xenvif_ctrl_work_todo(vif) ||
-                                        kthread_should_stop());
-               if (kthread_should_stop())
-                       break;
-
-               while (xenvif_ctrl_work_todo(vif))
-                       xenvif_ctrl_action(vif);
+       while (xenvif_ctrl_work_todo(vif))
+               xenvif_ctrl_action(vif);
 
-               cond_resched();
-       }
-
-       return 0;
+       return IRQ_HANDLED;
 }
 
 static int __init netback_init(void)
index bacf6e0..daf4c78 100644 (file)
@@ -165,7 +165,7 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
        return count;
 }
 
-static int xenvif_io_ring_open(struct inode *inode, struct file *filp)
+static int xenvif_dump_open(struct inode *inode, struct file *filp)
 {
        int ret;
        void *queue = NULL;
@@ -179,35 +179,13 @@ static int xenvif_io_ring_open(struct inode *inode, struct file *filp)
 
 static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
        .owner = THIS_MODULE,
-       .open = xenvif_io_ring_open,
+       .open = xenvif_dump_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
        .write = xenvif_write_io_ring,
 };
 
-static int xenvif_read_ctrl(struct seq_file *m, void *v)
-{
-       struct xenvif *vif = m->private;
-
-       xenvif_dump_hash_info(vif, m);
-
-       return 0;
-}
-
-static int xenvif_ctrl_open(struct inode *inode, struct file *filp)
-{
-       return single_open(filp, xenvif_read_ctrl, inode->i_private);
-}
-
-static const struct file_operations xenvif_dbg_ctrl_ops_fops = {
-       .owner = THIS_MODULE,
-       .open = xenvif_ctrl_open,
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-};
-
 static void xenvif_debugfs_addif(struct xenvif *vif)
 {
        struct dentry *pfile;
@@ -232,17 +210,6 @@ static void xenvif_debugfs_addif(struct xenvif *vif)
                                pr_warn("Creation of io_ring file returned %ld!\n",
                                        PTR_ERR(pfile));
                }
-
-               if (vif->ctrl_task) {
-                       pfile = debugfs_create_file("ctrl",
-                                                   S_IRUSR,
-                                                   vif->xenvif_dbg_root,
-                                                   vif,
-                                                   &xenvif_dbg_ctrl_ops_fops);
-                       if (IS_ERR_OR_NULL(pfile))
-                               pr_warn("Creation of ctrl file returned %ld!\n",
-                                       PTR_ERR(pfile));
-               }
        } else
                netdev_warn(vif->dev,
                            "Creation of vif debugfs dir returned %ld!\n",
@@ -304,6 +271,11 @@ static int netback_probe(struct xenbus_device *dev,
        be->dev = dev;
        dev_set_drvdata(&dev->dev, be);
 
+       be->state = XenbusStateInitialising;
+       err = xenbus_switch_state(dev, XenbusStateInitialising);
+       if (err)
+               goto fail;
+
        sg = 1;
 
        do {
@@ -416,11 +388,6 @@ static int netback_probe(struct xenbus_device *dev,
 
        be->hotplug_script = script;
 
-       err = xenbus_switch_state(dev, XenbusStateInitWait);
-       if (err)
-               goto fail;
-
-       be->state = XenbusStateInitWait;
 
        /* This kicks hotplug scripts, so do it immediately. */
        err = backend_create_xenvif(be);
@@ -525,20 +492,20 @@ static inline void backend_switch_state(struct backend_info *be,
 
 /* Handle backend state transitions:
  *
- * The backend state starts in InitWait and the following transitions are
+ * The backend state starts in Initialising and the following transitions are
  * allowed.
  *
- * InitWait -> Connected
- *
- *    ^    \         |
- *    |     \        |
- *    |      \       |
- *    |       \      |
- *    |        \     |
- *    |         \    |
- *    |          V   V
+ * Initialising -> InitWait -> Connected
+ *          \
+ *           \        ^    \         |
+ *            \       |     \        |
+ *             \      |      \       |
+ *              \     |       \      |
+ *               \    |        \     |
+ *                \   |         \    |
+ *                 V  |          V   V
  *
- *  Closed  <-> Closing
+ *                  Closed  <-> Closing
  *
  * The state argument specifies the eventual state of the backend and the
  * function transitions to that state via the shortest path.
@@ -548,6 +515,20 @@ static void set_backend_state(struct backend_info *be,
 {
        while (be->state != state) {
                switch (be->state) {
+               case XenbusStateInitialising:
+                       switch (state) {
+                       case XenbusStateInitWait:
+                       case XenbusStateConnected:
+                       case XenbusStateClosing:
+                               backend_switch_state(be, XenbusStateInitWait);
+                               break;
+                       case XenbusStateClosed:
+                               backend_switch_state(be, XenbusStateClosed);
+                               break;
+                       default:
+                               BUG();
+                       }
+                       break;
                case XenbusStateClosed:
                        switch (state) {
                        case XenbusStateInitWait:
index 8dcf5a9..60f7eab 100644 (file)
@@ -1693,7 +1693,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                nvme_suspend_queue(dev->queues[i]);
 
        if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
-               nvme_suspend_queue(dev->queues[0]);
+               /* A device might become IO incapable very soon during
+                * probe, before the admin queue is configured. Thus,
+                * queue_count can be 0 here.
+                */
+               if (dev->queue_count)
+                       nvme_suspend_queue(dev->queues[0]);
        } else {
                nvme_disable_io_queues(dev);
                nvme_disable_admin_queue(dev, shutdown);
@@ -2112,6 +2117,8 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+       { PCI_DEVICE(0x1c5f, 0x0540),   /* Memblaze Pblaze4 adapter */
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { 0, }
index ab545fb..c2c2c28 100644 (file)
@@ -82,6 +82,8 @@ struct nvme_rdma_request {
 
 enum nvme_rdma_queue_flags {
        NVME_RDMA_Q_CONNECTED = (1 << 0),
+       NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
+       NVME_RDMA_Q_DELETING = (1 << 2),
 };
 
 struct nvme_rdma_queue {
@@ -291,6 +293,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
        if (IS_ERR(req->mr)) {
                ret = PTR_ERR(req->mr);
                req->mr = NULL;
+               goto out;
        }
 
        req->mr->need_inval = false;
@@ -480,9 +483,14 @@ out_err:
 
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-       struct nvme_rdma_device *dev = queue->device;
-       struct ib_device *ibdev = dev->dev;
+       struct nvme_rdma_device *dev;
+       struct ib_device *ibdev;
+
+       if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
+               return;
 
+       dev = queue->device;
+       ibdev = dev->dev;
        rdma_destroy_qp(queue->cm_id);
        ib_free_cq(queue->ib_cq);
 
@@ -533,6 +541,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
                ret = -ENOMEM;
                goto out_destroy_qp;
        }
+       set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
 
        return 0;
 
@@ -552,6 +561,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 
        queue = &ctrl->queues[idx];
        queue->ctrl = ctrl;
+       queue->flags = 0;
        init_completion(&queue->cm_done);
 
        if (idx > 0)
@@ -590,6 +600,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
        return 0;
 
 out_destroy_cm_id:
+       nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
        return ret;
 }
@@ -608,7 +619,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
 
 static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue)
 {
-       if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
+       if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
                return;
        nvme_rdma_stop_queue(queue);
        nvme_rdma_free_queue(queue);
@@ -652,7 +663,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl)
        return 0;
 
 out_free_queues:
-       for (; i >= 1; i--)
+       for (i--; i >= 1; i--)
                nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 
        return ret;
@@ -761,8 +772,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 {
        struct nvme_rdma_ctrl *ctrl = container_of(work,
                        struct nvme_rdma_ctrl, err_work);
+       int i;
 
        nvme_stop_keep_alive(&ctrl->ctrl);
+
+       for (i = 0; i < ctrl->queue_count; i++)
+               clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+
        if (ctrl->queue_count > 1)
                nvme_stop_queues(&ctrl->ctrl);
        blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1305,58 +1321,6 @@ out_destroy_queue_ib:
        return ret;
 }
 
-/**
- * nvme_rdma_device_unplug() - Handle RDMA device unplug
- * @queue:      Queue that owns the cm_id that caught the event
- *
- * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
- *
- * In our case, the RDMA resources are managed per controller and not
- * only per queue. So the way we handle this is we trigger an implicit
- * controller deletion upon the first DEVICE_REMOVAL event we see, and
- * hold the event inflight until the controller deletion is completed.
- *
- * One exception that we need to handle is the destruction of the cm_id
- * that caught the event. Since we hold the callout until the controller
- * deletion is completed, we'll deadlock if the controller deletion will
- * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources,
- * then queue the controller deletion which won't destroy this queue and
- * we destroy the cm_id implicitely by returning a non-zero rc to the callout.
- */
-static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
-{
-       struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-       int ret = 0;
-
-       /* Own the controller deletion */
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-               return 0;
-
-       dev_warn(ctrl->ctrl.device,
-               "Got rdma device removal event, deleting ctrl\n");
-
-       /* Get rid of reconnect work if its running */
-       cancel_delayed_work_sync(&ctrl->reconnect_work);
-
-       /* Disable the queue so ctrl delete won't free it */
-       if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
-               /* Free this queue ourselves */
-               nvme_rdma_stop_queue(queue);
-               nvme_rdma_destroy_queue_ib(queue);
-
-               /* Return non-zero so the cm_id will destroy implicitly */
-               ret = 1;
-       }
-
-       /* Queue controller deletion */
-       queue_work(nvme_rdma_wq, &ctrl->delete_work);
-       flush_work(&ctrl->delete_work);
-       return ret;
-}
-
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                struct rdma_cm_event *ev)
 {
@@ -1398,8 +1362,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                nvme_rdma_error_recovery(queue->ctrl);
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
-               /* return 1 means impliciy CM ID destroy */
-               return nvme_rdma_device_unplug(queue);
+               /* device removal is handled via the ib_client API */
+               break;
        default:
                dev_err(queue->ctrl->ctrl.device,
                        "Unexpected RDMA CM event (%d)\n", ev->event);
@@ -1700,15 +1664,19 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
 static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
 {
        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-       int ret;
+       int ret = 0;
 
+       /*
+        * Keep a reference until all work is flushed since
+        * __nvme_rdma_del_ctrl can free the ctrl mem
+        */
+       if (!kref_get_unless_zero(&ctrl->ctrl.kref))
+               return -EBUSY;
        ret = __nvme_rdma_del_ctrl(ctrl);
-       if (ret)
-               return ret;
-
-       flush_work(&ctrl->delete_work);
-
-       return 0;
+       if (!ret)
+               flush_work(&ctrl->delete_work);
+       nvme_put_ctrl(&ctrl->ctrl);
+       return ret;
 }
 
 static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
@@ -2005,27 +1973,57 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
        .create_ctrl    = nvme_rdma_create_ctrl,
 };
 
+static void nvme_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+       struct nvme_rdma_ctrl *ctrl;
+
+       /* Delete all controllers using this device */
+       mutex_lock(&nvme_rdma_ctrl_mutex);
+       list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
+               if (ctrl->device->dev != ib_device)
+                       continue;
+               dev_info(ctrl->ctrl.device,
+                       "Removing ctrl: NQN \"%s\", addr %pISp\n",
+                       ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+               __nvme_rdma_del_ctrl(ctrl);
+       }
+       mutex_unlock(&nvme_rdma_ctrl_mutex);
+
+       flush_workqueue(nvme_rdma_wq);
+}
+
+static struct ib_client nvme_rdma_ib_client = {
+       .name   = "nvme_rdma",
+       .add = nvme_rdma_add_one,
+       .remove = nvme_rdma_remove_one
+};
+
 static int __init nvme_rdma_init_module(void)
 {
+       int ret;
+
        nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
        if (!nvme_rdma_wq)
                return -ENOMEM;
 
+       ret = ib_register_client(&nvme_rdma_ib_client);
+       if (ret) {
+               destroy_workqueue(nvme_rdma_wq);
+               return ret;
+       }
+
        nvmf_register_transport(&nvme_rdma_transport);
        return 0;
 }
 
 static void __exit nvme_rdma_cleanup_module(void)
 {
-       struct nvme_rdma_ctrl *ctrl;
-
        nvmf_unregister_transport(&nvme_rdma_transport);
-
-       mutex_lock(&nvme_rdma_ctrl_mutex);
-       list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
-               __nvme_rdma_del_ctrl(ctrl);
-       mutex_unlock(&nvme_rdma_ctrl_mutex);
-
+       ib_unregister_client(&nvme_rdma_ib_client);
        destroy_workqueue(nvme_rdma_wq);
 }
 
index d1ef7ac..f9357e0 100644 (file)
@@ -40,6 +40,7 @@ static void pci_destroy_dev(struct pci_dev *dev)
        list_del(&dev->bus_list);
        up_write(&pci_bus_sem);
 
+       pci_bridge_d3_device_removed(dev);
        pci_free_resources(dev);
        put_device(&dev->dev);
 }
@@ -96,8 +97,6 @@ static void pci_remove_bus_device(struct pci_dev *dev)
                dev->subordinate = NULL;
        }
 
-       pci_bridge_d3_device_removed(dev);
-
        pci_destroy_dev(dev);
 }
 
index 489ea10..69b5e81 100644 (file)
@@ -977,7 +977,7 @@ static int pcmcia_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 /************************ runtime PM support ***************************/
 
-static int pcmcia_dev_suspend(struct device *dev, pm_message_t state);
+static int pcmcia_dev_suspend(struct device *dev);
 static int pcmcia_dev_resume(struct device *dev);
 
 static int runtime_suspend(struct device *dev)
@@ -985,7 +985,7 @@ static int runtime_suspend(struct device *dev)
        int rc;
 
        device_lock(dev);
-       rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND);
+       rc = pcmcia_dev_suspend(dev);
        device_unlock(dev);
        return rc;
 }
@@ -1135,7 +1135,7 @@ ATTRIBUTE_GROUPS(pcmcia_dev);
 
 /* PM support, also needed for reset */
 
-static int pcmcia_dev_suspend(struct device *dev, pm_message_t state)
+static int pcmcia_dev_suspend(struct device *dev)
 {
        struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
        struct pcmcia_driver *p_drv = NULL;
@@ -1410,6 +1410,9 @@ static struct class_interface pcmcia_bus_interface __refdata = {
        .remove_dev = &pcmcia_bus_remove_socket,
 };
 
+static const struct dev_pm_ops pcmcia_bus_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume)
+};
 
 struct bus_type pcmcia_bus_type = {
        .name = "pcmcia",
@@ -1418,8 +1421,7 @@ struct bus_type pcmcia_bus_type = {
        .dev_groups = pcmcia_dev_groups,
        .probe = pcmcia_device_probe,
        .remove = pcmcia_device_remove,
-       .suspend = pcmcia_dev_suspend,
-       .resume = pcmcia_dev_resume,
+       .pm = &pcmcia_bus_pm_ops,
 };
 
 
index 483f919..91b5f57 100644 (file)
@@ -214,9 +214,8 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt,
 }
 #endif
 
-void pxa2xx_configure_sockets(struct device *dev)
+void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops)
 {
-       struct pcmcia_low_level *ops = dev->platform_data;
        /*
         * We have at least one socket, so set MECR:CIT
         * (Card Is There)
@@ -322,7 +321,7 @@ static int pxa2xx_drv_pcmcia_probe(struct platform_device *dev)
                        goto err1;
        }
 
-       pxa2xx_configure_sockets(&dev->dev);
+       pxa2xx_configure_sockets(&dev->dev, ops);
        dev_set_drvdata(&dev->dev, sinfo);
 
        return 0;
@@ -348,7 +347,9 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev)
 
 static int pxa2xx_drv_pcmcia_resume(struct device *dev)
 {
-       pxa2xx_configure_sockets(dev);
+       struct pcmcia_low_level *ops = (struct pcmcia_low_level *)dev->platform_data;
+
+       pxa2xx_configure_sockets(dev, ops);
        return 0;
 }
 
index b609b45..e58c7a4 100644 (file)
@@ -1,4 +1,4 @@
 int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt);
 void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops);
-void pxa2xx_configure_sockets(struct device *dev);
+void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops);
 
index 12f0dd0..2f49093 100644 (file)
@@ -134,20 +134,14 @@ static struct pcmcia_low_level badge4_pcmcia_ops = {
 
 int pcmcia_badge4_init(struct sa1111_dev *dev)
 {
-       int ret = -ENODEV;
-
-       if (machine_is_badge4()) {
-               printk(KERN_INFO
-                      "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n",
-                      __func__,
-                      badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc);
-
-               sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops);
-               ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops,
-                               sa11xx_drv_pcmcia_add_one);
-       }
-
-       return ret;
+       printk(KERN_INFO
+              "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n",
+              __func__,
+              badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc);
+
+       sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops);
+       return sa1111_pcmcia_add(dev, &badge4_pcmcia_ops,
+                                sa11xx_drv_pcmcia_add_one);
 }
 
 static int __init pcmv_setup(char *s)
index a1531fe..3d95dff 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <mach/hardware.h>
 #include <asm/hardware/sa1111.h>
+#include <asm/mach-types.h>
 #include <asm/irq.h>
 
 #include "sa1111_generic.h"
@@ -203,19 +204,30 @@ static int pcmcia_probe(struct sa1111_dev *dev)
        sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR);
        sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR);
 
+       ret = -ENODEV;
 #ifdef CONFIG_SA1100_BADGE4
-       pcmcia_badge4_init(dev);
+       if (machine_is_badge4())
+               ret = pcmcia_badge4_init(dev);
 #endif
 #ifdef CONFIG_SA1100_JORNADA720
-       pcmcia_jornada720_init(dev);
+       if (machine_is_jornada720())
+               ret = pcmcia_jornada720_init(dev);
 #endif
 #ifdef CONFIG_ARCH_LUBBOCK
-       pcmcia_lubbock_init(dev);
+       if (machine_is_lubbock())
+               ret = pcmcia_lubbock_init(dev);
 #endif
 #ifdef CONFIG_ASSABET_NEPONSET
-       pcmcia_neponset_init(dev);
+       if (machine_is_assabet())
+               ret = pcmcia_neponset_init(dev);
 #endif
-       return 0;
+
+       if (ret) {
+               release_mem_region(dev->res.start, 512);
+               sa1111_disable_device(dev);
+       }
+
+       return ret;
 }
 
 static int pcmcia_remove(struct sa1111_dev *dev)
index c2c3058..480a3ed 100644 (file)
@@ -94,22 +94,17 @@ static struct pcmcia_low_level jornada720_pcmcia_ops = {
 
 int pcmcia_jornada720_init(struct sa1111_dev *sadev)
 {
-       int ret = -ENODEV;
+       unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3;
 
-       if (machine_is_jornada720()) {
-               unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3;
+       /* Fixme: why messing around with SA11x0's GPIO1? */
+       GRER |= 0x00000002;
 
-               GRER |= 0x00000002;
+       /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
+       sa1111_set_io_dir(sadev, pin, 0, 0);
+       sa1111_set_io(sadev, pin, 0);
+       sa1111_set_sleep_io(sadev, pin, 0);
 
-               /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
-               sa1111_set_io_dir(sadev, pin, 0, 0);
-               sa1111_set_io(sadev, pin, 0);
-               sa1111_set_sleep_io(sadev, pin, 0);
-
-               sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops);
-               ret = sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops,
-                               sa11xx_drv_pcmcia_add_one);
-       }
-
-       return ret;
+       sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops);
+       return sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops,
+                                sa11xx_drv_pcmcia_add_one);
 }
index c5caf57..e741f49 100644 (file)
@@ -210,27 +210,21 @@ static struct pcmcia_low_level lubbock_pcmcia_ops = {
 
 int pcmcia_lubbock_init(struct sa1111_dev *sadev)
 {
-       int ret = -ENODEV;
-
-       if (machine_is_lubbock()) {
-               /*
-                * Set GPIO_A<3:0> to be outputs for the MAX1600,
-                * and switch to standby mode.
-                */
-               sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
-               sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-               sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-
-               /* Set CF Socket 1 power to standby mode. */
-               lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
+       /*
+        * Set GPIO_A<3:0> to be outputs for the MAX1600,
+        * and switch to standby mode.
+        */
+       sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
+       sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+       sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
 
-               pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
-               pxa2xx_configure_sockets(&sadev->dev);
-               ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
-                               pxa2xx_drv_pcmcia_add_one);
-       }
+       /* Set CF Socket 1 power to standby mode. */
+       lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
 
-       return ret;
+       pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
+       pxa2xx_configure_sockets(&sadev->dev, &lubbock_pcmcia_ops);
+       return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
+                                pxa2xx_drv_pcmcia_add_one);
 }
 
 MODULE_LICENSE("GPL");
index 1d78739..019c395 100644 (file)
@@ -110,20 +110,14 @@ static struct pcmcia_low_level neponset_pcmcia_ops = {
 
 int pcmcia_neponset_init(struct sa1111_dev *sadev)
 {
-       int ret = -ENODEV;
-
-       if (machine_is_assabet()) {
-               /*
-                * Set GPIO_A<3:0> to be outputs for the MAX1600,
-                * and switch to standby mode.
-                */
-               sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
-               sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-               sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-               sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops);
-               ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops,
-                               sa11xx_drv_pcmcia_add_one);
-       }
-
-       return ret;
+       /*
+        * Set GPIO_A<3:0> to be outputs for the MAX1600,
+        * and switch to standby mode.
+        */
+       sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
+       sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+       sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+       sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops);
+       return sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops,
+                                sa11xx_drv_pcmcia_add_one);
 }
index 9f6ec87..48140ac 100644 (file)
@@ -144,19 +144,19 @@ static int
 sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf)
 {
        struct soc_pcmcia_timing timing;
-       unsigned int clock = clk_get_rate(skt->clk);
+       unsigned int clock = clk_get_rate(skt->clk) / 1000;
        unsigned long mecr = MECR;
        char *p = buf;
 
        soc_common_pcmcia_get_timing(skt, &timing);
 
-       p+=sprintf(p, "I/O      : %u (%u)\n", timing.io,
+       p+=sprintf(p, "I/O      : %uns (%uns)\n", timing.io,
                   sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr)));
 
-       p+=sprintf(p, "attribute: %u (%u)\n", timing.attr,
+       p+=sprintf(p, "attribute: %uns (%uns)\n", timing.attr,
                   sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr)));
 
-       p+=sprintf(p, "common   : %u (%u)\n", timing.mem,
+       p+=sprintf(p, "common   : %uns (%uns)\n", timing.mem,
                   sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr)));
 
        return p - buf;
index eed5e9c..d5ca760 100644 (file)
@@ -235,7 +235,7 @@ static unsigned int soc_common_pcmcia_skt_state(struct soc_pcmcia_socket *skt)
        stat |= skt->cs_state.Vcc ? SS_POWERON : 0;
 
        if (skt->cs_state.flags & SS_IOCARD)
-               stat |= state.bvd1 ? SS_STSCHG : 0;
+               stat |= state.bvd1 ? 0 : SS_STSCHG;
        else {
                if (state.bvd1 == 0)
                        stat |= SS_BATDEAD;
index 3fa17ac..cebc296 100644 (file)
@@ -2247,17 +2247,30 @@ static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code,
 {
        struct rio_channel *ch;
        unsigned int i;
+       LIST_HEAD(list);
 
        riocm_debug(EXIT, ".");
 
+       /*
+        * If there are any channels left in connected state send
+        * close notification to the connection partner.
+        * First build a list of channels that require a closing
+        * notification because function riocm_send_close() should
+        * be called outside of spinlock protected code.
+        */
        spin_lock_bh(&idr_lock);
        idr_for_each_entry(&ch_idr, ch, i) {
-               riocm_debug(EXIT, "close ch %d", ch->id);
-               if (ch->state == RIO_CM_CONNECTED)
-                       riocm_send_close(ch);
+               if (ch->state == RIO_CM_CONNECTED) {
+                       riocm_debug(EXIT, "close ch %d", ch->id);
+                       idr_remove(&ch_idr, ch->id);
+                       list_add(&ch->ch_node, &list);
+               }
        }
        spin_unlock_bh(&idr_lock);
 
+       list_for_each_entry(ch, &list, ch_node)
+               riocm_send_close(ch);
+
        return NOTIFY_DONE;
 }
 
index bf40063..6d4b68c 100644 (file)
@@ -999,6 +999,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
                                                 __u16, __u16,
                                                 enum qeth_prot_versions);
 int qeth_set_features(struct net_device *, netdev_features_t);
+int qeth_recover_features(struct net_device *);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 
 /* exports for OSN */
index 7dba6c8..20cf296 100644 (file)
@@ -3619,7 +3619,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card,
                int e;
 
                e = 0;
-               while (buffer->element[e].addr) {
+               while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
+                      buffer->element[e].addr) {
                        unsigned long phys_aob_addr;
 
                        phys_aob_addr = (unsigned long) buffer->element[e].addr;
@@ -6131,6 +6132,35 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on)
        return rc;
 }
 
+/* try to restore device features on a device after recovery */
+int qeth_recover_features(struct net_device *dev)
+{
+       struct qeth_card *card = dev->ml_priv;
+       netdev_features_t recover = dev->features;
+
+       if (recover & NETIF_F_IP_CSUM) {
+               if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
+                       recover ^= NETIF_F_IP_CSUM;
+       }
+       if (recover & NETIF_F_RXCSUM) {
+               if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
+                       recover ^= NETIF_F_RXCSUM;
+       }
+       if (recover & NETIF_F_TSO) {
+               if (qeth_set_ipa_tso(card, 1))
+                       recover ^= NETIF_F_TSO;
+       }
+
+       if (recover == dev->features)
+               return 0;
+
+       dev_warn(&card->gdev->dev,
+                "Device recovery failed to restore all offload features\n");
+       dev->features = recover;
+       return -EIO;
+}
+EXPORT_SYMBOL_GPL(qeth_recover_features);
+
 int qeth_set_features(struct net_device *dev, netdev_features_t features)
 {
        struct qeth_card *card = dev->ml_priv;
index 7bc20c5..bb27058 100644 (file)
@@ -1124,14 +1124,11 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                        card->dev->hw_features |= NETIF_F_RXCSUM;
                        card->dev->vlan_features |= NETIF_F_RXCSUM;
                }
-               /* Turn on SG per default */
-               card->dev->features |= NETIF_F_SG;
        }
        card->info.broadcast_capable = 1;
        qeth_l2_request_initial_mac(card);
        card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
                                  PAGE_SIZE;
-       card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1);
        SET_NETDEV_DEV(card->dev, &card->gdev->dev);
        netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT);
        netif_carrier_off(card->dev);
@@ -1246,6 +1243,9 @@ contin:
                }
                /* this also sets saved unicast addresses */
                qeth_l2_set_rx_mode(card->dev);
+               rtnl_lock();
+               qeth_recover_features(card->dev);
+               rtnl_unlock();
        }
        /* let user_space know that device is online */
        kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
index 7293466..272d9e7 100644 (file)
@@ -257,6 +257,11 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
        if (addr->in_progress)
                return -EINPROGRESS;
 
+       if (!qeth_card_hw_is_reachable(card)) {
+               addr->disp_flag = QETH_DISP_ADDR_DELETE;
+               return 0;
+       }
+
        rc = qeth_l3_deregister_addr_entry(card, addr);
 
        hash_del(&addr->hnode);
@@ -296,6 +301,11 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
                hash_add(card->ip_htable, &addr->hnode,
                                qeth_l3_ipaddr_hash(addr));
 
+               if (!qeth_card_hw_is_reachable(card)) {
+                       addr->disp_flag = QETH_DISP_ADDR_ADD;
+                       return 0;
+               }
+
                /* qeth_l3_register_addr_entry can go to sleep
                 * if we add a IPV4 addr. It is caused by the reason
                 * that SETIP ipa cmd starts ARP staff for IPV4 addr.
@@ -390,12 +400,16 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
        int i;
        int rc;
 
-       QETH_CARD_TEXT(card, 4, "recoverip");
+       QETH_CARD_TEXT(card, 4, "recovrip");
 
        spin_lock_bh(&card->ip_lock);
 
        hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
-               if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
+               if (addr->disp_flag == QETH_DISP_ADDR_DELETE) {
+                       qeth_l3_deregister_addr_entry(card, addr);
+                       hash_del(&addr->hnode);
+                       kfree(addr);
+               } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
                        if (addr->proto == QETH_PROT_IPV4) {
                                addr->in_progress = 1;
                                spin_unlock_bh(&card->ip_lock);
@@ -407,10 +421,8 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
 
                        if (!rc) {
                                addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
-                               if (addr->ref_counter < 1) {
+                               if (addr->ref_counter < 1)
                                        qeth_l3_delete_ip(card, addr);
-                                       kfree(addr);
-                               }
                        } else {
                                hash_del(&addr->hnode);
                                kfree(addr);
@@ -689,7 +701,7 @@ int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto,
 
        spin_lock_bh(&card->ip_lock);
 
-       if (!qeth_l3_ip_from_hash(card, ipaddr))
+       if (qeth_l3_ip_from_hash(card, ipaddr))
                rc = -EEXIST;
        else
                qeth_l3_add_ip(card, ipaddr);
@@ -757,7 +769,7 @@ int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto,
 
        spin_lock_bh(&card->ip_lock);
 
-       if (!qeth_l3_ip_from_hash(card, ipaddr))
+       if (qeth_l3_ip_from_hash(card, ipaddr))
                rc = -EEXIST;
        else
                qeth_l3_add_ip(card, ipaddr);
@@ -3108,7 +3120,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
                                card->dev->vlan_features = NETIF_F_SG |
                                        NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
                                        NETIF_F_TSO;
-                               card->dev->features = NETIF_F_SG;
                        }
                }
        } else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -3136,7 +3147,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
        netif_keep_dst(card->dev);
        card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
                                  PAGE_SIZE;
-       card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1);
 
        SET_NETDEV_DEV(card->dev, &card->gdev->dev);
        netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT);
@@ -3269,6 +3279,7 @@ contin:
                else
                        dev_open(card->dev);
                qeth_l3_set_multicast_list(card->dev);
+               qeth_recover_features(card->dev);
                rtnl_unlock();
        }
        qeth_trace_features(card);
index 65645b1..0e00a5c 100644 (file)
@@ -297,7 +297,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
                addr->u.a6.pfxlen = 0;
                addr->type = QETH_IP_TYPE_NORMAL;
 
+               spin_lock_bh(&card->ip_lock);
                qeth_l3_delete_ip(card, addr);
+               spin_unlock_bh(&card->ip_lock);
                kfree(addr);
        }
 
@@ -329,7 +331,10 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
                addr->type = QETH_IP_TYPE_NORMAL;
        } else
                return -ENOMEM;
+
+       spin_lock_bh(&card->ip_lock);
        qeth_l3_add_ip(card, addr);
+       spin_unlock_bh(&card->ip_lock);
        kfree(addr);
 
        return count;
index a10d4f8..1322469 100644 (file)
@@ -12,6 +12,7 @@ Hopefully this will happen later in 2016.
 
 Other TODOs:
 
+- There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that?
 - Add a flag to inhibit passing CEC RC messages to the rc subsystem.
   Applications should be able to choose this when calling S_LOG_ADDRS.
 - If the reply field of cec_msg is set then when the reply arrives it
index b2393bb..946986f 100644 (file)
@@ -124,10 +124,10 @@ static void cec_queue_event(struct cec_adapter *adap,
        u64 ts = ktime_get_ns();
        struct cec_fh *fh;
 
-       mutex_lock(&adap->devnode.fhs_lock);
+       mutex_lock(&adap->devnode.lock);
        list_for_each_entry(fh, &adap->devnode.fhs, list)
                cec_queue_event_fh(fh, ev, ts);
-       mutex_unlock(&adap->devnode.fhs_lock);
+       mutex_unlock(&adap->devnode.lock);
 }
 
 /*
@@ -191,12 +191,12 @@ static void cec_queue_msg_monitor(struct cec_adapter *adap,
        u32 monitor_mode = valid_la ? CEC_MODE_MONITOR :
                                      CEC_MODE_MONITOR_ALL;
 
-       mutex_lock(&adap->devnode.fhs_lock);
+       mutex_lock(&adap->devnode.lock);
        list_for_each_entry(fh, &adap->devnode.fhs, list) {
                if (fh->mode_follower >= monitor_mode)
                        cec_queue_msg_fh(fh, msg);
        }
-       mutex_unlock(&adap->devnode.fhs_lock);
+       mutex_unlock(&adap->devnode.lock);
 }
 
 /*
@@ -207,12 +207,12 @@ static void cec_queue_msg_followers(struct cec_adapter *adap,
 {
        struct cec_fh *fh;
 
-       mutex_lock(&adap->devnode.fhs_lock);
+       mutex_lock(&adap->devnode.lock);
        list_for_each_entry(fh, &adap->devnode.fhs, list) {
                if (fh->mode_follower == CEC_MODE_FOLLOWER)
                        cec_queue_msg_fh(fh, msg);
        }
-       mutex_unlock(&adap->devnode.fhs_lock);
+       mutex_unlock(&adap->devnode.lock);
 }
 
 /* Notify userspace of an adapter state change. */
@@ -851,6 +851,9 @@ void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg)
        if (!valid_la || msg->len <= 1)
                return;
 
+       if (adap->log_addrs.log_addr_mask == 0)
+               return;
+
        /*
         * Process the message on the protocol level. If is_reply is true,
         * then cec_receive_notify() won't pass on the reply to the listener(s)
@@ -1047,11 +1050,17 @@ static int cec_config_thread_func(void *arg)
                        dprintk(1, "could not claim LA %d\n", i);
        }
 
+       if (adap->log_addrs.log_addr_mask == 0 &&
+           !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK))
+               goto unconfigure;
+
 configured:
        if (adap->log_addrs.log_addr_mask == 0) {
                /* Fall back to unregistered */
                las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED;
                las->log_addr_mask = 1 << las->log_addr[0];
+               for (i = 1; i < las->num_log_addrs; i++)
+                       las->log_addr[i] = CEC_LOG_ADDR_INVALID;
        }
        adap->is_configured = true;
        adap->is_configuring = false;
@@ -1070,6 +1079,8 @@ configured:
                        cec_report_features(adap, i);
                cec_report_phys_addr(adap, i);
        }
+       for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++)
+               las->log_addr[i] = CEC_LOG_ADDR_INVALID;
        mutex_lock(&adap->lock);
        adap->kthread_config = NULL;
        mutex_unlock(&adap->lock);
@@ -1398,7 +1409,6 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
        u8 init_laddr = cec_msg_initiator(msg);
        u8 devtype = cec_log_addr2dev(adap, dest_laddr);
        int la_idx = cec_log_addr2idx(adap, dest_laddr);
-       bool is_directed = la_idx >= 0;
        bool from_unregistered = init_laddr == 0xf;
        struct cec_msg tx_cec_msg = { };
 
@@ -1560,7 +1570,7 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg,
                 * Unprocessed messages are aborted if userspace isn't doing
                 * any processing either.
                 */
-               if (is_directed && !is_reply && !adap->follower_cnt &&
+               if (!is_broadcast && !is_reply && !adap->follower_cnt &&
                    !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT)
                        return cec_feature_abort(adap, msg);
                break;
index 7be7615..e274e2f 100644 (file)
@@ -162,7 +162,7 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh,
                return -ENOTTY;
        if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
                return -EFAULT;
-       log_addrs.flags = 0;
+       log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK;
        mutex_lock(&adap->lock);
        if (!adap->is_configuring &&
            (!log_addrs.num_log_addrs || !adap->is_configured) &&
@@ -435,7 +435,7 @@ static long cec_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        void __user *parg = (void __user *)arg;
 
        if (!devnode->registered)
-               return -EIO;
+               return -ENODEV;
 
        switch (cmd) {
        case CEC_ADAP_G_CAPS:
@@ -508,14 +508,14 @@ static int cec_open(struct inode *inode, struct file *filp)
 
        filp->private_data = fh;
 
-       mutex_lock(&devnode->fhs_lock);
+       mutex_lock(&devnode->lock);
        /* Queue up initial state events */
        ev_state.state_change.phys_addr = adap->phys_addr;
        ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
        cec_queue_event_fh(fh, &ev_state, 0);
 
        list_add(&fh->list, &devnode->fhs);
-       mutex_unlock(&devnode->fhs_lock);
+       mutex_unlock(&devnode->lock);
 
        return 0;
 }
@@ -540,9 +540,9 @@ static int cec_release(struct inode *inode, struct file *filp)
                cec_monitor_all_cnt_dec(adap);
        mutex_unlock(&adap->lock);
 
-       mutex_lock(&devnode->fhs_lock);
+       mutex_lock(&devnode->lock);
        list_del(&fh->list);
-       mutex_unlock(&devnode->fhs_lock);
+       mutex_unlock(&devnode->lock);
 
        /* Unhook pending transmits from this filehandle. */
        mutex_lock(&adap->lock);
index 112a5fa..3b1e4d2 100644 (file)
@@ -51,31 +51,29 @@ int cec_get_device(struct cec_devnode *devnode)
 {
        /*
         * Check if the cec device is available. This needs to be done with
-        * the cec_devnode_lock held to prevent an open/unregister race:
+        * the devnode->lock held to prevent an open/unregister race:
         * without the lock, the device could be unregistered and freed between
         * the devnode->registered check and get_device() calls, leading to
         * a crash.
         */
-       mutex_lock(&cec_devnode_lock);
+       mutex_lock(&devnode->lock);
        /*
         * return ENXIO if the cec device has been removed
         * already or if it is not registered anymore.
         */
        if (!devnode->registered) {
-               mutex_unlock(&cec_devnode_lock);
+               mutex_unlock(&devnode->lock);
                return -ENXIO;
        }
        /* and increase the device refcount */
        get_device(&devnode->dev);
-       mutex_unlock(&cec_devnode_lock);
+       mutex_unlock(&devnode->lock);
        return 0;
 }
 
 void cec_put_device(struct cec_devnode *devnode)
 {
-       mutex_lock(&cec_devnode_lock);
        put_device(&devnode->dev);
-       mutex_unlock(&cec_devnode_lock);
 }
 
 /* Called when the last user of the cec device exits. */
@@ -84,11 +82,10 @@ static void cec_devnode_release(struct device *cd)
        struct cec_devnode *devnode = to_cec_devnode(cd);
 
        mutex_lock(&cec_devnode_lock);
-
        /* Mark device node number as free */
        clear_bit(devnode->minor, cec_devnode_nums);
-
        mutex_unlock(&cec_devnode_lock);
+
        cec_delete_adapter(to_cec_adapter(devnode));
 }
 
@@ -117,7 +114,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode,
 
        /* Initialization */
        INIT_LIST_HEAD(&devnode->fhs);
-       mutex_init(&devnode->fhs_lock);
+       mutex_init(&devnode->lock);
 
        /* Part 1: Find a free minor number */
        mutex_lock(&cec_devnode_lock);
@@ -160,7 +157,9 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode,
 cdev_del:
        cdev_del(&devnode->cdev);
 clr_bit:
+       mutex_lock(&cec_devnode_lock);
        clear_bit(devnode->minor, cec_devnode_nums);
+       mutex_unlock(&cec_devnode_lock);
        return ret;
 }
 
@@ -177,17 +176,21 @@ static void cec_devnode_unregister(struct cec_devnode *devnode)
 {
        struct cec_fh *fh;
 
+       mutex_lock(&devnode->lock);
+
        /* Check if devnode was never registered or already unregistered */
-       if (!devnode->registered || devnode->unregistered)
+       if (!devnode->registered || devnode->unregistered) {
+               mutex_unlock(&devnode->lock);
                return;
+       }
 
-       mutex_lock(&devnode->fhs_lock);
        list_for_each_entry(fh, &devnode->fhs, list)
                wake_up_interruptible(&fh->wait);
-       mutex_unlock(&devnode->fhs_lock);
 
        devnode->registered = false;
        devnode->unregistered = true;
+       mutex_unlock(&devnode->lock);
+
        device_del(&devnode->dev);
        cdev_del(&devnode->cdev);
        put_device(&devnode->dev);
index 94f8590..ed8bd95 100644 (file)
@@ -114,14 +114,11 @@ static void pulse8_irq_work_handler(struct work_struct *work)
                cec_transmit_done(pulse8->adap, CEC_TX_STATUS_OK,
                                  0, 0, 0, 0);
                break;
-       case MSGCODE_TRANSMIT_FAILED_LINE:
-               cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ARB_LOST,
-                                 1, 0, 0, 0);
-               break;
        case MSGCODE_TRANSMIT_FAILED_ACK:
                cec_transmit_done(pulse8->adap, CEC_TX_STATUS_NACK,
                                  0, 1, 0, 0);
                break;
+       case MSGCODE_TRANSMIT_FAILED_LINE:
        case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA:
        case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
                cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ERROR,
@@ -170,6 +167,9 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data,
                case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
                        schedule_work(&pulse8->work);
                        break;
+               case MSGCODE_HIGH_ERROR:
+               case MSGCODE_LOW_ERROR:
+               case MSGCODE_RECEIVE_FAILED:
                case MSGCODE_TIMEOUT_ERROR:
                        break;
                case MSGCODE_COMMAND_ACCEPTED:
@@ -388,7 +388,7 @@ static int pulse8_cec_adap_transmit(struct cec_adapter *adap, u8 attempts,
        int err;
 
        cmd[0] = MSGCODE_TRANSMIT_IDLETIME;
-       cmd[1] = 3;
+       cmd[1] = signal_free_time;
        err = pulse8_send_and_wait(pulse8, cmd, 2,
                                   MSGCODE_COMMAND_ACCEPTED, 1);
        cmd[0] = MSGCODE_TRANSMIT_ACK_POLARITY;
index 15ce4ab..a2d90ac 100644 (file)
@@ -240,8 +240,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
        memcpy(&endpoint->desc, d, n);
        INIT_LIST_HEAD(&endpoint->urb_list);
 
-       /* Fix up bInterval values outside the legal range. Use 32 ms if no
-        * proper value can be guessed. */
+       /*
+        * Fix up bInterval values outside the legal range.
+        * Use 10 or 8 ms if no proper value can be guessed.
+        */
        i = 0;          /* i = min, j = max, n = default */
        j = 255;
        if (usb_endpoint_xfer_int(d)) {
@@ -250,13 +252,15 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
                case USB_SPEED_SUPER_PLUS:
                case USB_SPEED_SUPER:
                case USB_SPEED_HIGH:
-                       /* Many device manufacturers are using full-speed
+                       /*
+                        * Many device manufacturers are using full-speed
                         * bInterval values in high-speed interrupt endpoint
-                        * descriptors. Try to fix those and fall back to a
-                        * 32 ms default value otherwise. */
+                        * descriptors. Try to fix those and fall back to an
+                        * 8-ms default value otherwise.
+                        */
                        n = fls(d->bInterval*8);
                        if (n == 0)
-                               n = 9;  /* 32 ms = 2^(9-1) uframes */
+                               n = 7;  /* 8 ms = 2^(7-1) uframes */
                        j = 16;
 
                        /*
@@ -271,10 +275,12 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
                        }
                        break;
                default:                /* USB_SPEED_FULL or _LOW */
-                       /* For low-speed, 10 ms is the official minimum.
+                       /*
+                        * For low-speed, 10 ms is the official minimum.
                         * But some "overclocked" devices might want faster
-                        * polling so we'll allow it. */
-                       n = 32;
+                        * polling so we'll allow it.
+                        */
+                       n = 10;
                        break;
                }
        } else if (usb_endpoint_xfer_isoc(d)) {
@@ -282,10 +288,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
                j = 16;
                switch (to_usb_device(ddev)->speed) {
                case USB_SPEED_HIGH:
-                       n = 9;          /* 32 ms = 2^(9-1) uframes */
+                       n = 7;          /* 8 ms = 2^(7-1) uframes */
                        break;
                default:                /* USB_SPEED_FULL */
-                       n = 6;          /* 32 ms = 2^(6-1) frames */
+                       n = 4;          /* 8 ms = 2^(4-1) frames */
                        break;
                }
        }
index 886526b..73cfa13 100644 (file)
@@ -87,7 +87,7 @@ config USB_MUSB_DA8XX
 config USB_MUSB_TUSB6010
        tristate "TUSB6010"
        depends on HAS_IOMEM
-       depends on ARCH_OMAP2PLUS || COMPILE_TEST
+       depends on (ARCH_OMAP2PLUS || COMPILE_TEST) && !BLACKFIN
        depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules
 
 config USB_MUSB_OMAP2PLUS
index a204782..e98b6e5 100644 (file)
@@ -54,7 +54,8 @@ DEVICE(funsoft, FUNSOFT_IDS);
 /* Infineon Flashloader driver */
 #define FLASHLOADER_IDS()              \
        { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
-       { USB_DEVICE(0x8087, 0x0716) }
+       { USB_DEVICE(0x8087, 0x0716) }, \
+       { USB_DEVICE(0x8087, 0x0801) }
 DEVICE(flashloader, FLASHLOADER_IDS);
 
 /* Google Serial USB SubClass */
index fb8e45b..4fe81d1 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
        static const struct dentry_operations ops = {
                .d_dname        = simple_dname,
        };
-       return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
+       struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+                                          AIO_RING_MAGIC);
+
+       if (!IS_ERR(root))
+               root->d_sb->s_iflags |= SB_I_NOEXEC;
+       return root;
 }
 
 /* aio_setup
index b493909..d8e6d42 100644 (file)
@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry,
        }
        return NULL;
 }
+
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
        struct dentry *root = sb->s_root;
        struct dentry *dentry;
        struct dentry *expired;
+       struct dentry *found;
        struct autofs_info *ino;
 
        if (!root)
@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
        dentry = NULL;
        while ((dentry = get_next_positive_subdir(dentry, root))) {
+               int flags = how;
+
                spin_lock(&sbi->fs_lock);
                ino = autofs4_dentry_ino(dentry);
-               if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
-                       expired = NULL;
-               else
-                       expired = should_expire(dentry, mnt, timeout, how);
-               if (!expired) {
+               if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
                        spin_unlock(&sbi->fs_lock);
                        continue;
                }
+               spin_unlock(&sbi->fs_lock);
+
+               expired = should_expire(dentry, mnt, timeout, flags);
+               if (!expired)
+                       continue;
+
+               spin_lock(&sbi->fs_lock);
                ino = autofs4_dentry_ino(expired);
                ino->flags |= AUTOFS_INF_WANT_EXPIRE;
                spin_unlock(&sbi->fs_lock);
                synchronize_rcu();
-               spin_lock(&sbi->fs_lock);
-               if (should_expire(expired, mnt, timeout, how)) {
-                       if (expired != dentry)
-                               dput(dentry);
-                       goto found;
-               }
 
+               /* Make sure a reference is not taken on found if
+                * things have changed.
+                */
+               flags &= ~AUTOFS_EXP_LEAVES;
+               found = should_expire(expired, mnt, timeout, how);
+               if (!found || found != expired)
+                       /* Something has changed, continue */
+                       goto next;
+
+               if (expired != dentry)
+                       dput(dentry);
+
+               spin_lock(&sbi->fs_lock);
+               goto found;
+next:
+               spin_lock(&sbi->fs_lock);
                ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+               spin_unlock(&sbi->fs_lock);
                if (expired != dentry)
                        dput(expired);
-               spin_unlock(&sbi->fs_lock);
        }
        return NULL;
 
@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
        struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
        struct autofs_info *ino = autofs4_dentry_ino(dentry);
        int status;
+       int state;
 
        /* Block on any pending expire */
        if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
        if (rcu_walk)
                return -ECHILD;
 
+retry:
        spin_lock(&sbi->fs_lock);
-       if (ino->flags & AUTOFS_INF_EXPIRING) {
+       state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+       if (state == AUTOFS_INF_WANT_EXPIRE) {
+               spin_unlock(&sbi->fs_lock);
+               /*
+                * Possibly being selected for expire, wait until
+                * it's selected or not.
+                */
+               schedule_timeout_uninterruptible(HZ/10);
+               goto retry;
+       }
+       if (state & AUTOFS_INF_EXPIRING) {
                spin_unlock(&sbi->fs_lock);
 
                pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
index 6bbec5e..14ae4b8 100644 (file)
@@ -609,6 +609,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
        char *s, *p;
        char sep;
 
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+               return dget(sb->s_root);
+
        full_path = cifs_build_path_to_root(vol, cifs_sb,
                                            cifs_sb_master_tcon(cifs_sb));
        if (full_path == NULL)
@@ -686,26 +689,22 @@ cifs_do_mount(struct file_system_type *fs_type,
        cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
        if (cifs_sb->mountdata == NULL) {
                root = ERR_PTR(-ENOMEM);
-               goto out_cifs_sb;
+               goto out_free;
        }
 
-       if (volume_info->prepath) {
-               cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL);
-               if (cifs_sb->prepath == NULL) {
-                       root = ERR_PTR(-ENOMEM);
-                       goto out_cifs_sb;
-               }
+       rc = cifs_setup_cifs_sb(volume_info, cifs_sb);
+       if (rc) {
+               root = ERR_PTR(rc);
+               goto out_free;
        }
 
-       cifs_setup_cifs_sb(volume_info, cifs_sb);
-
        rc = cifs_mount(cifs_sb, volume_info);
        if (rc) {
                if (!(flags & MS_SILENT))
                        cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
                                 rc);
                root = ERR_PTR(rc);
-               goto out_mountdata;
+               goto out_free;
        }
 
        mnt_data.vol = volume_info;
@@ -735,11 +734,7 @@ cifs_do_mount(struct file_system_type *fs_type,
                sb->s_flags |= MS_ACTIVE;
        }
 
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
-               root = dget(sb->s_root);
-       else
-               root = cifs_get_root(volume_info, sb);
-
+       root = cifs_get_root(volume_info, sb);
        if (IS_ERR(root))
                goto out_super;
 
@@ -752,9 +747,9 @@ out:
        cifs_cleanup_volume_info(volume_info);
        return root;
 
-out_mountdata:
+out_free:
+       kfree(cifs_sb->prepath);
        kfree(cifs_sb->mountdata);
-out_cifs_sb:
        kfree(cifs_sb);
 out_nls:
        unload_nls(volume_info->local_nls);
index 1243bd3..95dab43 100644 (file)
@@ -184,7 +184,7 @@ extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
                                 unsigned int to_read);
 extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
                                      struct page *page, unsigned int to_read);
-extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
+extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
                               struct cifs_sb_info *cifs_sb);
 extern int cifs_match_super(struct super_block *, void *);
 extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
index 7ae0328..2e4f4ba 100644 (file)
@@ -2781,6 +2781,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
        return 1;
 }
 
+static int
+match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data)
+{
+       struct cifs_sb_info *old = CIFS_SB(sb);
+       struct cifs_sb_info *new = mnt_data->cifs_sb;
+
+       if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) {
+               if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH))
+                       return 0;
+               /* The prepath should be null terminated strings */
+               if (strcmp(new->prepath, old->prepath))
+                       return 0;
+
+               return 1;
+       }
+       return 0;
+}
+
 int
 cifs_match_super(struct super_block *sb, void *data)
 {
@@ -2808,7 +2826,8 @@ cifs_match_super(struct super_block *sb, void *data)
 
        if (!match_server(tcp_srv, volume_info) ||
            !match_session(ses, volume_info) ||
-           !match_tcon(tcon, volume_info->UNC)) {
+           !match_tcon(tcon, volume_info->UNC) ||
+           !match_prepath(sb, mnt_data)) {
                rc = 0;
                goto out;
        }
@@ -3222,7 +3241,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
        }
 }
 
-void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
+int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
                        struct cifs_sb_info *cifs_sb)
 {
        INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
@@ -3316,6 +3335,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 
        if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
                cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n");
+
+       if (pvolume_info->prepath) {
+               cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL);
+               if (cifs_sb->prepath == NULL)
+                       return -ENOMEM;
+       }
+
+       return 0;
 }
 
 static void
index 0f56deb..c415668 100644 (file)
@@ -568,7 +568,7 @@ static int ioctl_fsthaw(struct file *filp)
        return thaw_super(sb);
 }
 
-static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+static int ioctl_file_dedupe_range(struct file *file, void __user *arg)
 {
        struct file_dedupe_range __user *argp = arg;
        struct file_dedupe_range *same = NULL;
@@ -582,6 +582,10 @@ static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
        }
 
        size = offsetof(struct file_dedupe_range __user, info[count]);
+       if (size > PAGE_SIZE) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        same = memdup_user(argp, size);
        if (IS_ERR(same)) {
index 7d62097..ca699dd 100644 (file)
@@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
        if (result <= 0)
                goto out;
 
-       written = generic_write_sync(iocb, result);
+       result = generic_write_sync(iocb, result);
+       if (result < 0)
+               goto out;
+       written = result;
        iocb->ki_pos += written;
 
        /* Return error values */
index f5aecaa..a9dec32 100644 (file)
@@ -7570,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
        status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        trace_nfs4_create_session(clp, status);
 
+       switch (status) {
+       case -NFS4ERR_STALE_CLIENTID:
+       case -NFS4ERR_DELAY:
+       case -ETIMEDOUT:
+       case -EACCES:
+       case -EAGAIN:
+               goto out;
+       };
+
+       clp->cl_seqid++;
        if (!status) {
                /* Verify the session's negotiated channel_attrs values */
                status = nfs4_verify_channel_attrs(&args, &res);
                /* Increment the clientid slot sequence id */
-               if (clp->cl_seqid == res.seqid)
-                       clp->cl_seqid++;
                if (status)
                        goto out;
                nfs4_update_session(session, &res);
@@ -8190,10 +8198,13 @@ static void nfs4_layoutreturn_release(void *calldata)
 
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
-       pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
-                       be32_to_cpu(lrp->args.stateid.seqid));
-       if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
+       if (lrp->res.lrs_present) {
+               pnfs_mark_matching_lsegs_invalid(lo, &freeme,
+                               &lrp->args.range,
+                               be32_to_cpu(lrp->args.stateid.seqid));
                pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+       } else
+               pnfs_mark_layout_stateid_invalid(lo, &freeme);
        pnfs_clear_layoutreturn_waitbit(lo);
        spin_unlock(&lo->plh_inode->i_lock);
        nfs4_sequence_free_slot(&lrp->res.seq_res);
index 6daf034..2c93a85 100644 (file)
@@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
        /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
        atomic_dec(&lo->plh_refcount);
        if (list_empty(&lo->plh_segs)) {
-               set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+               if (atomic_read(&lo->plh_outstanding) == 0)
+                       set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
        }
        rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
@@ -768,17 +769,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
        pnfs_destroy_layouts_byclid(clp, false);
 }
 
+static void
+pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
+{
+       lo->plh_return_iomode = 0;
+       lo->plh_return_seq = 0;
+       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+}
+
 /* update lo->plh_stateid with new if is more recent */
 void
 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                        bool update_barrier)
 {
        u32 oldseq, newseq, new_barrier = 0;
-       bool invalid = !pnfs_layout_is_valid(lo);
 
        oldseq = be32_to_cpu(lo->plh_stateid.seqid);
        newseq = be32_to_cpu(new->seqid);
-       if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
+
+       if (!pnfs_layout_is_valid(lo)) {
+               nfs4_stateid_copy(&lo->plh_stateid, new);
+               lo->plh_barrier = newseq;
+               pnfs_clear_layoutreturn_info(lo);
+               clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+               return;
+       }
+       if (pnfs_seqid_is_newer(newseq, oldseq)) {
                nfs4_stateid_copy(&lo->plh_stateid, new);
                /*
                 * Because of wraparound, we want to keep the barrier
@@ -790,7 +806,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                new_barrier = be32_to_cpu(new->seqid);
        else if (new_barrier == 0)
                return;
-       if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
+       if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
                lo->plh_barrier = new_barrier;
 }
 
@@ -886,19 +902,14 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
-static void
-pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
-{
-       lo->plh_return_iomode = 0;
-       lo->plh_return_seq = 0;
-       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
-}
-
 static bool
 pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
                nfs4_stateid *stateid,
                enum pnfs_iomode *iomode)
 {
+       /* Serialise LAYOUTGET/LAYOUTRETURN */
+       if (atomic_read(&lo->plh_outstanding) != 0)
+               return false;
        if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
                return false;
        pnfs_get_layout_hdr(lo);
@@ -1798,16 +1809,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                 */
                pnfs_mark_layout_stateid_invalid(lo, &free_me);
 
-               nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
-               lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
+               pnfs_set_layout_stateid(lo, &res->stateid, true);
        }
 
        pnfs_get_lseg(lseg);
        pnfs_layout_insert_lseg(lo, lseg, &free_me);
-       if (!pnfs_layout_is_valid(lo)) {
-               pnfs_clear_layoutreturn_info(lo);
-               clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-       }
 
 
        if (res->return_on_close)
index d2f97ec..e0e5f7c 100644 (file)
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
 
        pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-       wait_event(group->fanotify_data.access_waitq, event->response ||
-                               atomic_read(&group->fanotify_data.bypass_perm));
-
-       if (!event->response) { /* bypass_perm set */
-               /*
-                * Event was canceled because group is being destroyed. Remove
-                * it from group's event list because we are responsible for
-                * freeing the permission event.
-                */
-               fsnotify_remove_event(group, &event->fae.fse);
-               return 0;
-       }
+       wait_event(group->fanotify_data.access_waitq, event->response);
 
        /* userspace responded, convert to something usable */
        switch (event->response) {
index 8e8e6bc..a643138 100644 (file)
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        struct fanotify_perm_event_info *event, *next;
+       struct fsnotify_event *fsn_event;
 
        /*
-        * There may be still new events arriving in the notification queue
-        * but since userspace cannot use fanotify fd anymore, no event can
-        * enter or leave access_list by now.
+        * Stop new events from arriving in the notification queue. since
+        * userspace cannot use fanotify fd anymore, no event can enter or
+        * leave access_list by now either.
         */
-       spin_lock(&group->fanotify_data.access_lock);
-
-       atomic_inc(&group->fanotify_data.bypass_perm);
+       fsnotify_group_stop_queueing(group);
 
+       /*
+        * Process all permission events on access_list and notification queue
+        * and simulate reply from userspace.
+        */
+       spin_lock(&group->fanotify_data.access_lock);
        list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
                                 fae.fse.list) {
                pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
        spin_unlock(&group->fanotify_data.access_lock);
 
        /*
-        * Since bypass_perm is set, newly queued events will not wait for
-        * access response. Wake up the already sleeping ones now.
-        * synchronize_srcu() in fsnotify_destroy_group() will wait for all
-        * processes sleeping in fanotify_handle_event() waiting for access
-        * response and thus also for all permission events to be freed.
+        * Destroy all non-permission events. For permission events just
+        * dequeue them and set the response. They will be freed once the
+        * response is consumed and fanotify_get_response() returns.
         */
+       mutex_lock(&group->notification_mutex);
+       while (!fsnotify_notify_queue_is_empty(group)) {
+               fsn_event = fsnotify_remove_first_event(group);
+               if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+                       fsnotify_destroy_event(group, fsn_event);
+               else
+                       FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+       }
+       mutex_unlock(&group->notification_mutex);
+
+       /* Response for all permission events it set, wakeup waiters */
        wake_up(&group->fanotify_data.access_waitq);
 #endif
 
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
        spin_lock_init(&group->fanotify_data.access_lock);
        init_waitqueue_head(&group->fanotify_data.access_waitq);
        INIT_LIST_HEAD(&group->fanotify_data.access_list);
-       atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
        switch (flags & FAN_ALL_CLASS_BITS) {
        case FAN_CLASS_NOTIF:
index 3e2dd85..b47f7cf 100644 (file)
@@ -39,6 +39,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
        kfree(group);
 }
 
+/*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+       mutex_lock(&group->notification_mutex);
+       group->shutdown = true;
+       mutex_unlock(&group->notification_mutex);
+}
+
 /*
  * Trying to get rid of a group. Remove all marks, flush all events and release
  * the group reference.
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
  */
 void fsnotify_destroy_group(struct fsnotify_group *group)
 {
+       /*
+        * Stop queueing new events. The code below is careful enough to not
+        * require this but fanotify needs to stop queuing events even before
+        * fsnotify_destroy_group() is called and this makes the other callers
+        * of fsnotify_destroy_group() to see the same behavior.
+        */
+       fsnotify_group_stop_queueing(group);
+
        /* clear all inode marks for this group, attach them to destroy_list */
        fsnotify_detach_group_marks(group);
 
index a95d8e0..e455e83 100644 (file)
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * Add an event to the group notification queue.  The group can later pull this
  * event off the queue to deal with.  The function returns 0 if the event was
  * added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
  */
 int fsnotify_add_event(struct fsnotify_group *group,
                       struct fsnotify_event *event,
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
 
        mutex_lock(&group->notification_mutex);
 
+       if (group->shutdown) {
+               mutex_unlock(&group->notification_mutex);
+               return 2;
+       }
+
        if (group->q_len >= group->max_events) {
                ret = 2;
                /* Queue overflow event only if it isn't already queued */
@@ -125,21 +131,6 @@ queue:
        return ret;
 }
 
-/*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
-                          struct fsnotify_event *event)
-{
-       mutex_lock(&group->notification_mutex);
-       if (!list_empty(&event->list)) {
-               list_del_init(&event->list);
-               group->q_len--;
-       }
-       mutex_unlock(&group->notification_mutex);
-}
-
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
index 7dabbc3..f165f86 100644 (file)
@@ -5922,7 +5922,6 @@ bail:
 }
 
 static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
-                                        handle_t *handle,
                                         struct inode *data_alloc_inode,
                                         struct buffer_head *data_alloc_bh)
 {
@@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
        struct ocfs2_truncate_log *tl;
        struct inode *tl_inode = osb->osb_tl_inode;
        struct buffer_head *tl_bh = osb->osb_tl_bh;
+       handle_t *handle;
 
        di = (struct ocfs2_dinode *) tl_bh->b_data;
        tl = &di->id2.i_dealloc;
        i = le16_to_cpu(tl->tl_used) - 1;
        while (i >= 0) {
+               handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+               if (IS_ERR(handle)) {
+                       status = PTR_ERR(handle);
+                       mlog_errno(status);
+                       goto bail;
+               }
+
                /* Caller has given us at least enough credits to
                 * update the truncate log dinode */
                status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
@@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
                        }
                }
 
-               status = ocfs2_extend_trans(handle,
-                               OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-               if (status < 0) {
-                       mlog_errno(status);
-                       goto bail;
-               }
+               ocfs2_commit_trans(osb, handle);
                i--;
        }
 
@@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
 {
        int status;
        unsigned int num_to_flush;
-       handle_t *handle;
        struct inode *tl_inode = osb->osb_tl_inode;
        struct inode *data_alloc_inode = NULL;
        struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
                goto out_mutex;
        }
 
-       handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-       if (IS_ERR(handle)) {
-               status = PTR_ERR(handle);
-               mlog_errno(status);
-               goto out_unlock;
-       }
-
-       status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+       status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
                                               data_alloc_bh);
        if (status < 0)
                mlog_errno(status);
 
-       ocfs2_commit_trans(osb, handle);
-
-out_unlock:
        brelse(data_alloc_bh);
        ocfs2_inode_unlock(data_alloc_inode, 1);
 
@@ -6413,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
                goto out_mutex;
        }
 
-       handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
-       if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               mlog_errno(ret);
-               goto out_unlock;
-       }
-
        while (head) {
                if (head->free_bg)
                        bg_blkno = head->free_bg;
                else
                        bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
                                                              head->free_bit);
+               handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
+               if (IS_ERR(handle)) {
+                       ret = PTR_ERR(handle);
+                       mlog_errno(ret);
+                       goto out_unlock;
+               }
+
                trace_ocfs2_free_cached_blocks(
                     (unsigned long long)head->free_blk, head->free_bit);
 
                ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
                                               head->free_bit, bg_blkno, 1);
-               if (ret) {
+               if (ret)
                        mlog_errno(ret);
-                       goto out_journal;
-               }
 
-               ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
-               if (ret) {
-                       mlog_errno(ret);
-                       goto out_journal;
-               }
+               ocfs2_commit_trans(osb, handle);
 
                tmp = head;
                head = head->free_next;
                kfree(tmp);
        }
 
-out_journal:
-       ocfs2_commit_trans(osb, handle);
-
 out_unlock:
        ocfs2_inode_unlock(inode, 1);
        brelse(di_bh);
index 94b1836..b95e7df 100644 (file)
@@ -44,9 +44,6 @@
  * version here in tcp_internal.h should not need to be bumped for
  * filesystem locking changes.
  *
- * New in version 12
- *     - Negotiate hb timeout when storage is down.
- *
  * New in version 11
  *     - Negotiation of filesystem locking in the dlm join.
  *
@@ -78,7 +75,7 @@
  *     - full 64 bit i_size in the metadata lock lvbs
  *     - introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 12ULL
+#define O2NET_PROTOCOL_VERSION 11ULL
 struct o2net_handshake {
        __be64  protocol_version;
        __be64  connector_id;
index cdeafb4..0bb1286 100644 (file)
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
                                  struct dlm_lock *lock, int flags, int type)
 {
        enum dlm_status status;
-       u8 old_owner = res->owner;
 
        mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
             lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 
        spin_lock(&res->spinlock);
        res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
-       lock->convert_pending = 0;
        /* if it failed, move it back to granted queue.
         * if master returns DLM_NORMAL and then down before sending ast,
         * it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
                if (status != DLM_NOTQUEUED)
                        dlm_error(status);
                dlm_revert_pending_convert(res, lock);
-       } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
-                       (old_owner != res->owner)) {
-               mlog(0, "res %.*s is in recovering or has been recovered.\n",
-                               res->lockname.len, res->lockname.name);
+       } else if (!lock->convert_pending) {
+               mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+                               "to granted list, retry convert.\n",
+                               dlm->name, res->lockname.len, res->lockname.name);
                status = DLM_RECOVERING;
        }
+
+       lock->convert_pending = 0;
 bail:
        spin_unlock(&res->spinlock);
 
index 4e7b0dc..0b055bf 100644 (file)
@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
                                       u64 start, u64 len)
 {
        int ret = 0;
-       u64 tmpend, end = start + len;
+       u64 tmpend = 0;
+       u64 end = start + len;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        unsigned int csize = osb->s_clustersize;
        handle_t *handle;
@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
        }
 
        /*
-        * We want to get the byte offset of the end of the 1st cluster.
+        * If start is on a cluster boundary and end is somewhere in another
+        * cluster, we have not COWed the cluster starting at start, unless
+        * end is also within the same cluster. So, in this case, we skip this
+        * first call to ocfs2_zero_range_for_truncate() truncate and move on
+        * to the next one.
         */
-       tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
-       if (tmpend > end)
-               tmpend = end;
+       if ((start & (csize - 1)) != 0) {
+               /*
+                * We want to get the byte offset of the end of the 1st
+                * cluster.
+                */
+               tmpend = (u64)osb->s_clustersize +
+                       (start & ~(osb->s_clustersize - 1));
+               if (tmpend > end)
+                       tmpend = end;
 
-       trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
-                                                (unsigned long long)tmpend);
+               trace_ocfs2_zero_partial_clusters_range1(
+                       (unsigned long long)start,
+                       (unsigned long long)tmpend);
 
-       ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
-       if (ret)
-               mlog_errno(ret);
+               ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+                                                   tmpend);
+               if (ret)
+                       mlog_errno(ret);
+       }
 
        if (tmpend < end) {
                /*
index ea47120..6ad3533 100644 (file)
@@ -1199,14 +1199,24 @@ retry:
                        inode_unlock((*ac)->ac_inode);
 
                        ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
-                       if (ret == 1)
+                       if (ret == 1) {
+                               iput((*ac)->ac_inode);
+                               (*ac)->ac_inode = NULL;
                                goto retry;
+                       }
 
                        if (ret < 0)
                                mlog_errno(ret);
 
                        inode_lock((*ac)->ac_inode);
-                       ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+                       ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+                       if (ret < 0) {
+                               mlog_errno(ret);
+                               inode_unlock((*ac)->ac_inode);
+                               iput((*ac)->ac_inode);
+                               (*ac)->ac_inode = NULL;
+                               goto bail;
+                       }
                }
                if (status < 0) {
                        if (status != -ENOSPC)
index a939f5e..5c89a07 100644 (file)
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 static ssize_t
 read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 {
+       char *buf = file->private_data;
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
-                       char * elf_buf;
-
-                       elf_buf = kzalloc(tsz, GFP_KERNEL);
-                       if (!elf_buf)
-                               return -ENOMEM;
-                       vread(elf_buf, (char *)start, tsz);
+                       vread(buf, (char *)start, tsz);
                        /* we have to zero-fill user buffer even if no read */
-                       if (copy_to_user(buffer, elf_buf, tsz)) {
-                               kfree(elf_buf);
+                       if (copy_to_user(buffer, buf, tsz))
                                return -EFAULT;
-                       }
-                       kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;
 
-                               n = copy_to_user(buffer, (char *)start, tsz);
+                               /*
+                                * Using bounce buffer to bypass the
+                                * hardened user copy kernel text checks.
+                                */
+                               memcpy(buf, (char *) start, tsz);
+                               n = copy_to_user(buffer, buf, tsz);
                                /*
                                 * We cannot distinguish between fault on source
                                 * and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
 {
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
+
+       filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!filp->private_data)
+               return -ENOMEM;
+
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
        return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+       return 0;
+}
 
 static const struct file_operations proc_kcore_operations = {
        .read           = read_kcore,
        .open           = open_kcore,
+       .release        = release_kcore,
        .llseek         = default_llseek,
 };
 
index 183a212..12af049 100644 (file)
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/ramfs.h>
+#include <linux/sched.h>
 
 #include "internal.h"
 
+static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
+               unsigned long addr, unsigned long len, unsigned long pgoff,
+               unsigned long flags)
+{
+       return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
 const struct file_operations ramfs_file_operations = {
        .read_iter      = generic_file_read_iter,
        .write_iter     = generic_file_write_iter,
@@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = {
        .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .llseek         = generic_file_llseek,
+       .get_unmapped_area      = ramfs_mmu_get_unmapped_area,
 };
 
 const struct inode_operations ramfs_file_inode_operations = {
index 5dea1fb..6df9b07 100644 (file)
@@ -231,14 +231,18 @@ extern int __put_user_bad(void) __attribute__((noreturn));
        might_fault();                                          \
        access_ok(VERIFY_READ, __p, sizeof(*ptr)) ?             \
                __get_user((x), (__typeof__(*(ptr)) *)__p) :    \
-               -EFAULT;                                        \
+               ((x) = (__typeof__(*(ptr)))0,-EFAULT);          \
 })
 
 #ifndef __get_user_fn
 static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
 {
-       size = __copy_from_user(x, ptr, size);
-       return size ? -EFAULT : size;
+       size_t n = __copy_from_user(x, ptr, size);
+       if (unlikely(n)) {
+               memset(x + (size - n), 0, n);
+               return -EFAULT;
+       }
+       return 0;
 }
 
 #define __get_user_fn(sz, u, k)        __get_user_fn(sz, u, k)
@@ -258,11 +262,13 @@ extern int __get_user_bad(void) __attribute__((noreturn));
 static inline long copy_from_user(void *to,
                const void __user * from, unsigned long n)
 {
+       unsigned long res = n;
        might_fault();
-       if (access_ok(VERIFY_READ, from, n))
-               return __copy_from_user(to, from, n);
-       else
-               return n;
+       if (likely(access_ok(VERIFY_READ, from, n)))
+               res = __copy_from_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 static inline long copy_to_user(void __user *to,
index 82c3d3b..138bbf7 100644 (file)
@@ -162,10 +162,11 @@ static inline void cec_msg_standby(struct cec_msg *msg)
 
 
 /* One Touch Record Feature */
-static inline void cec_msg_record_off(struct cec_msg *msg)
+static inline void cec_msg_record_off(struct cec_msg *msg, bool reply)
 {
        msg->len = 2;
        msg->msg[1] = CEC_MSG_RECORD_OFF;
+       msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
 }
 
 struct cec_op_arib_data {
@@ -227,7 +228,7 @@ static inline void cec_set_digital_service_id(__u8 *msg,
        if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
                *msg++ = (digital->channel.channel_number_fmt << 2) |
                         (digital->channel.major >> 8);
-               *msg++ = digital->channel.major && 0xff;
+               *msg++ = digital->channel.major & 0xff;
                *msg++ = digital->channel.minor >> 8;
                *msg++ = digital->channel.minor & 0xff;
                *msg++ = 0;
@@ -323,6 +324,7 @@ static inline void cec_msg_record_on_phys_addr(struct cec_msg *msg,
 }
 
 static inline void cec_msg_record_on(struct cec_msg *msg,
+                                    bool reply,
                                     const struct cec_op_record_src *rec_src)
 {
        switch (rec_src->type) {
@@ -346,6 +348,7 @@ static inline void cec_msg_record_on(struct cec_msg *msg,
                                            rec_src->ext_phys_addr.phys_addr);
                break;
        }
+       msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
 }
 
 static inline void cec_ops_record_on(const struct cec_msg *msg,
@@ -1141,6 +1144,75 @@ static inline void cec_msg_give_device_vendor_id(struct cec_msg *msg,
        msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0;
 }
 
+static inline void cec_msg_vendor_command(struct cec_msg *msg,
+                                         __u8 size, const __u8 *vendor_cmd)
+{
+       if (size > 14)
+               size = 14;
+       msg->len = 2 + size;
+       msg->msg[1] = CEC_MSG_VENDOR_COMMAND;
+       memcpy(msg->msg + 2, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command(const struct cec_msg *msg,
+                                         __u8 *size,
+                                         const __u8 **vendor_cmd)
+{
+       *size = msg->len - 2;
+
+       if (*size > 14)
+               *size = 14;
+       *vendor_cmd = msg->msg + 2;
+}
+
+static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg,
+                                                 __u32 vendor_id, __u8 size,
+                                                 const __u8 *vendor_cmd)
+{
+       if (size > 11)
+               size = 11;
+       msg->len = 5 + size;
+       msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID;
+       msg->msg[2] = vendor_id >> 16;
+       msg->msg[3] = (vendor_id >> 8) & 0xff;
+       msg->msg[4] = vendor_id & 0xff;
+       memcpy(msg->msg + 5, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg,
+                                                 __u32 *vendor_id,  __u8 *size,
+                                                 const __u8 **vendor_cmd)
+{
+       *size = msg->len - 5;
+
+       if (*size > 11)
+               *size = 11;
+       *vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
+       *vendor_cmd = msg->msg + 5;
+}
+
+static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg,
+                                                    __u8 size,
+                                                    const __u8 *rc_code)
+{
+       if (size > 14)
+               size = 14;
+       msg->len = 2 + size;
+       msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN;
+       memcpy(msg->msg + 2, rc_code, size);
+}
+
+static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg,
+                                                    __u8 *size,
+                                                    const __u8 **rc_code)
+{
+       *size = msg->len - 2;
+
+       if (*size > 14)
+               *size = 14;
+       *rc_code = msg->msg + 2;
+}
+
 static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg)
 {
        msg->len = 2;
@@ -1277,7 +1349,7 @@ static inline void cec_msg_user_control_pressed(struct cec_msg *msg,
                msg->len += 4;
                msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) |
                              (ui_cmd->channel_identifier.major >> 8);
-               msg->msg[4] = ui_cmd->channel_identifier.major && 0xff;
+               msg->msg[4] = ui_cmd->channel_identifier.major & 0xff;
                msg->msg[5] = ui_cmd->channel_identifier.minor >> 8;
                msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff;
                break;
index b3e2289..851968e 100644 (file)
@@ -364,7 +364,7 @@ struct cec_caps {
  * @num_log_addrs: how many logical addresses should be claimed. Set by the
  *     caller.
  * @vendor_id: the vendor ID of the device. Set by the caller.
- * @flags: set to 0.
+ * @flags: flags.
  * @osd_name: the OSD name of the device. Set by the caller.
  * @primary_device_type: the primary device type for each logical address.
  *     Set by the caller.
@@ -389,6 +389,9 @@ struct cec_log_addrs {
        __u8 features[CEC_MAX_LOG_ADDRS][12];
 };
 
+/* Allow a fallback to unregistered */
+#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK  (1 << 0)
+
 /* Events */
 
 /* Event that occurs when the adapter state changes */
index 436aa4e..6685698 100644 (file)
@@ -527,13 +527,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
  * object's lifetime is managed by something other than RCU.  That
  * "something other" might be reference counting or simple immortality.
  *
- * The seemingly unused size_t variable is to validate @p is indeed a pointer
- * type by making sure it can be dereferenced.
+ * The seemingly unused variable ___typecheck_p validates that @p is
+ * indeed a pointer type by using a pointer to typeof(*p) as the type.
+ * Taking a pointer to typeof(*p) again is needed in case p is void *.
  */
 #define lockless_dereference(p) \
 ({ \
        typeof(p) _________p1 = READ_ONCE(p); \
-       size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \
+       typeof(*(p)) *___typecheck_p __maybe_unused; \
        smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
        (_________p1); \
 })
index 242bf53..34bd805 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __CPUHOTPLUG_H
 #define __CPUHOTPLUG_H
 
+#include <linux/types.h>
+
 enum cpuhp_state {
        CPUHP_OFFLINE,
        CPUHP_CREATE_THREADS,
index 7f5a582..0148a30 100644 (file)
@@ -118,6 +118,15 @@ typedef struct {
        u32 imagesize;
 } efi_capsule_header_t;
 
+struct efi_boot_memmap {
+       efi_memory_desc_t       **map;
+       unsigned long           *map_size;
+       unsigned long           *desc_size;
+       u32                     *desc_ver;
+       unsigned long           *key_ptr;
+       unsigned long           *buff_size;
+};
+
 /*
  * EFI capsule flags
  */
@@ -946,7 +955,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm,
 /* Iterate through an efi_memory_map */
 #define for_each_efi_memory_desc_in_map(m, md)                            \
        for ((md) = (m)->map;                                              \
-            ((void *)(md) + (m)->desc_size) <= (m)->map_end;              \
+            (md) && ((void *)(md) + (m)->desc_size) <= (m)->map_end;      \
             (md) = (void *)(md) + (m)->desc_size)
 
 /**
@@ -1371,11 +1380,7 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
                          efi_loaded_image_t *image, int *cmd_line_len);
 
 efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
-                               efi_memory_desc_t **map,
-                               unsigned long *map_size,
-                               unsigned long *desc_size,
-                               u32 *desc_ver,
-                               unsigned long *key_ptr);
+                               struct efi_boot_memmap *map);
 
 efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
                           unsigned long size, unsigned long align,
@@ -1457,4 +1462,14 @@ extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
        arch_efi_call_virt_teardown();                                  \
 })
 
+typedef efi_status_t (*efi_exit_boot_map_processing)(
+       efi_system_table_t *sys_table_arg,
+       struct efi_boot_memmap *map,
+       void *priv);
+
+efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table,
+                                   void *handle,
+                                   struct efi_boot_memmap *map,
+                                   void *priv,
+                                   efi_exit_boot_map_processing priv_func);
 #endif /* _LINUX_EFI_H */
index 58205f3..7268ed0 100644 (file)
@@ -148,6 +148,7 @@ struct fsnotify_group {
        #define FS_PRIO_1       1 /* fanotify content based access control */
        #define FS_PRIO_2       2 /* fanotify pre-content access */
        unsigned int priority;
+       bool shutdown;          /* group is being shut down, don't queue more events */
 
        /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
        struct mutex mark_mutex;        /* protect marks_list */
@@ -179,7 +180,6 @@ struct fsnotify_group {
                        spinlock_t access_lock;
                        struct list_head access_list;
                        wait_queue_head_t access_waitq;
-                       atomic_t bypass_perm;
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
                        int f_flags;
                        unsigned int max_marks;
@@ -292,6 +292,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op
 extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
+/* group destruction begins, stop queuing new events */
+extern void fsnotify_group_stop_queueing(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */
@@ -304,8 +306,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group,
                              struct fsnotify_event *event,
                              int (*merge)(struct list_head *,
                                           struct fsnotify_event *));
-/* Remove passed event from groups notification queue */
-extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
index f923d15..0b17c58 100644 (file)
@@ -25,5 +25,6 @@ struct ifla_vf_info {
        __u32 max_tx_rate;
        __u32 rss_query_en;
        __u32 trusted;
+       __be16 vlan_proto;
 };
 #endif /* _LINUX_IF_LINK_H */
index b52424e..0ac26c8 100644 (file)
@@ -945,6 +945,16 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
 static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
 #endif
 
+/*
+ * The irqsave variants are for usage in non interrupt code. Do not use
+ * them in irq_chip callbacks. Use irq_gc_lock() instead.
+ */
+#define irq_gc_lock_irqsave(gc, flags) \
+       raw_spin_lock_irqsave(&(gc)->lock, flags)
+
+#define irq_gc_unlock_irqrestore(gc, flags)    \
+       raw_spin_unlock_irqrestore(&(gc)->lock, flags)
+
 static inline void irq_reg_writel(struct irq_chip_generic *gc,
                                  u32 val, int reg_offset)
 {
index 2b6a204..aa118ba 100644 (file)
@@ -231,6 +231,11 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec)
        return ktime_sub_ns(kt, usec * NSEC_PER_USEC);
 }
 
+static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec)
+{
+       return ktime_sub_ns(kt, msec * NSEC_PER_MSEC);
+}
+
 extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
 
 /**
index 116b284..1f35686 100644 (file)
@@ -309,7 +309,8 @@ int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx,
                      struct ifla_vf_stats *vf_stats);
 u32 mlx4_comm_get_version(void);
 int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac);
-int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan,
+                    u8 qos, __be16 proto);
 int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
                     int max_tx_rate);
 int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
index 42da355..59b50d3 100644 (file)
@@ -221,6 +221,7 @@ enum {
        MLX4_DEV_CAP_FLAG2_ROCE_V1_V2           = 1ULL <<  33,
        MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER   = 1ULL <<  34,
        MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT        = 1ULL <<  35,
+       MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP          = 1ULL <<  36,
 };
 
 enum {
@@ -1371,6 +1372,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port,
 int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
 int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
 int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
+int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
+                                     bool *vlan_offload_disabled);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
index deaa221..b4ee8f6 100644 (file)
@@ -160,6 +160,7 @@ struct mlx4_qp_path {
 
 enum { /* fl */
        MLX4_FL_CV      = 1 << 6,
+       MLX4_FL_SV      = 1 << 5,
        MLX4_FL_ETH_HIDE_CQE_VLAN       = 1 << 2,
        MLX4_FL_ETH_SRC_CHECK_MC_LB     = 1 << 1,
        MLX4_FL_ETH_SRC_CHECK_UC_LB     = 1 << 0,
@@ -267,6 +268,7 @@ enum {
        MLX4_UPD_QP_PATH_MASK_FVL_RX                    = 16 + 32,
        MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB       = 18 + 32,
        MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB       = 19 + 32,
+       MLX4_UPD_QP_PATH_MASK_SV                        = 22 + 32,
 };
 
 enum { /* param3 */
index 69f242c..136ae6b 100644 (file)
@@ -946,7 +946,8 @@ struct netdev_xdp {
  *
  *     SR-IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
- * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan,
+ *                       u8 qos, __be16 proto);
  * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
  *                       int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
@@ -1187,7 +1188,8 @@ struct net_device_ops {
        int                     (*ndo_set_vf_mac)(struct net_device *dev,
                                                  int queue, u8 *mac);
        int                     (*ndo_set_vf_vlan)(struct net_device *dev,
-                                                  int queue, u16 vlan, u8 qos);
+                                                  int queue, u16 vlan,
+                                                  u8 qos, __be16 proto);
        int                     (*ndo_set_vf_rate)(struct net_device *dev,
                                                   int vf, int min_tx_rate,
                                                   int max_tx_rate);
@@ -1797,7 +1799,7 @@ struct net_device {
 #endif
        struct netdev_queue __rcu *ingress_queue;
 #ifdef CONFIG_NETFILTER_INGRESS
-       struct list_head        nf_hooks_ingress;
+       struct nf_hook_entry __rcu *nf_hooks_ingress;
 #endif
 
        unsigned char           broadcast[MAX_ADDR_LEN];
index e82b767..abc7fdc 100644 (file)
@@ -55,12 +55,34 @@ struct nf_hook_state {
        struct net_device *out;
        struct sock *sk;
        struct net *net;
-       struct list_head *hook_list;
+       struct nf_hook_entry __rcu *hook_entries;
        int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 };
 
+typedef unsigned int nf_hookfn(void *priv,
+                              struct sk_buff *skb,
+                              const struct nf_hook_state *state);
+struct nf_hook_ops {
+       struct list_head        list;
+
+       /* User fills in from here down. */
+       nf_hookfn               *hook;
+       struct net_device       *dev;
+       void                    *priv;
+       u_int8_t                pf;
+       unsigned int            hooknum;
+       /* Hooks are ordered in ascending priority. */
+       int                     priority;
+};
+
+struct nf_hook_entry {
+       struct nf_hook_entry __rcu      *next;
+       struct nf_hook_ops              ops;
+       const struct nf_hook_ops        *orig_ops;
+};
+
 static inline void nf_hook_state_init(struct nf_hook_state *p,
-                                     struct list_head *hook_list,
+                                     struct nf_hook_entry *hook_entry,
                                      unsigned int hook,
                                      int thresh, u_int8_t pf,
                                      struct net_device *indev,
@@ -76,26 +98,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
        p->out = outdev;
        p->sk = sk;
        p->net = net;
-       p->hook_list = hook_list;
+       RCU_INIT_POINTER(p->hook_entries, hook_entry);
        p->okfn = okfn;
 }
 
-typedef unsigned int nf_hookfn(void *priv,
-                              struct sk_buff *skb,
-                              const struct nf_hook_state *state);
-
-struct nf_hook_ops {
-       struct list_head        list;
 
-       /* User fills in from here down. */
-       nf_hookfn               *hook;
-       struct net_device       *dev;
-       void                    *priv;
-       u_int8_t                pf;
-       unsigned int            hooknum;
-       /* Hooks are ordered in ascending priority. */
-       int                     priority;
-};
 
 struct nf_sockopt_ops {
        struct list_head list;
@@ -163,7 +170,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
                                 int (*okfn)(struct net *, struct sock *, struct sk_buff *),
                                 int thresh)
 {
-       struct list_head *hook_list;
+       struct nf_hook_entry *hook_head;
+       int ret = 1;
 
 #ifdef HAVE_JUMP_LABEL
        if (__builtin_constant_p(pf) &&
@@ -172,16 +180,19 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
                return 1;
 #endif
 
-       hook_list = &net->nf.hooks[pf][hook];
-
-       if (!list_empty(hook_list)) {
+       rcu_read_lock();
+       hook_head = rcu_dereference(net->nf.hooks[pf][hook]);
+       if (hook_head) {
                struct nf_hook_state state;
 
-               nf_hook_state_init(&state, hook_list, hook, thresh,
+               nf_hook_state_init(&state, hook_head, hook, thresh,
                                   pf, indev, outdev, sk, net, okfn);
-               return nf_hook_slow(skb, &state);
+
+               ret = nf_hook_slow(skb, &state);
        }
-       return 1;
+       rcu_read_unlock();
+
+       return ret;
 }
 
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
index 2755057..1d1ef4e 100644 (file)
@@ -4,13 +4,9 @@
 #include <uapi/linux/netfilter/nf_conntrack_common.h>
 
 struct ip_conntrack_stat {
-       unsigned int searched;
        unsigned int found;
-       unsigned int new;
        unsigned int invalid;
        unsigned int ignore;
-       unsigned int delete;
-       unsigned int delete_list;
        unsigned int insert;
        unsigned int insert_failed;
        unsigned int drop;
index df78dc2..dee0acd 100644 (file)
@@ -1,68 +1,8 @@
 #ifndef _CONNTRACK_PROTO_GRE_H
 #define _CONNTRACK_PROTO_GRE_H
 #include <asm/byteorder.h>
-
-/* GRE PROTOCOL HEADER */
-
-/* GRE Version field */
-#define GRE_VERSION_1701       0x0
-#define GRE_VERSION_PPTP       0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP      0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C             0x80
-#define GRE_FLAG_R             0x40
-#define GRE_FLAG_K             0x20
-#define GRE_FLAG_S             0x10
-#define GRE_FLAG_A             0x80
-
-#define GRE_IS_C(f)    ((f)&GRE_FLAG_C)
-#define GRE_IS_R(f)    ((f)&GRE_FLAG_R)
-#define GRE_IS_K(f)    ((f)&GRE_FLAG_K)
-#define GRE_IS_S(f)    ((f)&GRE_FLAG_S)
-#define GRE_IS_A(f)    ((f)&GRE_FLAG_A)
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-       __u16   rec:3,
-               srr:1,
-               seq:1,
-               key:1,
-               routing:1,
-               csum:1,
-               version:3,
-               reserved:4,
-               ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-       __u16   csum:1,
-               routing:1,
-               key:1,
-               seq:1,
-               srr:1,
-               rec:3,
-               ack:1,
-               reserved:4,
-               version:3;
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
-       __be16  protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
-       __u8   flags;           /* bitfield */
-       __u8   version;         /* should be GRE_VERSION_PPTP */
-       __be16 protocol;        /* should be GRE_PROTOCOL_PPTP */
-       __be16 payload_len;     /* size of ppp payload, not inc. gre header */
-       __be16 call_id;         /* peer's call_id for this session */
-       __be32 seq;             /* sequence number.  Present if S==1 */
-       __be32 ack;             /* seq number of highest packet received by */
-                               /*  sender in this session */
-};
+#include <net/gre.h>
+#include <net/pptp.h>
 
 struct nf_ct_gre {
        unsigned int stream_timeout;
index 5fcd375..33e37fb 100644 (file)
@@ -11,22 +11,30 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
        if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
                return false;
 #endif
-       return !list_empty(&skb->dev->nf_hooks_ingress);
+       return rcu_access_pointer(skb->dev->nf_hooks_ingress);
 }
 
+/* caller must hold rcu_read_lock */
 static inline int nf_hook_ingress(struct sk_buff *skb)
 {
+       struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
        struct nf_hook_state state;
 
-       nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
-                          NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
-                          skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
+       /* Must recheck the ingress hook head, in the event it became NULL
+        * after the check in nf_hook_ingress_active evaluated to true.
+        */
+       if (unlikely(!e))
+               return 0;
+
+       nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+                          NFPROTO_NETDEV, skb->dev, NULL, NULL,
+                          dev_net(skb->dev), NULL);
        return nf_hook_slow(skb, &state);
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
 {
-       INIT_LIST_HEAD(&dev->nf_hooks_ingress);
+       RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
 }
 #else /* CONFIG_NETFILTER_INGRESS */
 static inline int nf_hook_ingress_active(struct sk_buff *skb)
index 66a1260..7e3d537 100644 (file)
@@ -571,56 +571,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
  */
 static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
 {
-       int ret = 0;
        char __user *end = uaddr + size - 1;
 
        if (unlikely(size == 0))
-               return ret;
+               return 0;
 
+       if (unlikely(uaddr > end))
+               return -EFAULT;
        /*
         * Writing zeroes into userspace here is OK, because we know that if
         * the zero gets there, we'll be overwriting it.
         */
-       while (uaddr <= end) {
-               ret = __put_user(0, uaddr);
-               if (ret != 0)
-                       return ret;
+       do {
+               if (unlikely(__put_user(0, uaddr) != 0))
+                       return -EFAULT;
                uaddr += PAGE_SIZE;
-       }
+       } while (uaddr <= end);
 
        /* Check whether the range spilled into the next page. */
        if (((unsigned long)uaddr & PAGE_MASK) ==
                        ((unsigned long)end & PAGE_MASK))
-               ret = __put_user(0, end);
+               return __put_user(0, end);
 
-       return ret;
+       return 0;
 }
 
 static inline int fault_in_multipages_readable(const char __user *uaddr,
                                               int size)
 {
        volatile char c;
-       int ret = 0;
        const char __user *end = uaddr + size - 1;
 
        if (unlikely(size == 0))
-               return ret;
+               return 0;
 
-       while (uaddr <= end) {
-               ret = __get_user(c, uaddr);
-               if (ret != 0)
-                       return ret;
+       if (unlikely(uaddr > end))
+               return -EFAULT;
+
+       do {
+               if (unlikely(__get_user(c, uaddr) != 0))
+                       return -EFAULT;
                uaddr += PAGE_SIZE;
-       }
+       } while (uaddr <= end);
 
        /* Check whether the range spilled into the next page. */
        if (((unsigned long)uaddr & PAGE_MASK) ==
                        ((unsigned long)end & PAGE_MASK)) {
-               ret = __get_user(c, end);
-               (void)c;
+               return __get_user(c, end);
        }
 
-       return ret;
+       return 0;
 }
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
index 2d24b28..e25f183 100644 (file)
@@ -80,6 +80,7 @@ typedef enum {
        PHY_INTERFACE_MODE_XGMII,
        PHY_INTERFACE_MODE_MOCA,
        PHY_INTERFACE_MODE_QSGMII,
+       PHY_INTERFACE_MODE_TRGMII,
        PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
@@ -123,6 +124,8 @@ static inline const char *phy_modes(phy_interface_t interface)
                return "moca";
        case PHY_INTERFACE_MODE_QSGMII:
                return "qsgmii";
+       case PHY_INTERFACE_MODE_TRGMII:
+               return "trgmii";
        default:
                return "unknown";
        }
index 1b5d1cd..75b4aaf 100644 (file)
@@ -76,7 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes);
+#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i);
index dc7854b..fdb5d60 100644 (file)
@@ -57,8 +57,8 @@ struct cec_devnode {
        int minor;
        bool registered;
        bool unregistered;
-       struct mutex fhs_lock;
        struct list_head fhs;
+       struct mutex lock;
 };
 
 struct cec_adapter;
index 7556646..b122196 100644 (file)
@@ -143,6 +143,7 @@ struct dsa_port {
        struct net_device       *netdev;
        struct device_node      *dn;
        unsigned int            ageing_time;
+       u8                      stp_state;
 };
 
 struct dsa_switch {
@@ -339,6 +340,7 @@ struct dsa_switch_ops {
        void    (*port_bridge_leave)(struct dsa_switch *ds, int port);
        void    (*port_stp_state_set)(struct dsa_switch *ds, int port,
                                      u8 state);
+       void    (*port_fast_age)(struct dsa_switch *ds, int port);
 
        /*
         * VLAN support
index e8d1448..0b0c35c 100644 (file)
@@ -15,6 +15,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 
 void nf_bridge_update_protocol(struct sk_buff *skb);
 
+int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk,
+                     struct sk_buff *skb, struct net_device *indev,
+                     struct net_device *outdev,
+                     int (*okfn)(struct net *, struct sock *,
+                                 struct sk_buff *));
+
 static inline struct nf_bridge_info *
 nf_bridge_info_get(const struct sk_buff *skb)
 {
index cdc920b..8992e42 100644 (file)
@@ -63,10 +63,6 @@ struct nf_conntrack_l3proto {
 
        size_t nla_size;
 
-#ifdef CONFIG_SYSCTL
-       const char              *ctl_table_path;
-#endif /* CONFIG_SYSCTL */
-
        /* Init l3proto pernet data */
        int (*init_net)(struct net *net);
 
index 6793614..e693731 100644 (file)
@@ -27,6 +27,20 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct)
 #endif
 }
 
+static inline bool nf_ct_add_synproxy(struct nf_conn *ct,
+                                     const struct nf_conn *tmpl)
+{
+       if (tmpl && nfct_synproxy(tmpl)) {
+               if (!nfct_seqadj_ext_add(ct))
+                       return false;
+
+               if (!nfct_synproxy_ext_add(ct))
+                       return false;
+       }
+
+       return true;
+}
+
 struct synproxy_stats {
        unsigned int                    syn_received;
        unsigned int                    cookie_invalid;
index ee07dc8..309cd26 100644 (file)
@@ -2,15 +2,10 @@
 #define _NF_LOG_H
 
 #include <linux/netfilter.h>
+#include <linux/netfilter/nf_log.h>
 
-/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
- * disappear once iptables is replaced with pkttables.  Please DO NOT use them
- * for any new code! */
-#define NF_LOG_TCPSEQ          0x01    /* Log TCP sequence numbers */
-#define NF_LOG_TCPOPT          0x02    /* Log TCP options */
-#define NF_LOG_IPOPT           0x04    /* Log IP options */
-#define NF_LOG_UID             0x08    /* Log UID owning local socket */
-#define NF_LOG_MASK            0x0f
+/* Log tcp sequence, tcp options, ip options and uid owning local socket */
+#define NF_LOG_DEFAULT_MASK    0x0f
 
 /* This flag indicates that copy_len field in nf_loginfo is set */
 #define NF_LOG_F_COPY_LEN      0x1
index 0dbce55..2280cfe 100644 (file)
@@ -11,7 +11,6 @@ struct nf_queue_entry {
        struct sk_buff          *skb;
        unsigned int            id;
 
-       struct nf_hook_ops      *elem;
        struct nf_hook_state    state;
        u16                     size; /* sizeof(entry) + saved route keys */
 
@@ -22,10 +21,10 @@ struct nf_queue_entry {
 
 /* Packet queuing */
 struct nf_queue_handler {
-       int                     (*outfn)(struct nf_queue_entry *entry,
-                                        unsigned int queuenum);
-       void                    (*nf_hook_drop)(struct net *net,
-                                               struct nf_hook_ops *ops);
+       int             (*outfn)(struct nf_queue_entry *entry,
+                                unsigned int queuenum);
+       void            (*nf_hook_drop)(struct net *net,
+                                       const struct nf_hook_entry *hooks);
 };
 
 void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
@@ -41,23 +40,19 @@ static inline void init_hashrandom(u32 *jhash_initval)
                *jhash_initval = prandom_u32();
 }
 
-static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
+static inline u32 hash_v4(const struct iphdr *iph, u32 initval)
 {
-       const struct iphdr *iph = ip_hdr(skb);
-
        /* packets in either direction go into same queue */
        if ((__force u32)iph->saddr < (__force u32)iph->daddr)
                return jhash_3words((__force u32)iph->saddr,
-                       (__force u32)iph->daddr, iph->protocol, jhash_initval);
+                       (__force u32)iph->daddr, iph->protocol, initval);
 
        return jhash_3words((__force u32)iph->daddr,
-                       (__force u32)iph->saddr, iph->protocol, jhash_initval);
+                       (__force u32)iph->saddr, iph->protocol, initval);
 }
 
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
+static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval)
 {
-       const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        u32 a, b, c;
 
        if ((__force u32)ip6h->saddr.s6_addr32[3] <
@@ -75,20 +70,50 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
        else
                c = (__force u32) ip6h->daddr.s6_addr32[1];
 
-       return jhash_3words(a, b, c, jhash_initval);
+       return jhash_3words(a, b, c, initval);
+}
+
+static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval)
+{
+       struct ipv6hdr *ip6h, _ip6h;
+       struct iphdr *iph, _iph;
+
+       switch (eth_hdr(skb)->h_proto) {
+       case htons(ETH_P_IP):
+               iph = skb_header_pointer(skb, skb_network_offset(skb),
+                                        sizeof(*iph), &_iph);
+               if (iph)
+                       return hash_v4(iph, initval);
+               break;
+       case htons(ETH_P_IPV6):
+               ip6h = skb_header_pointer(skb, skb_network_offset(skb),
+                                         sizeof(*ip6h), &_ip6h);
+               if (ip6h)
+                       return hash_v6(ip6h, initval);
+               break;
+       }
+
+       return 0;
 }
-#endif
 
 static inline u32
 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
-            u32 jhash_initval)
+            u32 initval)
 {
-       if (family == NFPROTO_IPV4)
-               queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-       else if (family == NFPROTO_IPV6)
-               queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
-#endif
+       switch (family) {
+       case NFPROTO_IPV4:
+               queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval),
+                                         queues_total);
+               break;
+       case NFPROTO_IPV6:
+               queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval),
+                                         queues_total);
+               break;
+       case NFPROTO_BRIDGE:
+               queue += reciprocal_scale(hash_bridge(skb, initval),
+                                         queues_total);
+               break;
+       }
 
        return queue;
 }
index 8972468..5031e07 100644 (file)
@@ -19,6 +19,7 @@ struct nft_pktinfo {
        const struct net_device         *out;
        u8                              pf;
        u8                              hook;
+       bool                            tprot_set;
        u8                              tprot;
        /* for x_tables compatibility */
        struct xt_action_param          xt;
@@ -36,6 +37,23 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
        pkt->pf = pkt->xt.family = state->pf;
 }
 
+static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
+                                               struct sk_buff *skb)
+{
+       pkt->tprot_set = false;
+       pkt->tprot = 0;
+       pkt->xt.thoff = 0;
+       pkt->xt.fragoff = 0;
+}
+
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+                                         struct sk_buff *skb,
+                                         const struct nf_hook_state *state)
+{
+       nft_set_pktinfo(pkt, skb, state);
+       nft_set_pktinfo_proto_unspec(pkt, skb);
+}
+
 /**
  *     struct nft_verdict - nf_tables verdict
  *
@@ -127,6 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
        return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
 }
 
+unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
 unsigned int nft_parse_register(const struct nlattr *attr);
 int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
 
diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h
deleted file mode 100644 (file)
index 511fb79..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _NET_NF_TABLES_BRIDGE_H
-#define _NET_NF_TABLES_BRIDGE_H
-
-int nft_bridge_iphdr_validate(struct sk_buff *skb);
-int nft_bridge_ip6hdr_validate(struct sk_buff *skb);
-
-#endif /* _NET_NF_TABLES_BRIDGE_H */
index a9060dd..00f4f6b 100644 (file)
@@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops;
 int nft_cmp_module_init(void);
 void nft_cmp_module_exit(void);
 
+int nft_range_module_init(void);
+void nft_range_module_exit(void);
+
 int nft_lookup_module_init(void);
 void nft_lookup_module_exit(void);
 
index ca6ef6b..968f00b 100644 (file)
@@ -14,11 +14,54 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
        nft_set_pktinfo(pkt, skb, state);
 
        ip = ip_hdr(pkt->skb);
+       pkt->tprot_set = true;
        pkt->tprot = ip->protocol;
        pkt->xt.thoff = ip_hdrlen(pkt->skb);
        pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 }
 
+static inline int
+__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+                               struct sk_buff *skb,
+                               const struct nf_hook_state *state)
+{
+       struct iphdr *iph, _iph;
+       u32 len, thoff;
+
+       iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
+                                &_iph);
+       if (!iph)
+               return -1;
+
+       iph = ip_hdr(skb);
+       if (iph->ihl < 5 || iph->version != 4)
+               return -1;
+
+       len = ntohs(iph->tot_len);
+       thoff = iph->ihl * 4;
+       if (skb->len < len)
+               return -1;
+       else if (len < thoff)
+               return -1;
+
+       pkt->tprot_set = true;
+       pkt->tprot = iph->protocol;
+       pkt->xt.thoff = thoff;
+       pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+
+       return 0;
+}
+
+static inline void
+nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+                             struct sk_buff *skb,
+                             const struct nf_hook_state *state)
+{
+       nft_set_pktinfo(pkt, skb, state);
+       if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
+               nft_set_pktinfo_proto_unspec(pkt, skb);
+}
+
 extern struct nft_af_info nft_af_ipv4;
 
 #endif
index 8ad39a6..d150b50 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/ipv6.h>
 
-static inline int
+static inline void
 nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
                     struct sk_buff *skb,
                     const struct nf_hook_state *state)
@@ -15,15 +15,64 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
        nft_set_pktinfo(pkt, skb, state);
 
        protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
-       /* If malformed, drop it */
+       if (protohdr < 0) {
+               nft_set_pktinfo_proto_unspec(pkt, skb);
+               return;
+       }
+
+       pkt->tprot_set = true;
+       pkt->tprot = protohdr;
+       pkt->xt.thoff = thoff;
+       pkt->xt.fragoff = frag_off;
+}
+
+static inline int
+__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+                               struct sk_buff *skb,
+                               const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       struct ipv6hdr *ip6h, _ip6h;
+       unsigned int thoff = 0;
+       unsigned short frag_off;
+       int protohdr;
+       u32 pkt_len;
+
+       ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
+                                 &_ip6h);
+       if (!ip6h)
+               return -1;
+
+       if (ip6h->version != 6)
+               return -1;
+
+       pkt_len = ntohs(ip6h->payload_len);
+       if (pkt_len + sizeof(*ip6h) > skb->len)
+               return -1;
+
+       protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
        if (protohdr < 0)
                return -1;
 
+       pkt->tprot_set = true;
        pkt->tprot = protohdr;
        pkt->xt.thoff = thoff;
        pkt->xt.fragoff = frag_off;
 
        return 0;
+#else
+       return -1;
+#endif
+}
+
+static inline void
+nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+                             struct sk_buff *skb,
+                             const struct nf_hook_state *state)
+{
+       nft_set_pktinfo(pkt, skb, state);
+       if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
+               nft_set_pktinfo_proto_unspec(pkt, skb);
 }
 
 extern struct nft_af_info nft_af_ipv6;
index 36d7235..58487b1 100644 (file)
@@ -16,6 +16,6 @@ struct netns_nf {
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *nf_log_dir_header;
 #endif
-       struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+       struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 };
 #endif
index efc0174..ca6c971 100644 (file)
@@ -307,85 +307,27 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
 }
 
 /* Compare two TSNs */
+#define TSN_lt(a,b)    \
+       (typecheck(__u32, a) && \
+        typecheck(__u32, b) && \
+        ((__s32)((a) - (b)) < 0))
 
-/* RFC 1982 - Serial Number Arithmetic
- *
- * 2. Comparison
- *  Then, s1 is said to be equal to s2 if and only if i1 is equal to i2,
- *  in all other cases, s1 is not equal to s2.
- *
- * s1 is said to be less than s2 if, and only if, s1 is not equal to s2,
- * and
- *
- *      (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or
- *      (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1))
- *
- * s1 is said to be greater than s2 if, and only if, s1 is not equal to
- * s2, and
- *
- *      (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or
- *      (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1))
- */
-
-/*
- * RFC 2960
- *  1.6 Serial Number Arithmetic
- *
- * Comparisons and arithmetic on TSNs in this document SHOULD use Serial
- * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32.
- */
-
-enum {
-       TSN_SIGN_BIT = (1<<31)
-};
-
-static inline int TSN_lt(__u32 s, __u32 t)
-{
-       return ((s) - (t)) & TSN_SIGN_BIT;
-}
-
-static inline int TSN_lte(__u32 s, __u32 t)
-{
-       return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT);
-}
+#define TSN_lte(a,b)   \
+       (typecheck(__u32, a) && \
+        typecheck(__u32, b) && \
+        ((__s32)((a) - (b)) <= 0))
 
 /* Compare two SSNs */
-
-/*
- * RFC 2960
- *  1.6 Serial Number Arithmetic
- *
- * Comparisons and arithmetic on Stream Sequence Numbers in this document
- * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where
- * SERIAL_BITS = 16.
- */
-enum {
-       SSN_SIGN_BIT = (1<<15)
-};
-
-static inline int SSN_lt(__u16 s, __u16 t)
-{
-       return ((s) - (t)) & SSN_SIGN_BIT;
-}
-
-static inline int SSN_lte(__u16 s, __u16 t)
-{
-       return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT);
-}
-
-/*
- * ADDIP 3.1.1
- * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial
- * Numbers wrap back to 0 after reaching 4294967295.
- */
-enum {
-       ADDIP_SERIAL_SIGN_BIT = (1<<31)
-};
-
-static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t)
-{
-       return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT);
-}
+#define SSN_lt(a,b)            \
+       (typecheck(__u16, a) && \
+        typecheck(__u16, b) && \
+        ((__s16)((a) - (b)) < 0))
+
+/* ADDIP 3.1.1 */
+#define ADDIP_SERIAL_gte(a,b)  \
+       (typecheck(__u32, a) && \
+        typecheck(__u32, b) && \
+        ((__s32)((b) - (a)) <= 0))
 
 /* Check VTAG of the packet matches the sender's own tag. */
 static inline int
index c797c57..ebf75db 100644 (file)
@@ -1339,6 +1339,16 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
        if (!sk_has_account(sk))
                return;
        sk->sk_forward_alloc += size;
+
+       /* Avoid a possible overflow.
+        * TCP send queues can make this happen, if sk_mem_reclaim()
+        * is not called and more than 2 GBytes are released at once.
+        *
+        * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
+        * no need to hold that much forward allocation anyway.
+        */
+       if (unlikely(sk->sk_forward_alloc >= 1 << 21))
+               __sk_mem_reclaim(sk, 1 << 20);
 }
 
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
index 6b83588..48cca32 100644 (file)
@@ -11,6 +11,7 @@
 #define __NET_TC_VLAN_H
 
 #include <net/act_api.h>
+#include <linux/tc_act/tc_vlan.h>
 
 #define VLAN_F_POP             0x1
 #define VLAN_F_PUSH            0x2
@@ -24,4 +25,28 @@ struct tcf_vlan {
 };
 #define to_vlan(a) ((struct tcf_vlan *)a)
 
+static inline bool is_tcf_vlan(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       if (a->ops && a->ops->type == TCA_ACT_VLAN)
+               return true;
+#endif
+       return false;
+}
+
+static inline u32 tcf_vlan_action(const struct tc_action *a)
+{
+       return to_vlan(a)->tcfv_action;
+}
+
+static inline u16 tcf_vlan_push_vid(const struct tc_action *a)
+{
+       return to_vlan(a)->tcfv_push_vid;
+}
+
+static inline __be16 tcf_vlan_push_proto(const struct tc_action *a)
+{
+       return to_vlan(a)->tcfv_push_proto;
+}
+
 #endif /* __NET_TC_VLAN_H */
index d2fdd6d..31947b9 100644 (file)
@@ -1540,8 +1540,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
 int xfrm6_extract_header(struct sk_buff *skb);
 int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+                 struct ip6_tnl *t);
 int xfrm6_transport_finish(struct sk_buff *skb, int async);
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
 int xfrm6_rcv(struct sk_buff *skb);
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
                     xfrm_address_t *saddr, u8 proto);
index fd6eb3a..703a64b 100644 (file)
@@ -123,6 +123,7 @@ struct rxrpc_ackpacket {
 #define RXRPC_ACK_PING_RESPONSE                7       /* response to RXRPC_ACK_PING */
 #define RXRPC_ACK_DELAY                        8       /* nothing happened since received packet */
 #define RXRPC_ACK_IDLE                 9       /* ACK due to fully received ACK window */
+#define RXRPC_ACK__INVALID             10      /* Representation of invalid ACK reason */
 
        uint8_t         nAcks;          /* number of ACKs */
 #define RXRPC_MAXACKS  255
index 75a5d8b..ada12d0 100644 (file)
@@ -251,38 +251,72 @@ TRACE_EVENT(rxrpc_rx_ack,
 
            TP_printk("c=%p %s f=%08x n=%u",
                      __entry->call,
-                     rxrpc_acks(__entry->reason),
+                     rxrpc_ack_names[__entry->reason],
                      __entry->first,
                      __entry->n_acks)
            );
 
+TRACE_EVENT(rxrpc_tx_data,
+           TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+                    rxrpc_serial_t serial, u8 flags, bool lose),
+
+           TP_ARGS(call, seq, serial, flags, lose),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,        call            )
+                   __field(rxrpc_seq_t,                seq             )
+                   __field(rxrpc_serial_t,             serial          )
+                   __field(u8,                         flags           )
+                   __field(bool,                       lose            )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call;
+                   __entry->seq = seq;
+                   __entry->serial = serial;
+                   __entry->flags = flags;
+                   __entry->lose = lose;
+                          ),
+
+           TP_printk("c=%p DATA %08x q=%08x fl=%02x%s",
+                     __entry->call,
+                     __entry->serial,
+                     __entry->seq,
+                     __entry->flags,
+                     __entry->lose ? " *LOSE*" : "")
+           );
+
 TRACE_EVENT(rxrpc_tx_ack,
-           TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first,
-                    rxrpc_serial_t serial, u8 reason, u8 n_acks),
+           TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
+                    rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial,
+                    u8 reason, u8 n_acks),
 
-           TP_ARGS(call, first, serial, reason, n_acks),
+           TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks),
 
            TP_STRUCT__entry(
                    __field(struct rxrpc_call *,        call            )
-                   __field(rxrpc_seq_t,                first           )
                    __field(rxrpc_serial_t,             serial          )
+                   __field(rxrpc_seq_t,                ack_first       )
+                   __field(rxrpc_serial_t,             ack_serial      )
                    __field(u8,                         reason          )
                    __field(u8,                         n_acks          )
                             ),
 
            TP_fast_assign(
                    __entry->call = call;
-                   __entry->first = first;
                    __entry->serial = serial;
+                   __entry->ack_first = ack_first;
+                   __entry->ack_serial = ack_serial;
                    __entry->reason = reason;
                    __entry->n_acks = n_acks;
                           ),
 
-           TP_printk("c=%p %s f=%08x r=%08x n=%u",
+           TP_printk(" c=%p ACK  %08x %s f=%08x r=%08x n=%u",
                      __entry->call,
-                     rxrpc_acks(__entry->reason),
-                     __entry->first,
                      __entry->serial,
+                     rxrpc_ack_names[__entry->reason],
+                     __entry->ack_first,
+                     __entry->ack_serial,
                      __entry->n_acks)
            );
 
@@ -353,6 +387,234 @@ TRACE_EVENT(rxrpc_recvmsg,
                      __entry->ret)
            );
 
+TRACE_EVENT(rxrpc_rtt_tx,
+           TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_tx_trace why,
+                    rxrpc_serial_t send_serial),
+
+           TP_ARGS(call, why, send_serial),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,        call            )
+                   __field(enum rxrpc_rtt_tx_trace,    why             )
+                   __field(rxrpc_serial_t,             send_serial     )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call;
+                   __entry->why = why;
+                   __entry->send_serial = send_serial;
+                          ),
+
+           TP_printk("c=%p %s sr=%08x",
+                     __entry->call,
+                     rxrpc_rtt_tx_traces[__entry->why],
+                     __entry->send_serial)
+           );
+
+TRACE_EVENT(rxrpc_rtt_rx,
+           TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+                    rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+                    s64 rtt, u8 nr, s64 avg),
+
+           TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,        call            )
+                   __field(enum rxrpc_rtt_rx_trace,    why             )
+                   __field(u8,                         nr              )
+                   __field(rxrpc_serial_t,             send_serial     )
+                   __field(rxrpc_serial_t,             resp_serial     )
+                   __field(s64,                        rtt             )
+                   __field(u64,                        avg             )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call;
+                   __entry->why = why;
+                   __entry->send_serial = send_serial;
+                   __entry->resp_serial = resp_serial;
+                   __entry->rtt = rtt;
+                   __entry->nr = nr;
+                   __entry->avg = avg;
+                          ),
+
+           TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld",
+                     __entry->call,
+                     rxrpc_rtt_rx_traces[__entry->why],
+                     __entry->send_serial,
+                     __entry->resp_serial,
+                     __entry->rtt,
+                     __entry->nr,
+                     __entry->avg)
+           );
+
+TRACE_EVENT(rxrpc_timer,
+           TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+                    unsigned long now),
+
+           TP_ARGS(call, why, now),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,                call            )
+                   __field(enum rxrpc_timer_trace,             why             )
+                   __field(unsigned long,                      now             )
+                   __field(unsigned long,                      expire_at       )
+                   __field(unsigned long,                      ack_at          )
+                   __field(unsigned long,                      resend_at       )
+                   __field(unsigned long,                      timer           )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call       = call;
+                   __entry->why        = why;
+                   __entry->now        = now;
+                   __entry->expire_at  = call->expire_at;
+                   __entry->ack_at     = call->ack_at;
+                   __entry->resend_at  = call->resend_at;
+                   __entry->timer      = call->timer.expires;
+                          ),
+
+           TP_printk("c=%p %s now=%lx x=%ld a=%ld r=%ld t=%ld",
+                     __entry->call,
+                     rxrpc_timer_traces[__entry->why],
+                     __entry->now,
+                     __entry->expire_at - __entry->now,
+                     __entry->ack_at - __entry->now,
+                     __entry->resend_at - __entry->now,
+                     __entry->timer - __entry->now)
+           );
+
+TRACE_EVENT(rxrpc_rx_lose,
+           TP_PROTO(struct rxrpc_skb_priv *sp),
+
+           TP_ARGS(sp),
+
+           TP_STRUCT__entry(
+                   __field_struct(struct rxrpc_host_header,    hdr             )
+                            ),
+
+           TP_fast_assign(
+                   memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr));
+                          ),
+
+           TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s *LOSE*",
+                     __entry->hdr.epoch, __entry->hdr.cid,
+                     __entry->hdr.callNumber, __entry->hdr.serviceId,
+                     __entry->hdr.serial, __entry->hdr.seq,
+                     __entry->hdr.type, __entry->hdr.flags,
+                     __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK")
+           );
+
+TRACE_EVENT(rxrpc_propose_ack,
+           TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why,
+                    u8 ack_reason, rxrpc_serial_t serial, bool immediate,
+                    bool background, enum rxrpc_propose_ack_outcome outcome),
+
+           TP_ARGS(call, why, ack_reason, serial, immediate, background,
+                   outcome),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,                call            )
+                   __field(enum rxrpc_propose_ack_trace,       why             )
+                   __field(rxrpc_serial_t,                     serial          )
+                   __field(u8,                                 ack_reason      )
+                   __field(bool,                               immediate       )
+                   __field(bool,                               background      )
+                   __field(enum rxrpc_propose_ack_outcome,     outcome         )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call       = call;
+                   __entry->why        = why;
+                   __entry->serial     = serial;
+                   __entry->ack_reason = ack_reason;
+                   __entry->immediate  = immediate;
+                   __entry->background = background;
+                   __entry->outcome    = outcome;
+                          ),
+
+           TP_printk("c=%p %s %s r=%08x i=%u b=%u%s",
+                     __entry->call,
+                     rxrpc_propose_ack_traces[__entry->why],
+                     rxrpc_ack_names[__entry->ack_reason],
+                     __entry->serial,
+                     __entry->immediate,
+                     __entry->background,
+                     rxrpc_propose_ack_outcomes[__entry->outcome])
+           );
+
+TRACE_EVENT(rxrpc_retransmit,
+           TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation,
+                    s64 expiry),
+
+           TP_ARGS(call, seq, annotation, expiry),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,        call            )
+                   __field(rxrpc_seq_t,                seq             )
+                   __field(u8,                         annotation      )
+                   __field(s64,                        expiry          )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call = call;
+                   __entry->seq = seq;
+                   __entry->annotation = annotation;
+                   __entry->expiry = expiry;
+                          ),
+
+           TP_printk("c=%p q=%x a=%02x xp=%lld",
+                     __entry->call,
+                     __entry->seq,
+                     __entry->annotation,
+                     __entry->expiry)
+           );
+
+TRACE_EVENT(rxrpc_congest,
+           TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
+                    rxrpc_serial_t ack_serial, enum rxrpc_congest_change change),
+
+           TP_ARGS(call, summary, ack_serial, change),
+
+           TP_STRUCT__entry(
+                   __field(struct rxrpc_call *,                call            )
+                   __field(enum rxrpc_congest_change,          change          )
+                   __field(rxrpc_seq_t,                        hard_ack        )
+                   __field(rxrpc_seq_t,                        top             )
+                   __field(rxrpc_seq_t,                        lowest_nak      )
+                   __field(rxrpc_serial_t,                     ack_serial      )
+                   __field_struct(struct rxrpc_ack_summary,    sum             )
+                            ),
+
+           TP_fast_assign(
+                   __entry->call       = call;
+                   __entry->change     = change;
+                   __entry->hard_ack   = call->tx_hard_ack;
+                   __entry->top        = call->tx_top;
+                   __entry->lowest_nak = call->acks_lowest_nak;
+                   __entry->ack_serial = ack_serial;
+                   memcpy(&__entry->sum, summary, sizeof(__entry->sum));
+                          ),
+
+           TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
+                     __entry->call,
+                     __entry->ack_serial,
+                     rxrpc_ack_names[__entry->sum.ack_reason],
+                     __entry->hard_ack,
+                     rxrpc_congest_modes[__entry->sum.mode],
+                     __entry->sum.cwnd,
+                     __entry->sum.ssthresh,
+                     __entry->sum.nr_acks, __entry->sum.nr_nacks,
+                     __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks,
+                     __entry->sum.nr_rot_new_acks,
+                     __entry->top - __entry->hard_ack,
+                     __entry->sum.cumulative_acks,
+                     __entry->sum.dup_acks,
+                     __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "",
+                     rxrpc_congest_changes[__entry->change],
+                     __entry->sum.retrans_timeo ? " rTxTo" : "")
+           );
+
 #endif /* _TRACE_RXRPC_H */
 
 /* This part must be outside protection */
index e07432b..f09c70b 100644 (file)
@@ -419,6 +419,13 @@ enum bpf_func_id {
         */
        BPF_FUNC_csum_update,
 
+       /**
+        * bpf_set_hash_invalid(skb)
+        * Invalidate current skb>hash.
+        * @skb: pointer to skb
+        */
+       BPF_FUNC_set_hash_invalid,
+
        __BPF_FUNC_MAX_ID,
 };
 
index 7ec9e99..b4fba66 100644 (file)
@@ -619,7 +619,7 @@ enum {
 enum {
        IFLA_VF_UNSPEC,
        IFLA_VF_MAC,            /* Hardware queue specific attributes */
-       IFLA_VF_VLAN,
+       IFLA_VF_VLAN,           /* VLAN ID and QoS */
        IFLA_VF_TX_RATE,        /* Max TX Bandwidth Allocation */
        IFLA_VF_SPOOFCHK,       /* Spoof Checking on/off switch */
        IFLA_VF_LINK_STATE,     /* link state enable/disable/auto switch */
@@ -631,6 +631,7 @@ enum {
        IFLA_VF_TRUST,          /* Trust VF */
        IFLA_VF_IB_NODE_GUID,   /* VF Infiniband node GUID */
        IFLA_VF_IB_PORT_GUID,   /* VF Infiniband port GUID */
+       IFLA_VF_VLAN_LIST,      /* nested list of vlans, option for QinQ */
        __IFLA_VF_MAX,
 };
 
@@ -647,6 +648,22 @@ struct ifla_vf_vlan {
        __u32 qos;
 };
 
+enum {
+       IFLA_VF_VLAN_INFO_UNSPEC,
+       IFLA_VF_VLAN_INFO,      /* VLAN ID, QoS and VLAN protocol */
+       __IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+       __u32 vf;
+       __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+       __u32 qos;
+       __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
 struct ifla_vf_tx_rate {
        __u32 vf;
        __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
index 18d5dc1..92f3c86 100644 (file)
@@ -39,6 +39,7 @@
 #define GRE_IS_REC(f)          ((f) & GRE_REC)
 #define GRE_IS_ACK(f)          ((f) & GRE_ACK)
 
+#define GRE_VERSION_0          __cpu_to_be16(0x0000)
 #define GRE_VERSION_1          __cpu_to_be16(0x0001)
 #define GRE_PROTO_PPP          __cpu_to_be16(0x880b)
 #define GRE_PPTP_KEY_MASK      __cpu_to_be32(0xffff)
diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h
new file mode 100644 (file)
index 0000000..8be21e0
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _NETFILTER_NF_LOG_H
+#define _NETFILTER_NF_LOG_H
+
+#define NF_LOG_TCPSEQ          0x01    /* Log TCP sequence numbers */
+#define NF_LOG_TCPOPT          0x02    /* Log TCP options */
+#define NF_LOG_IPOPT           0x04    /* Log IP options */
+#define NF_LOG_UID             0x08    /* Log UID owning local socket */
+#define NF_LOG_NFLOG           0x10    /* Unsupported, don't reuse */
+#define NF_LOG_MACDECODE       0x20    /* Decode MAC header */
+#define NF_LOG_MASK            0x2f
+
+#endif /* _NETFILTER_NF_LOG_H */
index 28ce01d..c6c4477 100644 (file)
@@ -546,6 +546,35 @@ enum nft_cmp_attributes {
 };
 #define NFTA_CMP_MAX           (__NFTA_CMP_MAX - 1)
 
+/**
+ * enum nft_range_ops - nf_tables range operator
+ *
+ * @NFT_RANGE_EQ: equal
+ * @NFT_RANGE_NEQ: not equal
+ */
+enum nft_range_ops {
+       NFT_RANGE_EQ,
+       NFT_RANGE_NEQ,
+};
+
+/**
+ * enum nft_range_attributes - nf_tables range expression netlink attributes
+ *
+ * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes)
+ * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_range_attributes {
+       NFTA_RANGE_UNSPEC,
+       NFTA_RANGE_SREG,
+       NFTA_RANGE_OP,
+       NFTA_RANGE_FROM_DATA,
+       NFTA_RANGE_TO_DATA,
+       __NFTA_RANGE_MAX
+};
+#define NFTA_RANGE_MAX         (__NFTA_RANGE_MAX - 1)
+
 enum nft_lookup_flags {
        NFT_LOOKUP_F_INV = (1 << 0),
 };
@@ -575,6 +604,10 @@ enum nft_dynset_ops {
        NFT_DYNSET_OP_UPDATE,
 };
 
+enum nft_dynset_flags {
+       NFT_DYNSET_F_INV        = (1 << 0),
+};
+
 /**
  * enum nft_dynset_attributes - dynset expression attributes
  *
@@ -585,6 +618,7 @@ enum nft_dynset_ops {
  * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
  * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
  * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_DYNSET_FLAGS: flags (NLA_U32)
  */
 enum nft_dynset_attributes {
        NFTA_DYNSET_UNSPEC,
@@ -596,6 +630,7 @@ enum nft_dynset_attributes {
        NFTA_DYNSET_TIMEOUT,
        NFTA_DYNSET_EXPR,
        NFTA_DYNSET_PAD,
+       NFTA_DYNSET_FLAGS,
        __NFTA_DYNSET_MAX,
 };
 #define NFTA_DYNSET_MAX                (__NFTA_DYNSET_MAX - 1)
@@ -731,6 +766,7 @@ enum nft_meta_keys {
  * @NFTA_HASH_LEN: source data length (NLA_U32)
  * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
  * @NFTA_HASH_SEED: seed value (NLA_U32)
+ * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
  */
 enum nft_hash_attributes {
        NFTA_HASH_UNSPEC,
@@ -739,6 +775,7 @@ enum nft_hash_attributes {
        NFTA_HASH_LEN,
        NFTA_HASH_MODULUS,
        NFTA_HASH_SEED,
+       NFTA_HASH_OFFSET,
        __NFTA_HASH_MAX,
 };
 #define NFTA_HASH_MAX  (__NFTA_HASH_MAX - 1)
@@ -886,12 +923,14 @@ enum nft_log_attributes {
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers)
  */
 enum nft_queue_attributes {
        NFTA_QUEUE_UNSPEC,
        NFTA_QUEUE_NUM,
        NFTA_QUEUE_TOTAL,
        NFTA_QUEUE_FLAGS,
+       NFTA_QUEUE_SREG_QNUM,
        __NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX         (__NFTA_QUEUE_MAX - 1)
@@ -1126,14 +1165,16 @@ enum nft_trace_types {
  * enum nft_ng_attributes - nf_tables number generator expression netlink attributes
  *
  * @NFTA_NG_DREG: destination register (NLA_U32)
- * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32)
+ * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
  * @NFTA_NG_TYPE: operation type (NLA_U32)
+ * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
  */
 enum nft_ng_attributes {
        NFTA_NG_UNSPEC,
        NFTA_NG_DREG,
-       NFTA_NG_UNTIL,
+       NFTA_NG_MODULUS,
        NFTA_NG_TYPE,
+       NFTA_NG_OFFSET,
        __NFTA_NG_MAX
 };
 #define NFTA_NG_MAX    (__NFTA_NG_MAX - 1)
index 9df7897..6deb886 100644 (file)
@@ -231,13 +231,13 @@ enum ctattr_secctx {
 
 enum ctattr_stats_cpu {
        CTA_STATS_UNSPEC,
-       CTA_STATS_SEARCHED,
+       CTA_STATS_SEARCHED,     /* no longer used */
        CTA_STATS_FOUND,
-       CTA_STATS_NEW,
+       CTA_STATS_NEW,          /* no longer used */
        CTA_STATS_INVALID,
        CTA_STATS_IGNORE,
-       CTA_STATS_DELETE,
-       CTA_STATS_DELETE_LIST,
+       CTA_STATS_DELETE,       /* no longer used */
+       CTA_STATS_DELETE_LIST,  /* no longer used */
        CTA_STATS_INSERT,
        CTA_STATS_INSERT_FAILED,
        CTA_STATS_DROP,
index 6db9037..3efc0ca 100644 (file)
@@ -6,6 +6,7 @@
 
 /* timings are in milliseconds. */
 #define XT_HASHLIMIT_SCALE 10000
+#define XT_HASHLIMIT_SCALE_v2 1000000llu
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
  * seconds, or one packet every 59 hours.
  */
@@ -63,6 +64,20 @@ struct hashlimit_cfg1 {
        __u8 srcmask, dstmask;
 };
 
+struct hashlimit_cfg2 {
+       __u64 avg;              /* Average secs between packets * scale */
+       __u64 burst;            /* Period multiplier for upper limit. */
+       __u32 mode;             /* bitmask of XT_HASHLIMIT_HASH_* */
+
+       /* user specified */
+       __u32 size;             /* how many buckets */
+       __u32 max;              /* max number of entries */
+       __u32 gc_interval;      /* gc interval */
+       __u32 expire;           /* when do entries expire? */
+
+       __u8 srcmask, dstmask;
+};
+
 struct xt_hashlimit_mtinfo1 {
        char name[IFNAMSIZ];
        struct hashlimit_cfg1 cfg;
@@ -71,4 +86,12 @@ struct xt_hashlimit_mtinfo1 {
        struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
 };
 
+struct xt_hashlimit_mtinfo2 {
+       char name[NAME_MAX];
+       struct hashlimit_cfg2 cfg;
+
+       /* Used internally by the kernel */
+       struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_HASHLIMIT_H */
index f8e39db..df7451d 100644 (file)
@@ -811,7 +811,7 @@ struct tc_fq_qd_stats {
        __u32   flows;
        __u32   inactive_flows;
        __u32   throttled_flows;
-       __u32   pad;
+       __u32   unthrottle_latency_ns;
 };
 
 /* Heavy-Hitter Filter */
index 1433389..1fc62b2 100644 (file)
@@ -298,7 +298,7 @@ enum xfrm_attr_type_t {
        XFRMA_ALG_AUTH_TRUNC,   /* struct xfrm_algo_auth */
        XFRMA_MARK,             /* struct xfrm_mark */
        XFRMA_TFCPAD,           /* __u32 */
-       XFRMA_REPLAY_ESN_VAL,   /* struct xfrm_replay_esn */
+       XFRMA_REPLAY_ESN_VAL,   /* struct xfrm_replay_state_esn */
        XFRMA_SA_EXTRA_FLAGS,   /* __u32 */
        XFRMA_PROTO,            /* __u8 */
        XFRMA_ADDRESS_FILTER,   /* struct xfrm_address_filter */
index d1c51b7..5e8dab5 100644 (file)
@@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
        if (cgroup_sk_alloc_disabled)
                return;
 
+       /* Socket clone path */
+       if (skcd->val) {
+               cgroup_get(sock_cgroup_ptr(skcd));
+               return;
+       }
+
        rcu_read_lock();
 
        while (true) {
index a7b8c1c..9fc3be0 100644 (file)
@@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info)
        return 0;
 }
 
-static int perf_event_restart(struct perf_event *event)
+static int perf_event_stop(struct perf_event *event, int restart)
 {
        struct stop_event_data sd = {
                .event          = event,
-               .restart        = 1,
+               .restart        = restart,
        };
        int ret = 0;
 
@@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group)
                        .group = group,
                        .ret = 0,
                };
-               ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
-               /* The event must have been read from an online CPU: */
-               WARN_ON_ONCE(ret);
-               ret = ret ? : data.ret;
+               /*
+                * Purposely ignore the smp_call_function_single() return
+                * value.
+                *
+                * If event->oncpu isn't a valid CPU it means the event got
+                * scheduled out and that will have updated the event count.
+                *
+                * Therefore, either way, we'll have an up-to-date event count
+                * after this.
+                */
+               (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+               ret = data.ret;
        } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                struct perf_event_context *ctx = event->ctx;
                unsigned long flags;
@@ -4837,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event,
                spin_unlock_irqrestore(&rb->event_lock, flags);
        }
 
+       /*
+        * Avoid racing with perf_mmap_close(AUX): stop the event
+        * before swizzling the event::rb pointer; if it's getting
+        * unmapped, its aux_mmap_count will be 0 and it won't
+        * restart. See the comment in __perf_pmu_output_stop().
+        *
+        * Data will inevitably be lost when set_output is done in
+        * mid-air, but then again, whoever does it like this is
+        * not in for the data anyway.
+        */
+       if (has_aux(event))
+               perf_event_stop(event, 0);
+
        rcu_assign_pointer(event->rb, rb);
 
        if (old_rb) {
@@ -6112,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
        raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
        if (restart)
-               perf_event_restart(event);
+               perf_event_stop(event, 1);
 }
 
 void perf_event_exec(void)
@@ -6156,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
 
        /*
         * In case of inheritance, it will be the parent that links to the
-        * ring-buffer, but it will be the child that's actually using it:
+        * ring-buffer, but it will be the child that's actually using it.
+        *
+        * We are using event::rb to determine if the event should be stopped,
+        * however this may race with ring_buffer_attach() (through set_output),
+        * which will make us skip the event that actually needs to be stopped.
+        * So ring_buffer_attach() has to stop an aux event before re-assigning
+        * its rb pointer.
         */
        if (rcu_dereference(parent->rb) == rb)
                ro->err = __perf_event_stop(&sd);
@@ -6670,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
        raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
        if (restart)
-               perf_event_restart(event);
+               perf_event_stop(event, 1);
 }
 
 /*
@@ -7933,7 +7960,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
        mmput(mm);
 
 restart:
-       perf_event_restart(event);
+       perf_event_stop(event, 1);
 }
 
 /*
index ae9b90d..257fa46 100644 (file)
@@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
        if (!rb)
                return NULL;
 
-       if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+       if (!rb_has_aux(rb))
                goto err;
 
        /*
-        * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
-        * the aux buffer is in perf_mmap_close(), about to get freed.
+        * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(),
+        * about to get freed, so we leave immediately.
+        *
+        * Checking rb::aux_mmap_count and rb::refcount has to be done in
+        * the same order, see perf_mmap_close. Otherwise we end up freeing
+        * aux pages in this path, which is a bug, because in_atomic().
         */
        if (!atomic_read(&rb->aux_mmap_count))
-               goto err_put;
+               goto err;
+
+       if (!atomic_inc_not_zero(&rb->aux_refcount))
+               goto err;
 
        /*
         * Nesting is not supported for AUX area, make sure nested
index 2a906f2..44817c6 100644 (file)
@@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
 
+       /*
+        * Ensure we load p->on_rq _after_ p->state, otherwise it would
+        * be possible to, falsely, observe p->on_rq == 0 and get stuck
+        * in smp_cond_load_acquire() below.
+        *
+        * sched_ttwu_pending()                 try_to_wake_up()
+        *   [S] p->on_rq = 1;                  [L] P->state
+        *       UNLOCK rq->lock  -----.
+        *                              \
+        *                               +---   RMB
+        * schedule()                   /
+        *       LOCK rq->lock    -----'
+        *       UNLOCK rq->lock
+        *
+        * [task p]
+        *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+        *
+        * Pairs with the UNLOCK+LOCK on rq->lock from the
+        * last wakeup of our task and the schedule that got our task
+        * current.
+        */
+       smp_rmb();
        if (p->on_rq && ttwu_remote(p, wake_flags))
                goto stat;
 
index 9e8c738..7e3138c 100644 (file)
@@ -290,26 +290,6 @@ done:
        return wanted - bytes;
 }
 
-/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
-       if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
-               char __user *buf = i->iov->iov_base + i->iov_offset;
-               bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-               return fault_in_pages_readable(buf, bytes);
-       }
-       return 0;
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
 /*
  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
  * bytes.  For each iovec, fault in each page that constitutes the iovec.
@@ -317,7 +297,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable);
  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
  * because it is an invalid address).
  */
-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
        size_t skip = i->iov_offset;
        const struct iovec *iov;
@@ -334,7 +314,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
        }
        return 0;
 }
-EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
 void iov_iter_init(struct iov_iter *i, int direction,
                        const struct iovec *iov, unsigned long nr_segs,
index 8865bfb..74c7cae 100644 (file)
@@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = {
 
 void __dump_page(struct page *page, const char *reason)
 {
+       int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
        pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
-                 page, page_ref_count(page), page_mapcount(page),
-                 page->mapping, page->index);
+                 page, page_ref_count(page), mapcount,
+                 page->mapping, page_to_pgoff(page));
        if (PageCompound(page))
                pr_cont(" compound_mapcount: %d", compound_mapcount(page));
        pr_cont("\n");
index 79c52d0..728d779 100644 (file)
@@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
  * value (scan code).
  */
 
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+               struct vm_area_struct **vmap)
 {
        struct vm_area_struct *vma;
        unsigned long hstart, hend;
@@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
        if (unlikely(khugepaged_test_exit(mm)))
                return SCAN_ANY_PROCESS;
 
-       vma = find_vma(mm, address);
+       *vmap = vma = find_vma(mm, address);
        if (!vma)
                return SCAN_VMA_NULL;
 
@@ -881,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
                .pmd = pmd,
        };
 
+       /* we only decide to swapin, if there is enough young ptes */
+       if (referenced < HPAGE_PMD_NR/2) {
+               trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+               return false;
+       }
        fe.pte = pte_offset_map(pmd, address);
        for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
                        fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
                if (!is_swap_pte(pteval))
                        continue;
                swapped_in++;
-               /* we only decide to swapin, if there is enough young ptes */
-               if (referenced < HPAGE_PMD_NR/2) {
-                       trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
-                       return false;
-               }
                ret = do_swap_page(&fe, pteval);
 
                /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
                if (ret & VM_FAULT_RETRY) {
                        down_read(&mm->mmap_sem);
-                       if (hugepage_vma_revalidate(mm, address)) {
+                       if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
                                /* vma is no longer available, don't continue to swapin */
                                trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
                                return false;
@@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 static void collapse_huge_page(struct mm_struct *mm,
                                   unsigned long address,
                                   struct page **hpage,
-                                  struct vm_area_struct *vma,
                                   int node, int referenced)
 {
        pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        spinlock_t *pmd_ptl, *pte_ptl;
        int isolated = 0, result = 0;
        struct mem_cgroup *memcg;
+       struct vm_area_struct *vma;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
        gfp_t gfp;
@@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        }
 
        down_read(&mm->mmap_sem);
-       result = hugepage_vma_revalidate(mm, address);
+       result = hugepage_vma_revalidate(mm, address, &vma);
        if (result) {
                mem_cgroup_cancel_charge(new_page, memcg, true);
                up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         * handled by the anon_vma lock + PG_lock.
         */
        down_write(&mm->mmap_sem);
-       result = hugepage_vma_revalidate(mm, address);
+       result = hugepage_vma_revalidate(mm, address, &vma);
        if (result)
                goto out;
        /* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@ out_unmap:
        if (ret) {
                node = khugepaged_find_target_node();
                /* collapse_huge_page will return with the mmap_sem released */
-               collapse_huge_page(mm, address, hpage, vma, node, referenced);
+               collapse_huge_page(mm, address, hpage, node, referenced);
        }
 out:
        trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
index 9a6a51a..4be518d 100644 (file)
@@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
 static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        struct memcg_stock_pcp *stock;
+       unsigned long flags;
        bool ret = false;
 
        if (nr_pages > CHARGE_BATCH)
                return ret;
 
-       stock = &get_cpu_var(memcg_stock);
+       local_irq_save(flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
        if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
                stock->nr_pages -= nr_pages;
                ret = true;
        }
-       put_cpu_var(memcg_stock);
+
+       local_irq_restore(flags);
+
        return ret;
 }
 
@@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
        stock->cached = NULL;
 }
 
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
 static void drain_local_stock(struct work_struct *dummy)
 {
-       struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+       struct memcg_stock_pcp *stock;
+       unsigned long flags;
+
+       local_irq_save(flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
        drain_stock(stock);
        clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+       local_irq_restore(flags);
 }
 
 /*
@@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy)
  */
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-       struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+       struct memcg_stock_pcp *stock;
+       unsigned long flags;
+
+       local_irq_save(flags);
 
+       stock = this_cpu_ptr(&memcg_stock);
        if (stock->cached != memcg) { /* reset if necessary */
                drain_stock(stock);
                stock->cached = memcg;
        }
        stock->nr_pages += nr_pages;
-       put_cpu_var(memcg_stock);
+
+       local_irq_restore(flags);
 }
 
 /*
index 41266dc..b58906b 100644 (file)
@@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private,
                return alloc_huge_page_node(page_hstate(compound_head(page)),
                                        next_node_in(nid, nmask));
 
-       node_clear(nid, nmask);
+       if (nid != next_node_in(nid, nmask))
+               node_clear(nid, nmask);
+
        if (PageHighMem(page)
            || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
                gfp_mask |= __GFP_HIGHMEM;
index 16bd82f..eafe5dd 100644 (file)
@@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
        int ret;
        struct swap_info_struct *sis = page_swap_info(page);
 
+       BUG_ON(!PageSwapCache(page));
        if (sis->flags & SWP_FILE) {
                struct kiocb kiocb;
                struct file *swap_file = sis->swap_file;
@@ -337,6 +338,7 @@ int swap_readpage(struct page *page)
        int ret = 0;
        struct swap_info_struct *sis = page_swap_info(page);
 
+       BUG_ON(!PageSwapCache(page));
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageUptodate(page), page);
        if (frontswap_load(page) == 0) {
@@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page)
 
        if (sis->flags & SWP_FILE) {
                struct address_space *mapping = sis->swap_file->f_mapping;
+               BUG_ON(!PageSwapCache(page));
                return mapping->a_ops->set_page_dirty(page);
        } else {
                return __set_page_dirty_no_writeback(page);
index 78cfa29..2657acc 100644 (file)
@@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry)
 struct swap_info_struct *page_swap_info(struct page *page)
 {
        swp_entry_t swap = { .val = page_private(page) };
-       BUG_ON(!PageSwapCache(page));
        return swap_info[swp_type(swap)];
 }
 
index 089328f..3c8da0a 100644 (file)
@@ -207,8 +207,11 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
         * Some architectures (arm64) return true for virt_addr_valid() on
         * vmalloced addresses. Work around this by checking for vmalloc
         * first.
+        *
+        * We also need to check for module addresses explicitly since we
+        * may copy static data from modules to userspace
         */
-       if (is_vmalloc_addr(ptr))
+       if (is_vmalloc_or_module_addr(ptr))
                return NULL;
 
        if (!virt_addr_valid(ptr))
index 7d17001..ee08540 100644 (file)
@@ -335,7 +335,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
                goto out;
 
        skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
-       elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+       elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
        elp_packet = (struct batadv_elp_packet *)elp_buff;
        memset(elp_packet, 0, BATADV_ELP_HLEN);
 
index 610f2c4..7e8dc64 100644 (file)
@@ -460,6 +460,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
        return 0;
 }
 
+/**
+ * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
+ * @orig_node: originator node whose last bonding candidate should be retrieved
+ *
+ * Return: last bonding candidate of router or NULL if not found
+ *
+ * The object is returned with refcounter increased by 1.
+ */
+static struct batadv_orig_ifinfo *
+batadv_last_bonding_get(struct batadv_orig_node *orig_node)
+{
+       struct batadv_orig_ifinfo *last_bonding_candidate;
+
+       spin_lock_bh(&orig_node->neigh_list_lock);
+       last_bonding_candidate = orig_node->last_bonding_candidate;
+
+       if (last_bonding_candidate)
+               kref_get(&last_bonding_candidate->refcount);
+       spin_unlock_bh(&orig_node->neigh_list_lock);
+
+       return last_bonding_candidate;
+}
+
 /**
  * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
  * @orig_node: originator node whose bonding candidates should be replaced
@@ -530,7 +553,7 @@ batadv_find_router(struct batadv_priv *bat_priv,
         * router - obviously there are no other candidates.
         */
        rcu_read_lock();
-       last_candidate = orig_node->last_bonding_candidate;
+       last_candidate = batadv_last_bonding_get(orig_node);
        if (last_candidate)
                last_cand_router = rcu_dereference(last_candidate->router);
 
@@ -622,6 +645,9 @@ next:
                batadv_orig_ifinfo_put(next_candidate);
        }
 
+       if (last_candidate)
+               batadv_orig_ifinfo_put(last_candidate);
+
        return router;
 }
 
index 77e7f69..2fe9345 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
 #include <linux/in_route.h>
+#include <linux/rculist.h>
 #include <linux/inetdevice.h>
 
 #include <net/ip.h>
@@ -395,11 +396,10 @@ bridged_dnat:
                                skb->dev = nf_bridge->physindev;
                                nf_bridge_update_protocol(skb);
                                nf_bridge_push_encap_header(skb);
-                               NF_HOOK_THRESH(NFPROTO_BRIDGE,
-                                              NF_BR_PRE_ROUTING,
-                                              net, sk, skb, skb->dev, NULL,
-                                              br_nf_pre_routing_finish_bridge,
-                                              1);
+                               br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+                                                 net, sk, skb, skb->dev,
+                                                 NULL,
+                                                 br_nf_pre_routing_finish);
                                return 0;
                        }
                        ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -417,10 +417,8 @@ bridged_dnat:
        skb->dev = nf_bridge->physindev;
        nf_bridge_update_protocol(skb);
        nf_bridge_push_encap_header(skb);
-       NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
-                      skb->dev, NULL,
-                      br_handle_frame_finish, 1);
-
+       br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
+                         br_handle_frame_finish);
        return 0;
 }
 
@@ -992,6 +990,43 @@ static struct notifier_block brnf_notifier __read_mostly = {
        .notifier_call = brnf_device_event,
 };
 
+/* recursively invokes nf_hook_slow (again), skipping already-called
+ * hooks (< NF_BR_PRI_BRNF).
+ *
+ * Called with rcu read lock held.
+ */
+int br_nf_hook_thresh(unsigned int hook, struct net *net,
+                     struct sock *sk, struct sk_buff *skb,
+                     struct net_device *indev,
+                     struct net_device *outdev,
+                     int (*okfn)(struct net *, struct sock *,
+                                 struct sk_buff *))
+{
+       struct nf_hook_entry *elem;
+       struct nf_hook_state state;
+       int ret;
+
+       elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+
+       while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
+               elem = rcu_dereference(elem->next);
+
+       if (!elem)
+               return okfn(net, sk, skb);
+
+       /* We may already have this, but read-locks nest anyway */
+       rcu_read_lock();
+       nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
+                          NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+
+       ret = nf_hook_slow(skb, &state);
+       rcu_read_unlock();
+       if (ret == 1)
+               ret = okfn(net, sk, skb);
+
+       return ret;
+}
+
 #ifdef CONFIG_SYSCTL
 static
 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
index 5e59a84..5989661 100644 (file)
@@ -187,10 +187,9 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
                        skb->dev = nf_bridge->physindev;
                        nf_bridge_update_protocol(skb);
                        nf_bridge_push_encap_header(skb);
-                       NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
-                                      net, sk, skb, skb->dev, NULL,
-                                      br_nf_pre_routing_finish_bridge,
-                                      1);
+                       br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+                                         net, sk, skb, skb->dev, NULL,
+                                         br_nf_pre_routing_finish_bridge);
                        return 0;
                }
                ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -207,9 +206,8 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
        skb->dev = nf_bridge->physindev;
        nf_bridge_update_protocol(skb);
        nf_bridge_push_encap_header(skb);
-       NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
-                      skb->dev, NULL,
-                      br_handle_frame_finish, 1);
+       br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb,
+                         skb->dev, NULL, br_handle_frame_finish);
 
        return 0;
 }
index 152300d..9a11086 100644 (file)
@@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
        if (loginfo->type == NF_LOG_TYPE_LOG)
                bitmask = loginfo->u.log.logflags;
        else
-               bitmask = NF_LOG_MASK;
+               bitmask = NF_LOG_DEFAULT_MASK;
 
        if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
           htons(ETH_P_IP)) {
index 2039649..2e7c4f9 100644 (file)
@@ -24,7 +24,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
                return EBT_DROP;
 
        if (par->hooknum != NF_BR_BROUTING)
-               /* rcu_read_lock()ed by nf_hook_slow */
+               /* rcu_read_lock()ed by nf_hook_thresh */
                ether_addr_copy(eth_hdr(skb)->h_dest,
                                br_port_get_rcu(par->in)->br->dev->dev_addr);
        else
index 0833c25..f5c11bb 100644 (file)
@@ -146,7 +146,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
                return 1;
        if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out)))
                return 1;
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        if (in && (p = br_port_get_rcu(in)) != NULL &&
            NF_INVF(e, EBT_ILOGICALIN,
                    ebt_dev_check(e->logical_in, p->br->dev)))
index a78c4e2..97afdc0 100644 (file)
 #include <linux/module.h>
 #include <linux/netfilter_bridge.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_bridge.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <net/netfilter/nf_tables_ipv4.h>
 #include <net/netfilter/nf_tables_ipv6.h>
 
-int nft_bridge_iphdr_validate(struct sk_buff *skb)
-{
-       struct iphdr *iph;
-       u32 len;
-
-       if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-               return 0;
-
-       iph = ip_hdr(skb);
-       if (iph->ihl < 5 || iph->version != 4)
-               return 0;
-
-       len = ntohs(iph->tot_len);
-       if (skb->len < len)
-               return 0;
-       else if (len < (iph->ihl*4))
-               return 0;
-
-       if (!pskb_may_pull(skb, iph->ihl*4))
-               return 0;
-
-       return 1;
-}
-EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate);
-
-int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
-{
-       struct ipv6hdr *hdr;
-       u32 pkt_len;
-
-       if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-               return 0;
-
-       hdr = ipv6_hdr(skb);
-       if (hdr->version != 6)
-               return 0;
-
-       pkt_len = ntohs(hdr->payload_len);
-       if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
-               return 0;
-
-       return 1;
-}
-EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate);
-
-static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
-                                              struct sk_buff *skb,
-                                              const struct nf_hook_state *state)
-{
-       if (nft_bridge_iphdr_validate(skb))
-               nft_set_pktinfo_ipv4(pkt, skb, state);
-       else
-               nft_set_pktinfo(pkt, skb, state);
-}
-
-static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-                                              struct sk_buff *skb,
-                                              const struct nf_hook_state *state)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-       if (nft_bridge_ip6hdr_validate(skb) &&
-           nft_set_pktinfo_ipv6(pkt, skb, state) == 0)
-               return;
-#endif
-       nft_set_pktinfo(pkt, skb, state);
-}
-
 static unsigned int
 nft_do_chain_bridge(void *priv,
                    struct sk_buff *skb,
@@ -95,13 +27,13 @@ nft_do_chain_bridge(void *priv,
 
        switch (eth_hdr(skb)->h_proto) {
        case htons(ETH_P_IP):
-               nft_bridge_set_pktinfo_ipv4(&pkt, skb, state);
+               nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
                break;
        case htons(ETH_P_IPV6):
-               nft_bridge_set_pktinfo_ipv6(&pkt, skb, state);
+               nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
                break;
        default:
-               nft_set_pktinfo(&pkt, skb, state);
+               nft_set_pktinfo_unspec(&pkt, skb, state);
                break;
        }
 
@@ -207,12 +139,20 @@ static int __init nf_tables_bridge_init(void)
        int ret;
 
        nf_register_afinfo(&nf_br_afinfo);
-       nft_register_chain_type(&filter_bridge);
+       ret = nft_register_chain_type(&filter_bridge);
+       if (ret < 0)
+               goto err1;
+
        ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
-       if (ret < 0) {
-               nft_unregister_chain_type(&filter_bridge);
-               nf_unregister_afinfo(&nf_br_afinfo);
-       }
+       if (ret < 0)
+               goto err2;
+
+       return ret;
+
+err2:
+       nft_unregister_chain_type(&filter_bridge);
+err1:
+       nf_unregister_afinfo(&nf_br_afinfo);
        return ret;
 }
 
index 0b77ffb..4b3df6b 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nft_reject.h>
-#include <net/netfilter/nf_tables_bridge.h>
 #include <net/netfilter/ipv4/nf_reject.h>
 #include <net/netfilter/ipv6/nf_reject.h>
 #include <linux/ip.h>
@@ -37,6 +36,30 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb,
        skb_pull(nskb, ETH_HLEN);
 }
 
+static int nft_bridge_iphdr_validate(struct sk_buff *skb)
+{
+       struct iphdr *iph;
+       u32 len;
+
+       if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+               return 0;
+
+       iph = ip_hdr(skb);
+       if (iph->ihl < 5 || iph->version != 4)
+               return 0;
+
+       len = ntohs(iph->tot_len);
+       if (skb->len < len)
+               return 0;
+       else if (len < (iph->ihl*4))
+               return 0;
+
+       if (!pskb_may_pull(skb, iph->ihl*4))
+               return 0;
+
+       return 1;
+}
+
 /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
  * or the bridge port (NF_BRIDGE PREROUTING).
  */
@@ -143,6 +166,25 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
        br_forward(br_port_get_rcu(dev), nskb, false, true);
 }
 
+static int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
+{
+       struct ipv6hdr *hdr;
+       u32 pkt_len;
+
+       if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+               return 0;
+
+       hdr = ipv6_hdr(skb);
+       if (hdr->version != 6)
+               return 0;
+
+       pkt_len = ntohs(hdr->payload_len);
+       if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+               return 0;
+
+       return 1;
+}
+
 static void nft_reject_br_send_v6_tcp_reset(struct net *net,
                                            struct sk_buff *oldskb,
                                            const struct net_device *dev,
index 9dbece2..c0c291f 100644 (file)
@@ -4055,12 +4055,17 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 {
 #ifdef CONFIG_NETFILTER_INGRESS
        if (nf_hook_ingress_active(skb)) {
+               int ingress_retval;
+
                if (*pt_prev) {
                        *ret = deliver_skb(skb, *pt_prev, orig_dev);
                        *pt_prev = NULL;
                }
 
-               return nf_hook_ingress(skb);
+               rcu_read_lock();
+               ingress_retval = nf_hook_ingress(skb);
+               rcu_read_unlock();
+               return ingress_retval;
        }
 #endif /* CONFIG_NETFILTER_INGRESS */
        return 0;
index 0920c2a..00351cd 100644 (file)
@@ -1777,6 +1777,22 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
+{
+       /* After all direct packet write, this can be used once for
+        * triggering a lazy recalc on next skb_get_hash() invocation.
+        */
+       skb_clear_hash(skb);
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
+       .func           = bpf_set_hash_invalid,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
           u16, vlan_tci)
 {
@@ -2408,7 +2424,7 @@ BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
        struct cgroup *cgrp;
        struct sock *sk;
 
-       sk = skb->sk;
+       sk = skb_to_full_sk(skb);
        if (!sk || !sk_fullsock(sk))
                return -ENOENT;
        if (unlikely(idx >= array->map.max_entries))
@@ -2534,6 +2550,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                return &bpf_get_route_realm_proto;
        case BPF_FUNC_get_hash_recalc:
                return &bpf_get_hash_recalc_proto;
+       case BPF_FUNC_set_hash_invalid:
+               return &bpf_set_hash_invalid_proto;
        case BPF_FUNC_perf_event_output:
                return &bpf_skb_event_output_proto;
        case BPF_FUNC_get_smp_processor_id:
@@ -2551,6 +2569,8 @@ xdp_func_proto(enum bpf_func_id func_id)
        switch (func_id) {
        case BPF_FUNC_perf_event_output:
                return &bpf_xdp_event_output_proto;
+       case BPF_FUNC_get_smp_processor_id:
+               return &bpf_get_smp_processor_id_proto;
        default:
                return sk_filter_func_proto(func_id);
        }
index 0dbae42..3ac8946 100644 (file)
@@ -843,7 +843,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
                size += nla_total_size(num_vfs * sizeof(struct nlattr));
                size += num_vfs *
                        (nla_total_size(sizeof(struct ifla_vf_mac)) +
-                        nla_total_size(sizeof(struct ifla_vf_vlan)) +
+                        nla_total_size(MAX_VLAN_LIST_LEN *
+                                       sizeof(struct nlattr)) +
+                        nla_total_size(MAX_VLAN_LIST_LEN *
+                                       sizeof(struct ifla_vf_vlan_info)) +
                         nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
                         nla_total_size(sizeof(struct ifla_vf_rate)) +
                         nla_total_size(sizeof(struct ifla_vf_link_state)) +
@@ -1111,14 +1114,15 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
                                               struct nlattr *vfinfo)
 {
        struct ifla_vf_rss_query_en vf_rss_query_en;
+       struct nlattr *vf, *vfstats, *vfvlanlist;
        struct ifla_vf_link_state vf_linkstate;
+       struct ifla_vf_vlan_info vf_vlan_info;
        struct ifla_vf_spoofchk vf_spoofchk;
        struct ifla_vf_tx_rate vf_tx_rate;
        struct ifla_vf_stats vf_stats;
        struct ifla_vf_trust vf_trust;
        struct ifla_vf_vlan vf_vlan;
        struct ifla_vf_rate vf_rate;
-       struct nlattr *vf, *vfstats;
        struct ifla_vf_mac vf_mac;
        struct ifla_vf_info ivi;
 
@@ -1135,11 +1139,14 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
         * IFLA_VF_LINK_STATE_AUTO which equals zero
         */
        ivi.linkstate = 0;
+       /* VLAN Protocol by default is 802.1Q */
+       ivi.vlan_proto = htons(ETH_P_8021Q);
        if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
                return 0;
 
        vf_mac.vf =
                vf_vlan.vf =
+               vf_vlan_info.vf =
                vf_rate.vf =
                vf_tx_rate.vf =
                vf_spoofchk.vf =
@@ -1150,6 +1157,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
        memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
        vf_vlan.vlan = ivi.vlan;
        vf_vlan.qos = ivi.qos;
+       vf_vlan_info.vlan = ivi.vlan;
+       vf_vlan_info.qos = ivi.qos;
+       vf_vlan_info.vlan_proto = ivi.vlan_proto;
        vf_tx_rate.rate = ivi.max_tx_rate;
        vf_rate.min_tx_rate = ivi.min_tx_rate;
        vf_rate.max_tx_rate = ivi.max_tx_rate;
@@ -1158,10 +1168,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
        vf_rss_query_en.setting = ivi.rss_query_en;
        vf_trust.setting = ivi.trusted;
        vf = nla_nest_start(skb, IFLA_VF_INFO);
-       if (!vf) {
-               nla_nest_cancel(skb, vfinfo);
-               return -EMSGSIZE;
-       }
+       if (!vf)
+               goto nla_put_vfinfo_failure;
        if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
            nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
            nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
@@ -1177,17 +1185,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
                    &vf_rss_query_en) ||
            nla_put(skb, IFLA_VF_TRUST,
                    sizeof(vf_trust), &vf_trust))
-               return -EMSGSIZE;
+               goto nla_put_vf_failure;
+       vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST);
+       if (!vfvlanlist)
+               goto nla_put_vf_failure;
+       if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info),
+                   &vf_vlan_info)) {
+               nla_nest_cancel(skb, vfvlanlist);
+               goto nla_put_vf_failure;
+       }
+       nla_nest_end(skb, vfvlanlist);
        memset(&vf_stats, 0, sizeof(vf_stats));
        if (dev->netdev_ops->ndo_get_vf_stats)
                dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
                                                &vf_stats);
        vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-       if (!vfstats) {
-               nla_nest_cancel(skb, vf);
-               nla_nest_cancel(skb, vfinfo);
-               return -EMSGSIZE;
-       }
+       if (!vfstats)
+               goto nla_put_vf_failure;
        if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
                              vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
            nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
@@ -1199,11 +1213,19 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
            nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
                              vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
            nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
-                             vf_stats.multicast, IFLA_VF_STATS_PAD))
-               return -EMSGSIZE;
+                             vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+               nla_nest_cancel(skb, vfstats);
+               goto nla_put_vf_failure;
+       }
        nla_nest_end(skb, vfstats);
        nla_nest_end(skb, vf);
        return 0;
+
+nla_put_vf_failure:
+       nla_nest_cancel(skb, vf);
+nla_put_vfinfo_failure:
+       nla_nest_cancel(skb, vfinfo);
+       return -EMSGSIZE;
 }
 
 static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
@@ -1448,6 +1470,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
        [IFLA_VF_MAC]           = { .len = sizeof(struct ifla_vf_mac) },
        [IFLA_VF_VLAN]          = { .len = sizeof(struct ifla_vf_vlan) },
+       [IFLA_VF_VLAN_LIST]     = { .type = NLA_NESTED },
        [IFLA_VF_TX_RATE]       = { .len = sizeof(struct ifla_vf_tx_rate) },
        [IFLA_VF_SPOOFCHK]      = { .len = sizeof(struct ifla_vf_spoofchk) },
        [IFLA_VF_RATE]          = { .len = sizeof(struct ifla_vf_rate) },
@@ -1704,7 +1727,34 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
                err = -EOPNOTSUPP;
                if (ops->ndo_set_vf_vlan)
                        err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
-                                                  ivv->qos);
+                                                  ivv->qos,
+                                                  htons(ETH_P_8021Q));
+               if (err < 0)
+                       return err;
+       }
+
+       if (tb[IFLA_VF_VLAN_LIST]) {
+               struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN];
+               struct nlattr *attr;
+               int rem, len = 0;
+
+               err = -EOPNOTSUPP;
+               if (!ops->ndo_set_vf_vlan)
+                       return err;
+
+               nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
+                       if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
+                           nla_len(attr) < NLA_HDRLEN) {
+                               return -EINVAL;
+                       }
+                       if (len >= MAX_VLAN_LIST_LEN)
+                               return -EOPNOTSUPP;
+                       ivvl[len] = nla_data(attr);
+
+                       len++;
+               }
+               err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
+                                          ivvl[0]->qos, ivvl[0]->vlan_proto);
                if (err < 0)
                        return err;
        }
index 51a7304..038e660 100644 (file)
@@ -1340,7 +1340,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                if (!try_module_get(prot->owner))
                        goto out_free_sec;
                sk_tx_queue_clear(sk);
-               cgroup_sk_alloc(&sk->sk_cgrp_data);
        }
 
        return sk;
@@ -1400,6 +1399,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                sock_net_set(sk, net);
                atomic_set(&sk->sk_wmem_alloc, 1);
 
+               cgroup_sk_alloc(&sk->sk_cgrp_data);
                sock_update_classid(&sk->sk_cgrp_data);
                sock_update_netprioidx(&sk->sk_cgrp_data);
        }
@@ -1544,6 +1544,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                newsk->sk_priority = 0;
                newsk->sk_incoming_cpu = raw_smp_processor_id();
                atomic64_set(&newsk->sk_cookie, 0);
+
+               cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
                /*
                 * Before updating sk_refcnt, we must commit prior changes to memory
                 * (Documentation/RCU/rculist_nulls.txt for details)
index 9ecbe78..6b1282c 100644 (file)
@@ -69,6 +69,30 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p)
        return !!p->bridge_dev;
 }
 
+static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state)
+{
+       struct dsa_port *dp = &ds->ports[port];
+
+       if (ds->ops->port_stp_state_set)
+               ds->ops->port_stp_state_set(ds, port, state);
+
+       if (ds->ops->port_fast_age) {
+               /* Fast age FDB entries or flush appropriate forwarding database
+                * for the given port, if we are moving it from Learning or
+                * Forwarding state, to Disabled or Blocking or Listening state.
+                */
+
+               if ((dp->stp_state == BR_STATE_LEARNING ||
+                    dp->stp_state == BR_STATE_FORWARDING) &&
+                   (state == BR_STATE_DISABLED ||
+                    state == BR_STATE_BLOCKING ||
+                    state == BR_STATE_LISTENING))
+                       ds->ops->port_fast_age(ds, port);
+       }
+
+       dp->stp_state = state;
+}
+
 static int dsa_slave_open(struct net_device *dev)
 {
        struct dsa_slave_priv *p = netdev_priv(dev);
@@ -104,8 +128,7 @@ static int dsa_slave_open(struct net_device *dev)
                        goto clear_promisc;
        }
 
-       if (ds->ops->port_stp_state_set)
-               ds->ops->port_stp_state_set(ds, p->port, stp_state);
+       dsa_port_set_stp_state(ds, p->port, stp_state);
 
        if (p->phy)
                phy_start(p->phy);
@@ -147,8 +170,7 @@ static int dsa_slave_close(struct net_device *dev)
        if (ds->ops->port_disable)
                ds->ops->port_disable(ds, p->port, p->phy);
 
-       if (ds->ops->port_stp_state_set)
-               ds->ops->port_stp_state_set(ds, p->port, BR_STATE_DISABLED);
+       dsa_port_set_stp_state(ds, p->port, BR_STATE_DISABLED);
 
        return 0;
 }
@@ -354,7 +376,7 @@ static int dsa_slave_stp_state_set(struct net_device *dev,
        if (switchdev_trans_ph_prepare(trans))
                return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
 
-       ds->ops->port_stp_state_set(ds, p->port, attr->u.stp_state);
+       dsa_port_set_stp_state(ds, p->port, attr->u.stp_state);
 
        return 0;
 }
@@ -556,8 +578,7 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev)
        /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
         * so allow it to be in BR_STATE_FORWARDING to be kept functional
         */
-       if (ds->ops->port_stp_state_set)
-               ds->ops->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING);
+       dsa_port_set_stp_state(ds, p->port, BR_STATE_FORWARDING);
 }
 
 static int dsa_slave_port_attr_get(struct net_device *dev,
index 4b351af..d6feabb 100644 (file)
@@ -312,6 +312,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        const struct iphdr *iph = ip_hdr(skb);
        struct rtable *rt;
+       struct net_device *dev = skb->dev;
 
        /* if ingress device is enslaved to an L3 master device pass the
         * skb to its handler for processing
@@ -341,7 +342,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
         */
        if (!skb_valid_dst(skb)) {
                int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-                                              iph->tos, skb->dev);
+                                              iph->tos, dev);
                if (unlikely(err)) {
                        if (err == -EXDEV)
                                __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
@@ -370,7 +371,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
                __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
        } else if (skb->pkt_type == PACKET_BROADCAST ||
                   skb->pkt_type == PACKET_MULTICAST) {
-               struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+               struct in_device *in_dev = __in_dev_get_rcu(dev);
 
                /* RFC 1122 3.3.6:
                 *
index cc701fa..5d7944f 100644 (file)
@@ -88,6 +88,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
        struct net_device *dev;
        struct pcpu_sw_netstats *tstats;
        struct xfrm_state *x;
+       struct xfrm_mode *inner_mode;
        struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
        u32 orig_mark = skb->mark;
        int ret;
@@ -105,7 +106,19 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
        }
 
        x = xfrm_input_state(skb);
-       family = x->inner_mode->afinfo->family;
+
+       inner_mode = x->inner_mode;
+
+       if (x->sel.family == AF_UNSPEC) {
+               inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+               if (inner_mode == NULL) {
+                       XFRM_INC_STATS(dev_net(skb->dev),
+                                      LINUX_MIB_XFRMINSTATEMODEERROR);
+                       return -EINVAL;
+               }
+       }
+
+       family = inner_mode->afinfo->family;
 
        skb->mark = be32_to_cpu(tunnel->parms.i_key);
        ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
index 2625332..a87bcd2 100644 (file)
@@ -2076,6 +2076,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
        struct rta_mfc_stats mfcs;
        struct nlattr *mp_attr;
        struct rtnexthop *nhp;
+       unsigned long lastuse;
        int ct;
 
        /* If cache is unresolved, don't try to parse IIF and OIF */
@@ -2105,12 +2106,14 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 
        nla_nest_end(skb, mp_attr);
 
+       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
        mfcs.mfcs_packets = c->mfc_un.res.pkt;
        mfcs.mfcs_bytes = c->mfc_un.res.bytes;
        mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
        if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES,
-                             jiffies_to_clock_t(c->mfc_un.res.lastuse),
+           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
                              RTA_PAD))
                return -EMSGSIZE;
 
index f993545..7c00ce9 100644 (file)
@@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = {
        .u = {
                .log = {
                        .level = 4,
-                       .logflags = NF_LOG_MASK,
+                       .logflags = NF_LOG_DEFAULT_MASK,
                },
        },
 };
index 870aebd..713c09a 100644 (file)
@@ -110,7 +110,7 @@ static unsigned int ipv4_helper(void *priv,
        if (!help)
                return NF_ACCEPT;
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        helper = rcu_dereference(help->helper);
        if (!helper)
                return NF_ACCEPT;
index 4b5904b..d075b3c 100644 (file)
@@ -149,7 +149,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
                return -NF_ACCEPT;
        }
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
 
        /* Ordinarily, we'd expect the inverted tupleproto, but it's
index 8945c26..b24795e 100644 (file)
@@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = {
        .u = {
                .log = {
                        .level    = LOGLEVEL_NOTICE,
-                       .logflags = NF_LOG_MASK,
+                       .logflags = NF_LOG_DEFAULT_MASK,
                },
        },
 };
index 20f2255..8566489 100644 (file)
@@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = {
        .u = {
                .log = {
                        .level    = LOGLEVEL_NOTICE,
-                       .logflags = NF_LOG_MASK,
+                       .logflags = NF_LOG_DEFAULT_MASK,
                },
        },
 };
@@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
        if (info->type == NF_LOG_TYPE_LOG)
                logflags = info->u.log.logflags;
        else
-               logflags = NF_LOG_MASK;
+               logflags = NF_LOG_DEFAULT_MASK;
 
        ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
        if (ih == NULL) {
@@ -76,7 +76,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
        if (ntohs(ih->frag_off) & IP_OFFSET)
                nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
 
-       if ((logflags & XT_LOG_IPOPT) &&
+       if ((logflags & NF_LOG_IPOPT) &&
            ih->ihl * 4 > sizeof(struct iphdr)) {
                const unsigned char *op;
                unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
@@ -250,7 +250,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
        }
 
        /* Max length: 15 "UID=4294967295 " */
-       if ((logflags & XT_LOG_UID) && !iphoff)
+       if ((logflags & NF_LOG_UID) && !iphoff)
                nf_log_dump_sk_uid_gid(m, skb->sk);
 
        /* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -282,7 +282,7 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
        if (info->type == NF_LOG_TYPE_LOG)
                logflags = info->u.log.logflags;
 
-       if (!(logflags & XT_LOG_MACDECODE))
+       if (!(logflags & NF_LOG_MACDECODE))
                goto fallback;
 
        switch (dev->type) {
index 9414923..edf0500 100644 (file)
@@ -88,8 +88,8 @@ gre_manip_pkt(struct sk_buff *skb,
              const struct nf_conntrack_tuple *tuple,
              enum nf_nat_manip_type maniptype)
 {
-       const struct gre_hdr *greh;
-       struct gre_hdr_pptp *pgreh;
+       const struct gre_base_hdr *greh;
+       struct pptp_gre_header *pgreh;
 
        /* pgreh includes two optional 32bit fields which are not required
         * to be there.  That's where the magic '8' comes from */
@@ -97,18 +97,19 @@ gre_manip_pkt(struct sk_buff *skb,
                return false;
 
        greh = (void *)skb->data + hdroff;
-       pgreh = (struct gre_hdr_pptp *)greh;
+       pgreh = (struct pptp_gre_header *)greh;
 
        /* we only have destination manip of a packet, since 'source key'
         * is not present in the packet itself */
        if (maniptype != NF_NAT_MANIP_DST)
                return true;
-       switch (greh->version) {
-       case GRE_VERSION_1701:
+
+       switch (greh->flags & GRE_VERSION) {
+       case GRE_VERSION_0:
                /* We do not currently NAT any GREv0 packets.
                 * Try to behave like "nf_nat_proto_unknown" */
                break;
-       case GRE_VERSION_PPTP:
+       case GRE_VERSION_1:
                pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
                pgreh->call_id = tuple->dst.u.gre.key;
                break;
index cd84d42..805c8dd 100644 (file)
@@ -21,7 +21,7 @@ nft_do_chain_arp(void *priv,
 {
        struct nft_pktinfo pkt;
 
-       nft_set_pktinfo(&pkt, skb, state);
+       nft_set_pktinfo_unspec(&pkt, skb, state);
 
        return nft_do_chain(&pkt, priv);
 }
@@ -80,7 +80,10 @@ static int __init nf_tables_arp_init(void)
 {
        int ret;
 
-       nft_register_chain_type(&filter_arp);
+       ret = nft_register_chain_type(&filter_arp);
+       if (ret < 0)
+               return ret;
+
        ret = register_pernet_subsys(&nf_tables_arp_net_ops);
        if (ret < 0)
                nft_unregister_chain_type(&filter_arp);
index e44ba3b..2840a29 100644 (file)
@@ -103,7 +103,10 @@ static int __init nf_tables_ipv4_init(void)
 {
        int ret;
 
-       nft_register_chain_type(&filter_ipv4);
+       ret = nft_register_chain_type(&filter_ipv4);
+       if (ret < 0)
+               return ret;
+
        ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
        if (ret < 0)
                nft_unregister_chain_type(&filter_ipv4);
index 2375b0a..30493be 100644 (file)
@@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv,
        __be32 saddr, daddr;
        u_int8_t tos;
        const struct iphdr *iph;
+       int err;
 
        /* root is playing with raw sockets. */
        if (skb->len < sizeof(struct iphdr) ||
@@ -46,15 +47,17 @@ static unsigned int nf_route_table_hook(void *priv,
        tos = iph->tos;
 
        ret = nft_do_chain(&pkt, priv);
-       if (ret != NF_DROP && ret != NF_QUEUE) {
+       if (ret != NF_DROP && ret != NF_STOLEN) {
                iph = ip_hdr(skb);
 
                if (iph->saddr != saddr ||
                    iph->daddr != daddr ||
                    skb->mark != mark ||
-                   iph->tos != tos)
-                       if (ip_route_me_harder(state->net, skb, RTN_UNSPEC))
-                               ret = NF_DROP;
+                   iph->tos != tos) {
+                       err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+                       if (err < 0)
+                               ret = NF_DROP_ERR(err);
+               }
        }
        return ret;
 }
index b52496f..654a9af 100644 (file)
@@ -476,12 +476,18 @@ u32 ip_idents_reserve(u32 hash, int segs)
        atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
        u32 old = ACCESS_ONCE(*p_tstamp);
        u32 now = (u32)jiffies;
-       u32 delta = 0;
+       u32 new, delta = 0;
 
        if (old != now && cmpxchg(p_tstamp, old, now) == old)
                delta = prandom_u32_max(now - old);
 
-       return atomic_add_return(segs + delta, p_id) - segs;
+       /* Do not use atomic_add_return() as it makes UBSAN unhappy */
+       do {
+               old = (u32)atomic_read(p_id);
+               new = old + delta + segs;
+       } while (atomic_cmpxchg(p_id, old, new) != old);
+
+       return new - segs;
 }
 EXPORT_SYMBOL(ip_idents_reserve);
 
index 980a83e..8c6ad2d 100644 (file)
@@ -5950,7 +5950,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 * so release it.
                 */
                if (req) {
-                       tp->total_retrans = req->num_retrans;
+                       inet_csk(sk)->icsk_retransmits = 0;
                        reqsk_fastopen_remove(sk, req, false);
                } else {
                        /* Make sure socket is routed, for correct metrics. */
@@ -6326,6 +6326,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb, sk);
+       inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
 
        /* Note: tcp_v6_init_req() might override ir_iif for link locals */
        inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
index 04b9893..7ac37c3 100644 (file)
@@ -1196,7 +1196,6 @@ static void tcp_v4_init_req(struct request_sock *req,
 
        sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
        sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-       ireq->no_srccheck = inet_sk(sk_listener)->transparent;
        ireq->opt = tcp_v4_save_options(skb);
 }
 
index 478dfc5..7c77708 100644 (file)
@@ -2643,7 +2643,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
         * copying overhead: fragmentation, tunneling, mangling etc.
         */
        if (atomic_read(&sk->sk_wmem_alloc) >
-           min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
+           min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
+                 sk->sk_sndbuf))
                return -EAGAIN;
 
        if (skb_still_in_host_queue(sk, skb))
@@ -2872,7 +2873,7 @@ begin_fwd:
                if (tcp_retransmit_skb(sk, skb, segs))
                        return;
 
-               NET_INC_STATS(sock_net(sk), mib_idx);
+               NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
 
                if (tcp_in_cwnd_reduction(sk))
                        tp->prr_out += tcp_skb_pcount(skb);
@@ -3609,6 +3610,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
        if (!res) {
                __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+               if (unlikely(tcp_passive_fastopen(sk)))
+                       tcp_sk(sk)->total_retrans++;
        }
        return res;
 }
index d84930b..f712b41 100644 (file)
@@ -384,6 +384,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
         */
        inet_rtx_syn_ack(sk, req);
        req->num_timeout++;
+       icsk->icsk_retransmits++;
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                          TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
 }
index 397e1ed..4ce74f8 100644 (file)
@@ -1239,7 +1239,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
                parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
 
        if (data[IFLA_GRE_FLOWINFO])
-               parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+               parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
 
        if (data[IFLA_GRE_FLAGS])
                parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
index cc7e058..8a02ca8 100644 (file)
@@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb)
                        goto discard;
                }
 
-               XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
                rcu_read_unlock();
 
-               return xfrm6_rcv(skb);
+               return xfrm6_rcv_tnl(skb, t);
        }
        rcu_read_unlock();
        return -EINVAL;
@@ -340,6 +338,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
        struct net_device *dev;
        struct pcpu_sw_netstats *tstats;
        struct xfrm_state *x;
+       struct xfrm_mode *inner_mode;
        struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
        u32 orig_mark = skb->mark;
        int ret;
@@ -357,7 +356,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
        }
 
        x = xfrm_input_state(skb);
-       family = x->inner_mode->afinfo->family;
+
+       inner_mode = x->inner_mode;
+
+       if (x->sel.family == AF_UNSPEC) {
+               inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+               if (inner_mode == NULL) {
+                       XFRM_INC_STATS(dev_net(skb->dev),
+                                      LINUX_MIB_XFRMINSTATEMODEERROR);
+                       return -EINVAL;
+               }
+       }
+
+       family = inner_mode->afinfo->family;
 
        skb->mark = be32_to_cpu(t->parms.i_key);
        ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
index 6122f9c..fccb5dd 100644 (file)
@@ -2239,6 +2239,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
        struct rta_mfc_stats mfcs;
        struct nlattr *mp_attr;
        struct rtnexthop *nhp;
+       unsigned long lastuse;
        int ct;
 
        /* If cache is unresolved, don't try to parse IIF and OIF */
@@ -2269,12 +2270,14 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 
        nla_nest_end(skb, mp_attr);
 
+       lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+       lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
        mfcs.mfcs_packets = c->mfc_un.res.pkt;
        mfcs.mfcs_bytes = c->mfc_un.res.bytes;
        mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
        if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-           nla_put_u64_64bit(skb, RTA_EXPIRES,
-                             jiffies_to_clock_t(c->mfc_un.res.lastuse),
+           nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
                              RTA_PAD))
                return -EMSGSIZE;
 
index 552fac2..55aacea 100644 (file)
@@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = {
        .u = {
                .log = {
                        .level = LOGLEVEL_WARNING,
-                       .logflags = NF_LOG_MASK,
+                       .logflags = NF_LOG_DEFAULT_MASK,
                },
        },
 };
index 1aa5848..963ee38 100644 (file)
@@ -115,7 +115,7 @@ static unsigned int ipv6_helper(void *priv,
        help = nfct_help(ct);
        if (!help)
                return NF_ACCEPT;
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        helper = rcu_dereference(help->helper);
        if (!helper)
                return NF_ACCEPT;
index 660bc10..f5a61bc 100644 (file)
@@ -165,7 +165,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
                return -NF_ACCEPT;
        }
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
 
        /* Ordinarily, we'd expect the inverted tupleproto, but it's
index c1bcf69..57d8606 100644 (file)
@@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = {
        .u = {
                .log = {
                        .level    = LOGLEVEL_NOTICE,
-                       .logflags = NF_LOG_MASK,
+                       .logflags = NF_LOG_DEFAULT_MASK,
                },
        },
 };
@@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
        if (info->type == NF_LOG_TYPE_LOG)
                logflags = info->u.log.logflags;
        else
-               logflags = NF_LOG_MASK;
+               logflags = NF_LOG_DEFAULT_MASK;
 
        ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
        if (ih == NULL) {
@@ -84,7 +84,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
                }
 
                /* Max length: 48 "OPT (...) " */
-               if (logflags & XT_LOG_IPOPT)
+               if (logflags & NF_LOG_IPOPT)
                        nf_log_buf_add(m, "OPT ( ");
 
                switch (currenthdr) {
@@ -121,7 +121,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
                case IPPROTO_ROUTING:
                case IPPROTO_HOPOPTS:
                        if (fragment) {
-                               if (logflags & XT_LOG_IPOPT)
+                               if (logflags & NF_LOG_IPOPT)
                                        nf_log_buf_add(m, ")");
                                return;
                        }
@@ -129,7 +129,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
                        break;
                /* Max Length */
                case IPPROTO_AH:
-                       if (logflags & XT_LOG_IPOPT) {
+                       if (logflags & NF_LOG_IPOPT) {
                                struct ip_auth_hdr _ahdr;
                                const struct ip_auth_hdr *ah;
 
@@ -161,7 +161,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
                        hdrlen = (hp->hdrlen+2)<<2;
                        break;
                case IPPROTO_ESP:
-                       if (logflags & XT_LOG_IPOPT) {
+                       if (logflags & NF_LOG_IPOPT) {
                                struct ip_esp_hdr _esph;
                                const struct ip_esp_hdr *eh;
 
@@ -194,7 +194,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
                        nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
                        return;
                }
-               if (logflags & XT_LOG_IPOPT)
+               if (logflags & NF_LOG_IPOPT)
                        nf_log_buf_add(m, ") ");
 
                currenthdr = hp->nexthdr;
@@ -277,7 +277,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
        }
 
        /* Max length: 15 "UID=4294967295 " */
-       if ((logflags & XT_LOG_UID) && recurse)
+       if ((logflags & NF_LOG_UID) && recurse)
                nf_log_dump_sk_uid_gid(m, skb->sk);
 
        /* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -295,7 +295,7 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
        if (info->type == NF_LOG_TYPE_LOG)
                logflags = info->u.log.logflags;
 
-       if (!(logflags & XT_LOG_MACDECODE))
+       if (!(logflags & NF_LOG_MACDECODE))
                goto fallback;
 
        switch (dev->type) {
index 30b22f4..d6e4ba5 100644 (file)
@@ -22,9 +22,7 @@ static unsigned int nft_do_chain_ipv6(void *priv,
 {
        struct nft_pktinfo pkt;
 
-       /* malformed packet, drop it */
-       if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
-               return NF_DROP;
+       nft_set_pktinfo_ipv6(&pkt, skb, state);
 
        return nft_do_chain(&pkt, priv);
 }
@@ -102,7 +100,10 @@ static int __init nf_tables_ipv6_init(void)
 {
        int ret;
 
-       nft_register_chain_type(&filter_ipv6);
+       ret = nft_register_chain_type(&filter_ipv6);
+       if (ret < 0)
+               return ret;
+
        ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
        if (ret < 0)
                nft_unregister_chain_type(&filter_ipv6);
index 71d995f..f272747 100644 (file)
@@ -31,10 +31,9 @@ static unsigned int nf_route_table_hook(void *priv,
        struct in6_addr saddr, daddr;
        u_int8_t hop_limit;
        u32 mark, flowlabel;
+       int err;
 
-       /* malformed packet, drop it */
-       if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
-               return NF_DROP;
+       nft_set_pktinfo_ipv6(&pkt, skb, state);
 
        /* save source/dest address, mark, hoplimit, flowlabel, priority */
        memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
@@ -46,13 +45,16 @@ static unsigned int nf_route_table_hook(void *priv,
        flowlabel = *((u32 *)ipv6_hdr(skb));
 
        ret = nft_do_chain(&pkt, priv);
-       if (ret != NF_DROP && ret != NF_QUEUE &&
+       if (ret != NF_DROP && ret != NF_STOLEN &&
            (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
             memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
             skb->mark != mark ||
             ipv6_hdr(skb)->hop_limit != hop_limit ||
-            flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
-               return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
+            flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+               err = ip6_route_me_harder(state->net, skb);
+               if (err < 0)
+                       ret = NF_DROP_ERR(err);
+       }
 
        return ret;
 }
index 4dab585..5a5aeb9 100644 (file)
@@ -1992,9 +1992,18 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
                        if (!(gwa_type & IPV6_ADDR_UNICAST))
                                goto out;
 
-                       if (cfg->fc_table)
+                       if (cfg->fc_table) {
                                grt = ip6_nh_lookup_table(net, cfg, gw_addr);
 
+                               if (grt) {
+                                       if (grt->rt6i_flags & RTF_GATEWAY ||
+                                           (dev && dev != grt->dst.dev)) {
+                                               ip6_rt_put(grt);
+                                               grt = NULL;
+                                       }
+                               }
+                       }
+
                        if (!grt)
                                grt = rt6_lookup(net, gw_addr, NULL,
                                                 cfg->fc_ifindex, 1);
index 00a2d40..b578956 100644 (file)
@@ -21,9 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
        return xfrm6_extract_header(skb);
 }
 
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+                 struct ip6_tnl *t)
 {
-       XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+       XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
        XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
        XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
        return xfrm_input(skb, nexthdr, spi, 0);
@@ -49,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
        return -1;
 }
 
-int xfrm6_rcv(struct sk_buff *skb)
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
        return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
-                            0);
+                            0, t);
 }
-EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_rcv_tnl);
 
+int xfrm6_rcv(struct sk_buff *skb)
+{
+       return xfrm6_rcv_tnl(skb, NULL);
+}
+EXPORT_SYMBOL(xfrm6_rcv);
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
                     xfrm_address_t *saddr, u8 proto)
 {
index 5743044..e1c0bbe 100644 (file)
@@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
        __be32 spi;
 
        spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
-       return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+       return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
index db63969..391c3cb 100644 (file)
@@ -832,7 +832,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        struct sock *sk = sock->sk;
        struct irda_sock *new, *self = irda_sk(sk);
        struct sock *newsk;
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
        int err;
 
        err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
@@ -897,7 +897,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        err = -EPERM; /* value does not seem to make sense. -arnd */
        if (!new->tsap) {
                pr_debug("%s(), dup failed!\n", __func__);
-               kfree_skb(skb);
                goto out;
        }
 
@@ -916,7 +915,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        /* Clean up the original one to keep it in listen state */
        irttp_listen(self->tsap);
 
-       kfree_skb(skb);
        sk->sk_ack_backlog--;
 
        newsock->state = SS_CONNECTED;
@@ -924,6 +922,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        irda_connect_response(new);
        err = 0;
 out:
+       kfree_skb(skb);
        release_sock(sk);
        return err;
 }
index a5d69df..f6749dc 100644 (file)
@@ -261,10 +261,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
                .timeout = timeout,
                .ssn = start_seq_num,
        };
-
        int i, ret = -EOPNOTSUPP;
        u16 status = WLAN_STATUS_REQUEST_DECLINED;
 
+       if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
+               ht_dbg(sta->sdata,
+                      "STA %pM requests BA session on unsupported tid %d\n",
+                      sta->sta.addr, tid);
+               goto end_no_lock;
+       }
+
        if (!sta->sta.ht_cap.ht_supported) {
                ht_dbg(sta->sdata,
                       "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
index 5650c46..45319cc 100644 (file)
@@ -584,6 +584,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
            ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW))
                return -EINVAL;
 
+       if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID))
+               return -EINVAL;
+
        ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
               pubsta->addr, tid);
 
index fa7d37c..b747c96 100644 (file)
@@ -757,6 +757,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
                sta = next_hop_deref_protected(mpath);
                if (mpath->flags & MESH_PATH_ACTIVE &&
                    ether_addr_equal(ta, sta->sta.addr) &&
+                   !(mpath->flags & MESH_PATH_FIXED) &&
                    (!(mpath->flags & MESH_PATH_SN_VALID) ||
                    SN_GT(target_sn, mpath->sn)  || target_sn == 0)) {
                        mpath->flags &= ~MESH_PATH_ACTIVE;
@@ -1023,7 +1024,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
                goto enddiscovery;
 
        spin_lock_bh(&mpath->state_lock);
-       if (mpath->flags & MESH_PATH_DELETED) {
+       if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) {
                spin_unlock_bh(&mpath->state_lock);
                goto enddiscovery;
        }
index 6db2ddf..f0e6175 100644 (file)
@@ -826,7 +826,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop)
        mpath->metric = 0;
        mpath->hop_count = 0;
        mpath->exp_time = 0;
-       mpath->flags |= MESH_PATH_FIXED;
+       mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID;
        mesh_path_activate(mpath);
        spin_unlock_bh(&mpath->state_lock);
        mesh_path_tx_pending(mpath);
index c803e2c..011880d 100644 (file)
@@ -1609,7 +1609,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
 
                sta_info_recalc_tim(sta);
        } else {
-               unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
                int tid;
 
                /*
@@ -1641,7 +1640,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
                for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
                        struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
 
-                       if (!(tids & BIT(tid)) || txqi->tin.backlog_packets)
+                       if (!(driver_release_tids & BIT(tid)) ||
+                           txqi->tin.backlog_packets)
                                continue;
 
                        sta_info_recalc_tim(sta);
index 61d302d..1ff08be 100644 (file)
@@ -796,6 +796,36 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid)
        return ret;
 }
 
+static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
+                                         struct ieee80211_vif *vif,
+                                         struct ieee80211_sta *pubsta,
+                                         struct sk_buff *skb)
+{
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       struct ieee80211_txq *txq = NULL;
+
+       if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
+           (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
+               return NULL;
+
+       if (!ieee80211_is_data(hdr->frame_control))
+               return NULL;
+
+       if (pubsta) {
+               u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+
+               txq = pubsta->txq[tid];
+       } else if (vif) {
+               txq = vif->txq;
+       }
+
+       if (!txq)
+               return NULL;
+
+       return to_txq_info(txq);
+}
+
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 {
@@ -853,7 +883,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
        tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
        tx->sta->tx_stats.msdu[tid]++;
 
-       if (!tx->sta->sta.txq[0])
+       if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta,
+                              tx->skb))
                hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
 
        return TX_CONTINUE;
@@ -1243,36 +1274,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
        return TX_CONTINUE;
 }
 
-static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
-                                         struct ieee80211_vif *vif,
-                                         struct ieee80211_sta *pubsta,
-                                         struct sk_buff *skb)
-{
-       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_txq *txq = NULL;
-
-       if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
-           (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
-               return NULL;
-
-       if (!ieee80211_is_data(hdr->frame_control))
-               return NULL;
-
-       if (pubsta) {
-               u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-
-               txq = pubsta->txq[tid];
-       } else if (vif) {
-               txq = vif->txq;
-       }
-
-       if (!txq)
-               return NULL;
-
-       return to_txq_info(txq);
-}
-
 static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
 {
        IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
@@ -1514,8 +1515,12 @@ out:
        spin_unlock_bh(&fq->lock);
 
        if (skb && skb_has_frag_list(skb) &&
-           !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
-               skb_linearize(skb);
+           !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
+               if (skb_linearize(skb)) {
+                       ieee80211_free_txskb(&local->hw, skb);
+                       return NULL;
+               }
+       }
 
        return skb;
 }
@@ -3236,7 +3241,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
 
        if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
                *ieee80211_get_qos_ctl(hdr) = tid;
-               if (!sta->sta.txq[0])
+               if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb))
                        hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
        } else {
                info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
index 0c85811..c23c3c8 100644 (file)
@@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV)   += nf_dup_netdev.o
 
 # nf_tables
 nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
 nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+nf_tables-objs += nft_lookup.o nft_dynset.o
 
 obj-$(CONFIG_NF_TABLES)                += nf_tables.o
 obj-$(CONFIG_NF_TABLES_INET)   += nf_tables_inet.o
index 2c5327e..fa6715d 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
@@ -61,33 +62,55 @@ EXPORT_SYMBOL(nf_hooks_needed);
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
+#define nf_entry_dereference(e) \
+       rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
-static struct list_head *nf_find_hook_list(struct net *net,
-                                          const struct nf_hook_ops *reg)
+static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
+                                               const struct nf_hook_ops *reg)
 {
-       struct list_head *hook_list = NULL;
+       struct nf_hook_entry *hook_head = NULL;
 
        if (reg->pf != NFPROTO_NETDEV)
-               hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+               hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
+                                                [reg->hooknum]);
        else if (reg->hooknum == NF_NETDEV_INGRESS) {
 #ifdef CONFIG_NETFILTER_INGRESS
                if (reg->dev && dev_net(reg->dev) == net)
-                       hook_list = &reg->dev->nf_hooks_ingress;
+                       hook_head =
+                               nf_entry_dereference(
+                                       reg->dev->nf_hooks_ingress);
 #endif
        }
-       return hook_list;
+       return hook_head;
 }
 
-struct nf_hook_entry {
-       const struct nf_hook_ops        *orig_ops;
-       struct nf_hook_ops              ops;
-};
+/* must hold nf_hook_mutex */
+static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
+                             struct nf_hook_entry *entry)
+{
+       switch (reg->pf) {
+       case NFPROTO_NETDEV:
+               /* We already checked in nf_register_net_hook() that this is
+                * used from ingress.
+                */
+               rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
+               break;
+       default:
+               rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
+                                  entry);
+               break;
+       }
+}
 
 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-       struct list_head *hook_list;
+       struct nf_hook_entry *hooks_entry;
        struct nf_hook_entry *entry;
-       struct nf_hook_ops *elem;
+
+       if (reg->pf == NFPROTO_NETDEV &&
+           (reg->hooknum != NF_NETDEV_INGRESS ||
+            !reg->dev || dev_net(reg->dev) != net))
+               return -EINVAL;
 
        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
@@ -95,19 +118,30 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 
        entry->orig_ops = reg;
        entry->ops      = *reg;
+       entry->next     = NULL;
+
+       mutex_lock(&nf_hook_mutex);
+       hooks_entry = nf_hook_entry_head(net, reg);
 
-       hook_list = nf_find_hook_list(net, reg);
-       if (!hook_list) {
-               kfree(entry);
-               return -ENOENT;
+       if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
+               /* This is the case where we need to insert at the head */
+               entry->next = hooks_entry;
+               hooks_entry = NULL;
        }
 
-       mutex_lock(&nf_hook_mutex);
-       list_for_each_entry(elem, hook_list, list) {
-               if (reg->priority < elem->priority)
-                       break;
+       while (hooks_entry &&
+               reg->priority >= hooks_entry->orig_ops->priority &&
+               nf_entry_dereference(hooks_entry->next)) {
+               hooks_entry = nf_entry_dereference(hooks_entry->next);
+       }
+
+       if (hooks_entry) {
+               entry->next = nf_entry_dereference(hooks_entry->next);
+               rcu_assign_pointer(hooks_entry->next, entry);
+       } else {
+               nf_set_hooks_head(net, reg, entry);
        }
-       list_add_rcu(&entry->ops.list, elem->list.prev);
+
        mutex_unlock(&nf_hook_mutex);
 #ifdef CONFIG_NETFILTER_INGRESS
        if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -122,24 +156,33 @@ EXPORT_SYMBOL(nf_register_net_hook);
 
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-       struct list_head *hook_list;
-       struct nf_hook_entry *entry;
-       struct nf_hook_ops *elem;
-
-       hook_list = nf_find_hook_list(net, reg);
-       if (!hook_list)
-               return;
+       struct nf_hook_entry *hooks_entry;
 
        mutex_lock(&nf_hook_mutex);
-       list_for_each_entry(elem, hook_list, list) {
-               entry = container_of(elem, struct nf_hook_entry, ops);
-               if (entry->orig_ops == reg) {
-                       list_del_rcu(&entry->ops.list);
-                       break;
+       hooks_entry = nf_hook_entry_head(net, reg);
+       if (hooks_entry->orig_ops == reg) {
+               nf_set_hooks_head(net, reg,
+                                 nf_entry_dereference(hooks_entry->next));
+               goto unlock;
+       }
+       while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
+               struct nf_hook_entry *next =
+                       nf_entry_dereference(hooks_entry->next);
+               struct nf_hook_entry *nnext;
+
+               if (next->orig_ops != reg) {
+                       hooks_entry = next;
+                       continue;
                }
+               nnext = nf_entry_dereference(next->next);
+               rcu_assign_pointer(hooks_entry->next, nnext);
+               hooks_entry = next;
+               break;
        }
+
+unlock:
        mutex_unlock(&nf_hook_mutex);
-       if (&elem->list == hook_list) {
+       if (!hooks_entry) {
                WARN(1, "nf_unregister_net_hook: hook not found!\n");
                return;
        }
@@ -151,10 +194,10 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
        static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
        synchronize_net();
-       nf_queue_nf_hook_drop(net, &entry->ops);
+       nf_queue_nf_hook_drop(net, hooks_entry);
        /* other cpu might still process nfqueue verdict that used reg */
        synchronize_net();
-       kfree(entry);
+       kfree(hooks_entry);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -294,10 +337,9 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
 }
 EXPORT_SYMBOL(_nf_unregister_hooks);
 
-unsigned int nf_iterate(struct list_head *head,
-                       struct sk_buff *skb,
+unsigned int nf_iterate(struct sk_buff *skb,
                        struct nf_hook_state *state,
-                       struct nf_hook_ops **elemp)
+                       struct nf_hook_entry **entryp)
 {
        unsigned int verdict;
 
@@ -305,20 +347,23 @@ unsigned int nf_iterate(struct list_head *head,
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
-       list_for_each_entry_continue_rcu((*elemp), head, list) {
-               if (state->thresh > (*elemp)->priority)
+       while (*entryp) {
+               if (state->thresh > (*entryp)->ops.priority) {
+                       *entryp = rcu_dereference((*entryp)->next);
                        continue;
+               }
 
                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
 repeat:
-               verdict = (*elemp)->hook((*elemp)->priv, skb, state);
+               verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
                if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely((verdict & NF_VERDICT_MASK)
                                                        > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
-                                       (*elemp)->hook, state->hook);
+                                       (*entryp)->ops.hook, state->hook);
+                               *entryp = rcu_dereference((*entryp)->next);
                                continue;
                        }
 #endif
@@ -326,25 +371,23 @@ repeat:
                                return verdict;
                        goto repeat;
                }
+               *entryp = rcu_dereference((*entryp)->next);
        }
        return NF_ACCEPT;
 }
 
 
 /* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
+ * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
 {
-       struct nf_hook_ops *elem;
+       struct nf_hook_entry *entry;
        unsigned int verdict;
        int ret = 0;
 
-       /* We may already have this, but read-locks nest anyway */
-       rcu_read_lock();
-
-       elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
+       entry = rcu_dereference(state->hook_entries);
 next_hook:
-       verdict = nf_iterate(state->hook_list, skb, state, &elem);
+       verdict = nf_iterate(skb, state, &entry);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
        } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -353,8 +396,10 @@ next_hook:
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               int err = nf_queue(skb, elem, state,
-                                  verdict >> NF_VERDICT_QBITS);
+               int err;
+
+               RCU_INIT_POINTER(state->hook_entries, entry);
+               err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
                if (err < 0) {
                        if (err == -ESRCH &&
                           (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -362,7 +407,6 @@ next_hook:
                        kfree_skb(skb);
                }
        }
-       rcu_read_unlock();
        return ret;
 }
 EXPORT_SYMBOL(nf_hook_slow);
@@ -482,7 +526,7 @@ static int __net_init netfilter_net_init(struct net *net)
 
        for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
-                       INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+                       RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
        }
 
 #ifdef CONFIG_PROC_FS
index ac1db40..ba6a1d4 100644 (file)
@@ -379,7 +379,6 @@ static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
        struct nf_conn *ct = (struct nf_conn *)nfct;
-       struct net *net = nf_ct_net(ct);
        struct nf_conntrack_l4proto *l4proto;
 
        pr_debug("destroy_conntrack(%p)\n", ct);
@@ -406,7 +405,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
        nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
-       NF_CT_STAT_INC(net, delete);
        local_bh_enable();
 
        if (ct->master)
@@ -438,7 +436,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 
        nf_ct_add_to_dying_list(ct);
 
-       NF_CT_STAT_INC(net, delete_list);
        local_bh_enable();
 }
 
@@ -529,11 +526,8 @@ begin:
                if (nf_ct_is_dying(ct))
                        continue;
 
-               if (nf_ct_key_equal(h, tuple, zone, net)) {
-                       NF_CT_STAT_INC_ATOMIC(net, found);
+               if (nf_ct_key_equal(h, tuple, zone, net))
                        return h;
-               }
-               NF_CT_STAT_INC_ATOMIC(net, searched);
        }
        /*
         * if the nulls value we got at the end of this lookup is
@@ -798,7 +792,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
         */
        __nf_conntrack_hash_insert(ct, hash, reply_hash);
        nf_conntrack_double_unlock(hash, reply_hash);
-       NF_CT_STAT_INC(net, insert);
        local_bh_enable();
 
        help = nfct_help(ct);
@@ -857,7 +850,6 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
                        rcu_read_unlock();
                        return 1;
                }
-               NF_CT_STAT_INC_ATOMIC(net, searched);
        }
 
        if (get_nulls_value(n) != hash) {
@@ -1116,9 +1108,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        if (IS_ERR(ct))
                return (struct nf_conntrack_tuple_hash *)ct;
 
-       if (tmpl && nfct_synproxy(tmpl)) {
-               nfct_seqadj_ext_add(ct);
-               nfct_synproxy_ext_add(ct);
+       if (!nf_ct_add_synproxy(ct, tmpl)) {
+               nf_conntrack_free(ct);
+               return ERR_PTR(-ENOMEM);
        }
 
        timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
@@ -1177,10 +1169,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
                }
                spin_unlock(&nf_conntrack_expect_lock);
        }
-       if (!exp) {
+       if (!exp)
                __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
-               NF_CT_STAT_INC(net, new);
-       }
 
        /* Now it is inserted into the unconfirmed list, bump refcount */
        nf_conntrack_get(&ct->ct_general);
@@ -1285,7 +1275,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                skb->nfct = NULL;
        }
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        l3proto = __nf_ct_l3proto_find(pf);
        ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
                                   &dataoff, &protonum);
index b6934b5..e3ed200 100644 (file)
@@ -301,8 +301,6 @@ static int find_pattern(const char *data, size_t dlen,
        size_t i = plen;
 
        pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
-       if (dlen == 0)
-               return 0;
 
        if (dlen <= plen) {
                /* Short packet: try for partial? */
@@ -311,19 +309,8 @@ static int find_pattern(const char *data, size_t dlen,
                else return 0;
        }
 
-       if (strncasecmp(data, pattern, plen) != 0) {
-#if 0
-               size_t i;
-
-               pr_debug("ftp: string mismatch\n");
-               for (i = 0; i < plen; i++) {
-                       pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
-                                i, data[i], data[i],
-                                pattern[i], pattern[i]);
-               }
-#endif
+       if (strncasecmp(data, pattern, plen) != 0)
                return 0;
-       }
 
        pr_debug("Pattern matches!\n");
        /* Now we've found the constant string, try to skip
index 5c0db5c..f65d936 100644 (file)
@@ -736,7 +736,7 @@ static int callforward_do_filter(struct net *net,
        const struct nf_afinfo *afinfo;
        int ret = 0;
 
-       /* rcu_read_lock()ed by nf_hook_slow() */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        afinfo = nf_get_afinfo(family);
        if (!afinfo)
                return 0;
index b989b81..336e215 100644 (file)
@@ -189,7 +189,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
        struct nf_conntrack_helper *helper = NULL;
        struct nf_conn_help *help;
        struct net *net = nf_ct_net(ct);
-       int ret = 0;
 
        /* We already got a helper explicitly attached. The function
         * nf_conntrack_alter_reply - in case NAT is in use - asks for looking
@@ -223,15 +222,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
        if (helper == NULL) {
                if (help)
                        RCU_INIT_POINTER(help->helper, NULL);
-               goto out;
+               return 0;
        }
 
        if (help == NULL) {
                help = nf_ct_helper_ext_add(ct, helper, flags);
-               if (help == NULL) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (help == NULL)
+                       return -ENOMEM;
        } else {
                /* We only allow helper re-assignment of the same sort since
                 * we cannot reallocate the helper extension area.
@@ -240,13 +237,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 
                if (tmp && tmp->help != helper->help) {
                        RCU_INIT_POINTER(help->helper, NULL);
-                       goto out;
+                       return 0;
                }
        }
 
        rcu_assign_pointer(help->helper, helper);
-out:
-       return ret;
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
 
@@ -349,7 +346,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
        /* Called from the helper function, this call never fails */
        help = nfct_help(ct);
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        helper = rcu_dereference(help->helper);
 
        nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
index c052b71..2754045 100644 (file)
@@ -1984,13 +1984,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
        nfmsg->version      = NFNETLINK_V0;
        nfmsg->res_id       = htons(cpu);
 
-       if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) ||
-           nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
-           nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) ||
+       if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
            nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
            nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
-           nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) ||
-           nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) ||
            nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
            nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
                                htonl(st->insert_failed)) ||
index a96451a..9a715f8 100644 (file)
@@ -192,15 +192,15 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
                             struct net *net, struct nf_conntrack_tuple *tuple)
 {
-       const struct gre_hdr_pptp *pgrehdr;
-       struct gre_hdr_pptp _pgrehdr;
+       const struct pptp_gre_header *pgrehdr;
+       struct pptp_gre_header _pgrehdr;
        __be16 srckey;
-       const struct gre_hdr *grehdr;
-       struct gre_hdr _grehdr;
+       const struct gre_base_hdr *grehdr;
+       struct gre_base_hdr _grehdr;
 
        /* first only delinearize old RFC1701 GRE header */
        grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-       if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+       if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) {
                /* try to behave like "nf_conntrack_proto_generic" */
                tuple->src.u.all = 0;
                tuple->dst.u.all = 0;
@@ -212,8 +212,8 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
        if (!pgrehdr)
                return true;
 
-       if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
-               pr_debug("GRE_VERSION_PPTP but unknown proto\n");
+       if (grehdr->protocol != GRE_PROTO_PPP) {
+               pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol));
                return false;
        }
 
index dff0f0c..ef7063e 100644 (file)
@@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
        s32 seqoff, ackoff;
        struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
        struct nf_ct_seqadj *this_way, *other_way;
-       int res;
+       int res = 1;
 
        this_way  = &seqadj->seq[dir];
        other_way = &seqadj->seq[!dir];
@@ -184,27 +184,31 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
        else
                seqoff = this_way->offset_before;
 
+       newseq = htonl(ntohl(tcph->seq) + seqoff);
+       inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+       pr_debug("Adjusting sequence number from %u->%u\n",
+                ntohl(tcph->seq), ntohl(newseq));
+       tcph->seq = newseq;
+
+       if (!tcph->ack)
+               goto out;
+
        if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
                  other_way->correction_pos))
                ackoff = other_way->offset_after;
        else
                ackoff = other_way->offset_before;
 
-       newseq = htonl(ntohl(tcph->seq) + seqoff);
        newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-       inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
        inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
                                 false);
-
-       pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+       pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
                 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
                 ntohl(newack));
-
-       tcph->seq = newseq;
        tcph->ack_seq = newack;
 
        res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+out:
        spin_unlock_bh(&ct->lock);
 
        return res;
index 7d77217..621b81c 100644 (file)
@@ -83,9 +83,10 @@ static int digits_len(const struct nf_conn *ct, const char *dptr,
 static int iswordc(const char c)
 {
        if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
-           (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
+           (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' ||
            c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
-           c == '{' || c == '}' || c == '~')
+           c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') ||
+           c == '\'')
                return 1;
        return 0;
 }
@@ -329,13 +330,12 @@ static const char *sip_follow_continuation(const char *dptr, const char *limit)
 static const char *sip_skip_whitespace(const char *dptr, const char *limit)
 {
        for (; dptr < limit; dptr++) {
-               if (*dptr == ' ')
+               if (*dptr == ' ' || *dptr == '\t')
                        continue;
                if (*dptr != '\r' && *dptr != '\n')
                        break;
                dptr = sip_follow_continuation(dptr, limit);
-               if (dptr == NULL)
-                       return NULL;
+               break;
        }
        return dptr;
 }
index 3d9a316..5f446cd 100644 (file)
@@ -212,6 +212,11 @@ static int ct_seq_show(struct seq_file *s, void *v)
        if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
                return 0;
 
+       if (nf_ct_should_gc(ct)) {
+               nf_ct_kill(ct);
+               goto release;
+       }
+
        /* we only want to print DIR_ORIGINAL */
        if (NF_CT_DIRECTION(hash))
                goto release;
@@ -352,13 +357,13 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
        seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
                        "%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
                   nr_conntracks,
-                  st->searched,
+                  0,
                   st->found,
-                  st->new,
+                  0,
                   st->invalid,
                   st->ignore,
-                  st->delete,
-                  st->delete_list,
+                  0,
+                  0,
                   st->insert,
                   st->insert_failed,
                   st->drop,
index 0655225..e0adb59 100644 (file)
 
 
 /* core.c */
-unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
-                       struct nf_hook_state *state, struct nf_hook_ops **elemp);
+unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
+                       struct nf_hook_entry **entryp);
 
 /* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
-            struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+            unsigned int queuenum);
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
 int __init netfilter_queue_init(void);
 
 /* nf_log.c */
index a5aa596..119fe1c 100644 (file)
@@ -77,7 +77,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
        nf_log_buf_add(m, "SPT=%u DPT=%u ",
                       ntohs(th->source), ntohs(th->dest));
        /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-       if (logflags & XT_LOG_TCPSEQ) {
+       if (logflags & NF_LOG_TCPSEQ) {
                nf_log_buf_add(m, "SEQ=%u ACK=%u ",
                               ntohl(th->seq), ntohl(th->ack_seq));
        }
@@ -107,7 +107,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
        /* Max length: 11 "URGP=65535 " */
        nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
 
-       if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
+       if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
                u_int8_t _opt[60 - sizeof(struct tcphdr)];
                const u_int8_t *op;
                unsigned int i;
index 81ae41f..bbb8f3d 100644 (file)
@@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct,
                        ct->status |= IPS_DST_NAT;
 
                if (nfct_help(ct))
-                       nfct_seqadj_ext_add(ct);
+                       if (!nfct_seqadj_ext_add(ct))
+                               return NF_DROP;
        }
 
        if (maniptype == NF_NAT_MANIP_SRC) {
@@ -801,7 +802,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
        if (err < 0)
                return err;
 
-       return nf_nat_setup_info(ct, &range, manip);
+       return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
 }
 #else
 static int
index b19ad20..96964a0 100644 (file)
@@ -96,14 +96,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
 {
        const struct nf_queue_handler *qh;
 
        rcu_read_lock();
        qh = rcu_dereference(net->nf.queue_handler);
        if (qh)
-               qh->nf_hook_drop(net, ops);
+               qh->nf_hook_drop(net, entry);
        rcu_read_unlock();
 }
 
@@ -112,7 +112,6 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
  * through nf_reinject().
  */
 int nf_queue(struct sk_buff *skb,
-            struct nf_hook_ops *elem,
             struct nf_hook_state *state,
             unsigned int queuenum)
 {
@@ -141,7 +140,6 @@ int nf_queue(struct sk_buff *skb,
 
        *entry = (struct nf_queue_entry) {
                .skb    = skb,
-               .elem   = elem,
                .state  = *state,
                .size   = sizeof(*entry) + afinfo->route_key_size,
        };
@@ -165,11 +163,15 @@ err:
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
+       struct nf_hook_entry *hook_entry;
        struct sk_buff *skb = entry->skb;
-       struct nf_hook_ops *elem = entry->elem;
        const struct nf_afinfo *afinfo;
+       struct nf_hook_ops *elem;
        int err;
 
+       hook_entry = rcu_dereference(entry->state.hook_entries);
+       elem = &hook_entry->ops;
+
        nf_queue_entry_release_refs(entry);
 
        /* Continue traversal iff userspace said ok... */
@@ -186,8 +188,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 
        if (verdict == NF_ACCEPT) {
        next_hook:
-               verdict = nf_iterate(entry->state.hook_list,
-                                    skb, &entry->state, &elem);
+               verdict = nf_iterate(skb, &entry->state, &hook_entry);
        }
 
        switch (verdict & NF_VERDICT_MASK) {
@@ -198,7 +199,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                local_bh_enable();
                break;
        case NF_QUEUE:
-               err = nf_queue(skb, elem, &entry->state,
+               RCU_INIT_POINTER(entry->state.hook_entries, hook_entry);
+               err = nf_queue(skb, &entry->state,
                               verdict >> NF_VERDICT_QBITS);
                if (err < 0) {
                        if (err == -ESRCH &&
index bd9715e..b70d3ea 100644 (file)
@@ -4409,6 +4409,31 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
        return 0;
 }
 
+/**
+ *     nft_parse_u32_check - fetch u32 attribute and check for maximum value
+ *
+ *     @attr: netlink attribute to fetch value from
+ *     @max: maximum value to be stored in dest
+ *     @dest: pointer to the variable
+ *
+ *     Parse, check and store a given u32 netlink attribute into variable.
+ *     This function returns -ERANGE if the value goes over maximum value.
+ *     Otherwise a 0 is returned and the attribute value is stored in the
+ *     destination variable.
+ */
+unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+{
+       int val;
+
+       val = ntohl(nla_get_be32(attr));
+       if (val > max)
+               return -ERANGE;
+
+       *dest = val;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_u32_check);
+
 /**
  *     nft_parse_register - parse a register value from a netlink attribute
  *
index fb8b589..0dd5c69 100644 (file)
@@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = {
        .u = {
                .log = {
                        .level = LOGLEVEL_WARNING,
-                       .logflags = NF_LOG_MASK,
+                       .logflags = NF_LOG_DEFAULT_MASK,
                },
        },
 };
@@ -93,12 +93,15 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
 
        if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
                ptr = skb_network_header(skb);
-       else
+       else {
+               if (!pkt->tprot_set)
+                       return false;
                ptr = skb_network_header(skb) + pkt->xt.thoff;
+       }
 
        ptr += priv->offset;
 
-       if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
+       if (unlikely(ptr + priv->len > skb_tail_pointer(skb)))
                return false;
 
        *dest = 0;
@@ -260,8 +263,13 @@ int __init nf_tables_core_module_init(void)
        if (err < 0)
                goto err7;
 
-       return 0;
+       err = nft_range_module_init();
+       if (err < 0)
+               goto err8;
 
+       return 0;
+err8:
+       nft_dynset_module_exit();
 err7:
        nft_payload_module_exit();
 err6:
index 6b5f762..f713cc2 100644 (file)
@@ -82,7 +82,10 @@ static int __init nf_tables_inet_init(void)
 {
        int ret;
 
-       nft_register_chain_type(&filter_inet);
+       ret = nft_register_chain_type(&filter_inet);
+       if (ret < 0)
+               return ret;
+
        ret = register_pernet_subsys(&nf_tables_inet_net_ops);
        if (ret < 0)
                nft_unregister_chain_type(&filter_inet);
index 75d696f..9e2ae42 100644 (file)
 #include <net/netfilter/nf_tables_ipv4.h>
 #include <net/netfilter/nf_tables_ipv6.h>
 
-static inline void
-nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
-                           struct sk_buff *skb,
-                           const struct nf_hook_state *state)
-{
-       struct iphdr *iph, _iph;
-       u32 len, thoff;
-
-       nft_set_pktinfo(pkt, skb, state);
-
-       iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
-                                &_iph);
-       if (!iph)
-               return;
-
-       if (iph->ihl < 5 || iph->version != 4)
-               return;
-
-       len = ntohs(iph->tot_len);
-       thoff = iph->ihl * 4;
-       if (skb->len < len)
-               return;
-       else if (len < thoff)
-               return;
-
-       pkt->tprot = iph->protocol;
-       pkt->xt.thoff = thoff;
-       pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
-}
-
-static inline void
-__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-                             struct sk_buff *skb,
-                             const struct nf_hook_state *state)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-       struct ipv6hdr *ip6h, _ip6h;
-       unsigned int thoff = 0;
-       unsigned short frag_off;
-       int protohdr;
-       u32 pkt_len;
-
-       ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
-                                 &_ip6h);
-       if (!ip6h)
-               return;
-
-       if (ip6h->version != 6)
-               return;
-
-       pkt_len = ntohs(ip6h->payload_len);
-       if (pkt_len + sizeof(*ip6h) > skb->len)
-               return;
-
-       protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
-       if (protohdr < 0)
-                return;
-
-       pkt->tprot = protohdr;
-       pkt->xt.thoff = thoff;
-       pkt->xt.fragoff = frag_off;
-#endif
-}
-
-static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-                                              struct sk_buff *skb,
-                                              const struct nf_hook_state *state)
-{
-       nft_set_pktinfo(pkt, skb, state);
-       __nft_netdev_set_pktinfo_ipv6(pkt, skb, state);
-}
-
 static unsigned int
 nft_do_chain_netdev(void *priv, struct sk_buff *skb,
                    const struct nf_hook_state *state)
@@ -95,13 +23,13 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
 
        switch (skb->protocol) {
        case htons(ETH_P_IP):
-               nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
+               nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
                break;
        case htons(ETH_P_IPV6):
-               nft_netdev_set_pktinfo_ipv6(&pkt, skb, state);
+               nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
                break;
        default:
-               nft_set_pktinfo(&pkt, skb, state);
+               nft_set_pktinfo_unspec(&pkt, skb, state);
                break;
        }
 
@@ -221,14 +149,25 @@ static int __init nf_tables_netdev_init(void)
 {
        int ret;
 
-       nft_register_chain_type(&nft_filter_chain_netdev);
-       ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-       if (ret < 0) {
-               nft_unregister_chain_type(&nft_filter_chain_netdev);
+       ret = nft_register_chain_type(&nft_filter_chain_netdev);
+       if (ret)
                return ret;
-       }
-       register_netdevice_notifier(&nf_tables_netdev_notifier);
+
+       ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
+       if (ret)
+               goto err1;
+
+       ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
+       if (ret)
+               goto err2;
+
        return 0;
+
+err2:
+       unregister_pernet_subsys(&nf_tables_netdev_net_ops);
+err1:
+       nft_unregister_chain_type(&nft_filter_chain_netdev);
+       return ret;
 }
 
 static void __exit nf_tables_netdev_exit(void)
index 39eb1cc..ab695f8 100644 (file)
@@ -113,20 +113,22 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
                                  const struct nft_pktinfo *pkt)
 {
        const struct sk_buff *skb = pkt->skb;
-       unsigned int len = min_t(unsigned int,
-                                pkt->xt.thoff - skb_network_offset(skb),
-                                NFT_TRACETYPE_NETWORK_HSIZE);
        int off = skb_network_offset(skb);
+       unsigned int len, nh_end;
 
+       nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len;
+       len = min_t(unsigned int, nh_end - skb_network_offset(skb),
+                   NFT_TRACETYPE_NETWORK_HSIZE);
        if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
                return -1;
 
-       len = min_t(unsigned int, skb->len - pkt->xt.thoff,
-                   NFT_TRACETYPE_TRANSPORT_HSIZE);
-
-       if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
-                             pkt->xt.thoff, len))
-               return -1;
+       if (pkt->tprot_set) {
+               len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+                           NFT_TRACETYPE_TRANSPORT_HSIZE);
+               if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+                                     pkt->xt.thoff, len))
+                       return -1;
+       }
 
        if (!skb_mac_header_was_set(skb))
                return 0;
@@ -237,7 +239,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
                break;
        case NFT_TRACETYPE_POLICY:
                if (nla_put_be32(skb, NFTA_TRACE_POLICY,
-                                info->basechain->policy))
+                                htonl(info->basechain->policy)))
                        goto nla_put_failure;
                break;
        }
index e924e95..3b79f34 100644 (file)
@@ -43,7 +43,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
        if (help == NULL)
                return NF_DROP;
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        helper = rcu_dereference(help->helper);
        if (helper == NULL)
                return NF_DROP;
index 6577db5..eb086a1 100644 (file)
@@ -442,7 +442,9 @@ __build_packet_message(struct nfnl_log_net *log,
                        if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
                                         htonl(indev->ifindex)) ||
                        /* this is the bridge group "brX" */
-                       /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+                       /* rcu_read_lock()ed by nf_hook_thresh or
+                        * nf_log_packet.
+                        */
                            nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
                                         htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
                                goto nla_put_failure;
@@ -477,7 +479,9 @@ __build_packet_message(struct nfnl_log_net *log,
                        if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
                                         htonl(outdev->ifindex)) ||
                        /* this is the bridge group "brX" */
-                       /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+                       /* rcu_read_lock()ed by nf_hook_thresh or
+                        * nf_log_packet.
+                        */
                            nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                                         htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
                                goto nla_put_failure;
index f49f450..af832c5 100644 (file)
@@ -740,7 +740,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
        struct net *net = entry->state.net;
        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 
-       /* rcu_read_lock()ed by nf_hook_slow() */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        queue = instance_lookup(q, queuenum);
        if (!queue)
                return -ESRCH;
@@ -917,12 +917,14 @@ static struct notifier_block nfqnl_dev_notifier = {
        .notifier_call  = nfqnl_rcv_dev_event,
 };
 
-static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
+static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
 {
-       return entry->elem == (struct nf_hook_ops *)ops_ptr;
+       return rcu_access_pointer(entry->state.hook_entries) ==
+               (struct nf_hook_entry *)entry_ptr;
 }
 
-static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+static void nfqnl_nf_hook_drop(struct net *net,
+                              const struct nf_hook_entry *hook)
 {
        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
        int i;
@@ -1522,9 +1524,16 @@ static int __init nfnetlink_queue_init(void)
                goto cleanup_netlink_notifier;
        }
 
-       register_netdevice_notifier(&nfqnl_dev_notifier);
+       status = register_netdevice_notifier(&nfqnl_dev_notifier);
+       if (status < 0) {
+               pr_err("nf_queue: failed to register netdevice notifier\n");
+               goto cleanup_netlink_subsys;
+       }
+
        return status;
 
+cleanup_netlink_subsys:
+       nfnetlink_subsys_unregister(&nfqnl_subsys);
 cleanup_netlink_notifier:
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
        unregister_pernet_subsys(&nfnl_queue_net_ops);
index d71cc18..31c15ed 100644 (file)
@@ -52,6 +52,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
 {
        struct nft_bitwise *priv = nft_expr_priv(expr);
        struct nft_data_desc d1, d2;
+       u32 len;
        int err;
 
        if (tb[NFTA_BITWISE_SREG] == NULL ||
@@ -61,7 +62,12 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
            tb[NFTA_BITWISE_XOR] == NULL)
                return -EINVAL;
 
-       priv->len  = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+       err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len);
+       if (err < 0)
+               return err;
+
+       priv->len = len;
+
        priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
        err = nft_validate_register_load(priv->sreg, priv->len);
        if (err < 0)
index b78c28b..ee63d98 100644 (file)
@@ -99,6 +99,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
                              const struct nlattr * const tb[])
 {
        struct nft_byteorder *priv = nft_expr_priv(expr);
+       u32 size, len;
        int err;
 
        if (tb[NFTA_BYTEORDER_SREG] == NULL ||
@@ -117,7 +118,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
                return -EINVAL;
        }
 
-       priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+       err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size);
+       if (err < 0)
+               return err;
+
+       priv->size = size;
+
        switch (priv->size) {
        case 2:
        case 4:
@@ -128,7 +134,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
        }
 
        priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
-       priv->len  = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+       err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
+       if (err < 0)
+               return err;
+
+       priv->len = len;
+
        err = nft_validate_register_load(priv->sreg, priv->len);
        if (err < 0)
                return err;
index e25b35d..2e53739 100644 (file)
@@ -84,6 +84,9 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        if (err < 0)
                return err;
 
+       if (desc.len > U8_MAX)
+               return -ERANGE;
+
        priv->op  = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
        priv->len = desc.len;
        return 0;
index 51e180f..d7b0d17 100644 (file)
@@ -128,15 +128,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                memcpy(dest, &count, sizeof(count));
                return;
        }
+       case NFT_CT_L3PROTOCOL:
+               *dest = nf_ct_l3num(ct);
+               return;
+       case NFT_CT_PROTOCOL:
+               *dest = nf_ct_protonum(ct);
+               return;
        default:
                break;
        }
 
        tuple = &ct->tuplehash[priv->dir].tuple;
        switch (priv->key) {
-       case NFT_CT_L3PROTOCOL:
-               *dest = nf_ct_l3num(ct);
-               return;
        case NFT_CT_SRC:
                memcpy(dest, tuple->src.u3.all,
                       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
@@ -145,9 +148,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
                memcpy(dest, tuple->dst.u3.all,
                       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
                return;
-       case NFT_CT_PROTOCOL:
-               *dest = nf_ct_protonum(ct);
-               return;
        case NFT_CT_PROTO_SRC:
                *dest = (__force __u16)tuple->src.u.all;
                return;
@@ -283,8 +283,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
 
        case NFT_CT_L3PROTOCOL:
        case NFT_CT_PROTOCOL:
-               if (tb[NFTA_CT_DIRECTION] == NULL)
-                       return -EINVAL;
+               /* For compatibility, do not report error if NFTA_CT_DIRECTION
+                * attribute is specified.
+                */
                len = sizeof(u8);
                break;
        case NFT_CT_SRC:
@@ -363,6 +364,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
        switch (priv->key) {
 #ifdef CONFIG_NF_CONNTRACK_MARK
        case NFT_CT_MARK:
+               if (tb[NFTA_CT_DIRECTION])
+                       return -EINVAL;
                len = FIELD_SIZEOF(struct nf_conn, mark);
                break;
 #endif
@@ -432,8 +435,6 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
                goto nla_put_failure;
 
        switch (priv->key) {
-       case NFT_CT_L3PROTOCOL:
-       case NFT_CT_PROTOCOL:
        case NFT_CT_SRC:
        case NFT_CT_DST:
        case NFT_CT_PROTO_SRC:
index 0af2669..e3b83c3 100644 (file)
@@ -22,6 +22,7 @@ struct nft_dynset {
        enum nft_dynset_ops             op:8;
        enum nft_registers              sreg_key:8;
        enum nft_registers              sreg_data:8;
+       bool                            invert;
        u64                             timeout;
        struct nft_expr                 *expr;
        struct nft_set_binding          binding;
@@ -82,10 +83,14 @@ static void nft_dynset_eval(const struct nft_expr *expr,
 
                if (sexpr != NULL)
                        sexpr->ops->eval(sexpr, regs, pkt);
+
+               if (priv->invert)
+                       regs->verdict.code = NFT_BREAK;
                return;
        }
 out:
-       regs->verdict.code = NFT_BREAK;
+       if (!priv->invert)
+               regs->verdict.code = NFT_BREAK;
 }
 
 static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
@@ -96,6 +101,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
        [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
        [NFTA_DYNSET_TIMEOUT]   = { .type = NLA_U64 },
        [NFTA_DYNSET_EXPR]      = { .type = NLA_NESTED },
+       [NFTA_DYNSET_FLAGS]     = { .type = NLA_U32 },
 };
 
 static int nft_dynset_init(const struct nft_ctx *ctx,
@@ -113,6 +119,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
            tb[NFTA_DYNSET_SREG_KEY] == NULL)
                return -EINVAL;
 
+       if (tb[NFTA_DYNSET_FLAGS]) {
+               u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
+
+               if (flags & ~NFT_DYNSET_F_INV)
+                       return -EINVAL;
+               if (flags & NFT_DYNSET_F_INV)
+                       priv->invert = true;
+       }
+
        set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
                                   genmask);
        if (IS_ERR(set)) {
@@ -220,6 +235,7 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx,
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        const struct nft_dynset *priv = nft_expr_priv(expr);
+       u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0;
 
        if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
                goto nla_put_failure;
@@ -235,6 +251,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
                goto nla_put_failure;
        if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
                goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
+               goto nla_put_failure;
        return 0;
 
 nla_put_failure:
index 82c264e..a84cf3d 100644 (file)
@@ -59,7 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
                           const struct nlattr * const tb[])
 {
        struct nft_exthdr *priv = nft_expr_priv(expr);
-       u32 offset, len;
+       u32 offset, len, err;
 
        if (tb[NFTA_EXTHDR_DREG] == NULL ||
            tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -67,11 +67,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
            tb[NFTA_EXTHDR_LEN] == NULL)
                return -EINVAL;
 
-       offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
-       len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+       err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+       if (err < 0)
+               return err;
 
-       if (offset > U8_MAX || len > U8_MAX)
-               return -ERANGE;
+       err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+       if (err < 0)
+               return err;
 
        priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
        priv->offset = offset;
index 764251d..09473b4 100644 (file)
@@ -23,6 +23,7 @@ struct nft_hash {
        u8                      len;
        u32                     modulus;
        u32                     seed;
+       u32                     offset;
 };
 
 static void nft_hash_eval(const struct nft_expr *expr,
@@ -31,10 +32,10 @@ static void nft_hash_eval(const struct nft_expr *expr,
 {
        struct nft_hash *priv = nft_expr_priv(expr);
        const void *data = &regs->data[priv->sreg];
+       u32 h;
 
-       regs->data[priv->dreg] =
-               reciprocal_scale(jhash(data, priv->len, priv->seed),
-                                priv->modulus);
+       h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus);
+       regs->data[priv->dreg] = h + priv->offset;
 }
 
 static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
@@ -59,6 +60,9 @@ static int nft_hash_init(const struct nft_ctx *ctx,
            !tb[NFTA_HASH_MODULUS])
                return -EINVAL;
 
+       if (tb[NFTA_HASH_OFFSET])
+               priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
        priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
        priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
 
@@ -72,6 +76,9 @@ static int nft_hash_init(const struct nft_ctx *ctx,
        if (priv->modulus <= 1)
                return -ERANGE;
 
+       if (priv->offset + priv->modulus - 1 < priv->offset)
+               return -EOVERFLOW;
+
        priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
 
        return nft_validate_register_load(priv->sreg, len) &&
@@ -94,7 +101,9 @@ static int nft_hash_dump(struct sk_buff *skb,
                goto nla_put_failure;
        if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
                goto nla_put_failure;
-
+       if (priv->offset != 0)
+               if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+                       goto nla_put_failure;
        return 0;
 
 nla_put_failure:
index db3b746..d17018f 100644 (file)
@@ -53,6 +53,10 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
                            tb[NFTA_IMMEDIATE_DATA]);
        if (err < 0)
                return err;
+
+       if (desc.len > U8_MAX)
+               return -ERANGE;
+
        priv->dlen = desc.len;
 
        priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
index 24a73bb..1b01404 100644 (file)
@@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx,
        if (tb[NFTA_LOG_LEVEL] != NULL &&
            tb[NFTA_LOG_GROUP] != NULL)
                return -EINVAL;
-       if (tb[NFTA_LOG_GROUP] != NULL)
+       if (tb[NFTA_LOG_GROUP] != NULL) {
                li->type = NF_LOG_TYPE_ULOG;
+               if (tb[NFTA_LOG_FLAGS] != NULL)
+                       return -EINVAL;
+       }
 
        nla = tb[NFTA_LOG_PREFIX];
        if (nla != NULL) {
@@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx,
                if (tb[NFTA_LOG_FLAGS] != NULL) {
                        li->u.log.logflags =
                                ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+                       if (li->u.log.logflags & ~NF_LOG_MASK) {
+                               err = -EINVAL;
+                               goto err1;
+                       }
                }
                break;
        case NF_LOG_TYPE_ULOG:
index e164325..8166b69 100644 (file)
@@ -43,7 +43,7 @@ static void nft_lookup_eval(const struct nft_expr *expr,
                return;
        }
 
-       if (found && set->flags & NFT_SET_MAP)
+       if (set->flags & NFT_SET_MAP)
                nft_data_copy(&regs->data[priv->dreg],
                              nft_set_ext_data(ext), set->dlen);
 
index 8a6bc76..6c1e024 100644 (file)
@@ -52,6 +52,8 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                *dest = pkt->pf;
                break;
        case NFT_META_L4PROTO:
+               if (!pkt->tprot_set)
+                       goto err;
                *dest = pkt->tprot;
                break;
        case NFT_META_PRIORITY:
index 294745e..55bc5ab 100644 (file)
@@ -21,8 +21,9 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
 
 struct nft_ng_inc {
        enum nft_registers      dreg:8;
-       u32                     until;
+       u32                     modulus;
        atomic_t                counter;
+       u32                     offset;
 };
 
 static void nft_ng_inc_eval(const struct nft_expr *expr,
@@ -34,16 +35,17 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
 
        do {
                oval = atomic_read(&priv->counter);
-               nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+               nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
        } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
 
-       memcpy(&regs->data[priv->dreg], &priv->counter, sizeof(u32));
+       regs->data[priv->dreg] = nval + priv->offset;
 }
 
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
        [NFTA_NG_DREG]          = { .type = NLA_U32 },
-       [NFTA_NG_UNTIL]         = { .type = NLA_U32 },
+       [NFTA_NG_MODULUS]       = { .type = NLA_U32 },
        [NFTA_NG_TYPE]          = { .type = NLA_U32 },
+       [NFTA_NG_OFFSET]        = { .type = NLA_U32 },
 };
 
 static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -52,10 +54,16 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 {
        struct nft_ng_inc *priv = nft_expr_priv(expr);
 
-       priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
-       if (priv->until == 0)
+       if (tb[NFTA_NG_OFFSET])
+               priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+       priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+       if (priv->modulus == 0)
                return -ERANGE;
 
+       if (priv->offset + priv->modulus - 1 < priv->offset)
+               return -EOVERFLOW;
+
        priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
        atomic_set(&priv->counter, 0);
 
@@ -64,14 +72,16 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 }
 
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
-                      u32 until, enum nft_ng_types type)
+                      u32 modulus, enum nft_ng_types type, u32 offset)
 {
        if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
                goto nla_put_failure;
-       if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)))
+       if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus)))
                goto nla_put_failure;
        if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
                goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset)))
+               goto nla_put_failure;
 
        return 0;
 
@@ -83,12 +93,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        const struct nft_ng_inc *priv = nft_expr_priv(expr);
 
-       return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+       return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL,
+                          priv->offset);
 }
 
 struct nft_ng_random {
        enum nft_registers      dreg:8;
-       u32                     until;
+       u32                     modulus;
+       u32                     offset;
 };
 
 static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -97,9 +109,10 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
 {
        struct nft_ng_random *priv = nft_expr_priv(expr);
        struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+       u32 val;
 
-       regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
-                                                 priv->until);
+       val = reciprocal_scale(prandom_u32_state(state), priv->modulus);
+       regs->data[priv->dreg] = val + priv->offset;
 }
 
 static int nft_ng_random_init(const struct nft_ctx *ctx,
@@ -108,10 +121,16 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
 {
        struct nft_ng_random *priv = nft_expr_priv(expr);
 
-       priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
-       if (priv->until == 0)
+       if (tb[NFTA_NG_OFFSET])
+               priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+       priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+       if (priv->modulus == 0)
                return -ERANGE;
 
+       if (priv->offset + priv->modulus - 1 < priv->offset)
+               return -EOVERFLOW;
+
        prandom_init_once(&nft_numgen_prandom_state);
 
        priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
@@ -124,7 +143,8 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        const struct nft_ng_random *priv = nft_expr_priv(expr);
 
-       return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+       return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM,
+                          priv->offset);
 }
 
 static struct nft_expr_type nft_ng_type;
@@ -149,8 +169,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 {
        u32 type;
 
-       if (!tb[NFTA_NG_DREG]   ||
-           !tb[NFTA_NG_UNTIL]  ||
+       if (!tb[NFTA_NG_DREG]    ||
+           !tb[NFTA_NG_MODULUS] ||
            !tb[NFTA_NG_TYPE])
                return ERR_PTR(-EINVAL);
 
index 12cd4bf..b2f8861 100644 (file)
@@ -92,6 +92,8 @@ static void nft_payload_eval(const struct nft_expr *expr,
                offset = skb_network_offset(skb);
                break;
        case NFT_PAYLOAD_TRANSPORT_HEADER:
+               if (!pkt->tprot_set)
+                       goto err;
                offset = pkt->xt.thoff;
                break;
        default:
@@ -184,6 +186,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
                offset = skb_network_offset(skb);
                break;
        case NFT_PAYLOAD_TRANSPORT_HEADER:
+               if (!pkt->tprot_set)
+                       goto err;
                offset = pkt->xt.thoff;
                break;
        default:
index 61d216e..393d359 100644 (file)
 static u32 jhash_initval __read_mostly;
 
 struct nft_queue {
-       u16     queuenum;
-       u16     queues_total;
-       u16     flags;
+       enum nft_registers      sreg_qnum:8;
+       u16                     queuenum;
+       u16                     queues_total;
+       u16                     flags;
 };
 
 static void nft_queue_eval(const struct nft_expr *expr,
@@ -54,31 +55,78 @@ static void nft_queue_eval(const struct nft_expr *expr,
        regs->verdict.code = ret;
 }
 
+static void nft_queue_sreg_eval(const struct nft_expr *expr,
+                               struct nft_regs *regs,
+                               const struct nft_pktinfo *pkt)
+{
+       struct nft_queue *priv = nft_expr_priv(expr);
+       u32 queue, ret;
+
+       queue = regs->data[priv->sreg_qnum];
+
+       ret = NF_QUEUE_NR(queue);
+       if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+               ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+       regs->verdict.code = ret;
+}
+
 static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
        [NFTA_QUEUE_NUM]        = { .type = NLA_U16 },
        [NFTA_QUEUE_TOTAL]      = { .type = NLA_U16 },
        [NFTA_QUEUE_FLAGS]      = { .type = NLA_U16 },
+       [NFTA_QUEUE_SREG_QNUM]  = { .type = NLA_U32 },
 };
 
 static int nft_queue_init(const struct nft_ctx *ctx,
-                          const struct nft_expr *expr,
-                          const struct nlattr * const tb[])
+                         const struct nft_expr *expr,
+                         const struct nlattr * const tb[])
 {
        struct nft_queue *priv = nft_expr_priv(expr);
+       u32 maxid;
 
-       if (tb[NFTA_QUEUE_NUM] == NULL)
-               return -EINVAL;
-
-       init_hashrandom(&jhash_initval);
        priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
 
-       if (tb[NFTA_QUEUE_TOTAL] != NULL)
+       if (tb[NFTA_QUEUE_TOTAL])
                priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
-       if (tb[NFTA_QUEUE_FLAGS] != NULL) {
+       else
+               priv->queues_total = 1;
+
+       if (priv->queues_total == 0)
+               return -EINVAL;
+
+       maxid = priv->queues_total - 1 + priv->queuenum;
+       if (maxid > U16_MAX)
+               return -ERANGE;
+
+       if (tb[NFTA_QUEUE_FLAGS]) {
+               priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+               if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+                       return -EINVAL;
+       }
+       return 0;
+}
+
+static int nft_queue_sreg_init(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr,
+                              const struct nlattr * const tb[])
+{
+       struct nft_queue *priv = nft_expr_priv(expr);
+       int err;
+
+       priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
+       err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+       if (err < 0)
+               return err;
+
+       if (tb[NFTA_QUEUE_FLAGS]) {
                priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
                if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
                        return -EINVAL;
+               if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT)
+                       return -EOPNOTSUPP;
        }
+
        return 0;
 }
 
@@ -97,6 +145,21 @@ nla_put_failure:
        return -1;
 }
 
+static int
+nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_queue *priv = nft_expr_priv(expr);
+
+       if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) ||
+           nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+               goto nla_put_failure;
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
 static struct nft_expr_type nft_queue_type;
 static const struct nft_expr_ops nft_queue_ops = {
        .type           = &nft_queue_type,
@@ -106,9 +169,35 @@ static const struct nft_expr_ops nft_queue_ops = {
        .dump           = nft_queue_dump,
 };
 
+static const struct nft_expr_ops nft_queue_sreg_ops = {
+       .type           = &nft_queue_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_queue)),
+       .eval           = nft_queue_sreg_eval,
+       .init           = nft_queue_sreg_init,
+       .dump           = nft_queue_sreg_dump,
+};
+
+static const struct nft_expr_ops *
+nft_queue_select_ops(const struct nft_ctx *ctx,
+                    const struct nlattr * const tb[])
+{
+       if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM])
+               return ERR_PTR(-EINVAL);
+
+       init_hashrandom(&jhash_initval);
+
+       if (tb[NFTA_QUEUE_NUM])
+               return &nft_queue_ops;
+
+       if (tb[NFTA_QUEUE_SREG_QNUM])
+               return &nft_queue_sreg_ops;
+
+       return ERR_PTR(-EINVAL);
+}
+
 static struct nft_expr_type nft_queue_type __read_mostly = {
        .name           = "queue",
-       .ops            = &nft_queue_ops,
+       .select_ops     = &nft_queue_select_ops,
        .policy         = nft_queue_policy,
        .maxattr        = NFTA_QUEUE_MAX,
        .owner          = THIS_MODULE,
index 6eafbf9..c00104c 100644 (file)
@@ -21,10 +21,10 @@ struct nft_quota {
        atomic64_t      remain;
 };
 
-static inline long nft_quota(struct nft_quota *priv,
-                            const struct nft_pktinfo *pkt)
+static inline bool nft_overquota(struct nft_quota *priv,
+                                const struct nft_pktinfo *pkt)
 {
-       return atomic64_sub_return(pkt->skb->len, &priv->remain);
+       return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0;
 }
 
 static void nft_quota_eval(const struct nft_expr *expr,
@@ -33,7 +33,7 @@ static void nft_quota_eval(const struct nft_expr *expr,
 {
        struct nft_quota *priv = nft_expr_priv(expr);
 
-       if (nft_quota(priv, pkt) < 0 && !priv->invert)
+       if (nft_overquota(priv, pkt) ^ priv->invert)
                regs->verdict.code = NFT_BREAK;
 }
 
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
new file mode 100644 (file)
index 0000000..c6d5358
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_range_expr {
+       struct nft_data         data_from;
+       struct nft_data         data_to;
+       enum nft_registers      sreg:8;
+       u8                      len;
+       enum nft_range_ops      op:8;
+};
+
+static void nft_range_eval(const struct nft_expr *expr,
+                        struct nft_regs *regs,
+                        const struct nft_pktinfo *pkt)
+{
+       const struct nft_range_expr *priv = nft_expr_priv(expr);
+       bool mismatch;
+       int d1, d2;
+
+       d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len);
+       d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len);
+       switch (priv->op) {
+       case NFT_RANGE_EQ:
+               mismatch = (d1 < 0 || d2 > 0);
+               break;
+       case NFT_RANGE_NEQ:
+               mismatch = (d1 >= 0 && d2 <= 0);
+               break;
+       }
+
+       if (mismatch)
+               regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
+       [NFTA_RANGE_SREG]               = { .type = NLA_U32 },
+       [NFTA_RANGE_OP]                 = { .type = NLA_U32 },
+       [NFTA_RANGE_FROM_DATA]          = { .type = NLA_NESTED },
+       [NFTA_RANGE_TO_DATA]            = { .type = NLA_NESTED },
+};
+
+static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                       const struct nlattr * const tb[])
+{
+       struct nft_range_expr *priv = nft_expr_priv(expr);
+       struct nft_data_desc desc_from, desc_to;
+       int err;
+
+       err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
+                           &desc_from, tb[NFTA_RANGE_FROM_DATA]);
+       if (err < 0)
+               return err;
+
+       err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
+                           &desc_to, tb[NFTA_RANGE_TO_DATA]);
+       if (err < 0)
+               goto err1;
+
+       if (desc_from.len != desc_to.len) {
+               err = -EINVAL;
+               goto err2;
+       }
+
+       priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
+       err = nft_validate_register_load(priv->sreg, desc_from.len);
+       if (err < 0)
+               goto err2;
+
+       priv->op  = ntohl(nla_get_be32(tb[NFTA_RANGE_OP]));
+       priv->len = desc_from.len;
+       return 0;
+err2:
+       nft_data_uninit(&priv->data_to, desc_to.type);
+err1:
+       nft_data_uninit(&priv->data_from, desc_from.type);
+       return err;
+}
+
+static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_range_expr *priv = nft_expr_priv(expr);
+
+       if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg))
+               goto nla_put_failure;
+       if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op)))
+               goto nla_put_failure;
+
+       if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from,
+                         NFT_DATA_VALUE, priv->len) < 0 ||
+           nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to,
+                         NFT_DATA_VALUE, priv->len) < 0)
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
+static struct nft_expr_type nft_range_type;
+static const struct nft_expr_ops nft_range_ops = {
+       .type           = &nft_range_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
+       .eval           = nft_range_eval,
+       .init           = nft_range_init,
+       .dump           = nft_range_dump,
+};
+
+static struct nft_expr_type nft_range_type __read_mostly = {
+       .name           = "range",
+       .ops            = &nft_range_ops,
+       .policy         = nft_range_policy,
+       .maxattr        = NFTA_RANGE_MAX,
+       .owner          = THIS_MODULE,
+};
+
+int __init nft_range_module_init(void)
+{
+       return nft_register_expr(&nft_range_type);
+}
+
+void nft_range_module_exit(void)
+{
+       nft_unregister_expr(&nft_range_type);
+}
index 515131f..dbd6c4a 100644 (file)
@@ -24,7 +24,6 @@ static DEFINE_MUTEX(xt_rateest_mutex);
 #define RATEEST_HSIZE  16
 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
 static unsigned int jhash_rnd __read_mostly;
-static bool rnd_inited __read_mostly;
 
 static unsigned int xt_rateest_hash(const char *name)
 {
@@ -99,10 +98,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
        } cfg;
        int ret;
 
-       if (unlikely(!rnd_inited)) {
-               get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
-               rnd_inited = true;
-       }
+       net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
 
        est = xt_rateest_lookup(info->name);
        if (est) {
index e118397..872db2d 100644 (file)
@@ -110,18 +110,14 @@ tcpmss_mangle_packet(struct sk_buff *skb,
        if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
                struct net *net = par->net;
                unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
+               unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
 
-               if (dst_mtu(skb_dst(skb)) <= minlen) {
+               if (min_mtu <= minlen) {
                        net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
-                                           dst_mtu(skb_dst(skb)));
+                                           min_mtu);
                        return -1;
                }
-               if (in_mtu <= minlen) {
-                       net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
-                                           in_mtu);
-                       return -1;
-               }
-               newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
+               newmss = min_mtu - minlen;
        } else
                newmss = info->mss;
 
index 6e57a39..0471db4 100644 (file)
@@ -89,6 +89,8 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
                return -EINVAL;
 
        if (info->oif[0]) {
+               int ret;
+
                if (info->oif[sizeof(info->oif)-1] != '\0')
                        return -EINVAL;
 
@@ -101,7 +103,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
                priv->notifier.notifier_call = tee_netdev_event;
                info->priv    = priv;
 
-               register_netdevice_notifier(&priv->notifier);
+               ret = register_netdevice_notifier(&priv->notifier);
+               if (ret) {
+                       kfree(priv);
+                       return ret;
+               }
        } else
                info->priv = NULL;
 
index 99bbc82..b6dc322 100644 (file)
@@ -366,14 +366,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
        unsigned int i;
        int ret;
 
-       if (unlikely(!connlimit_rnd)) {
-               u_int32_t rand;
+       net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
 
-               do {
-                       get_random_bytes(&rand, sizeof(rand));
-               } while (!rand);
-               cmpxchg(&connlimit_rnd, 0, rand);
-       }
        ret = nf_ct_l3proto_try_module_get(par->family);
        if (ret < 0) {
                pr_info("cannot load conntrack support for "
index 1786968..44a095e 100644 (file)
@@ -56,6 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
 }
 
 /* need to declare this at the top */
+static const struct file_operations dl_file_ops_v1;
 static const struct file_operations dl_file_ops;
 
 /* hash table crap */
@@ -86,8 +87,8 @@ struct dsthash_ent {
        unsigned long expires;          /* precalculated expiry time */
        struct {
                unsigned long prev;     /* last modification */
-               u_int32_t credit;
-               u_int32_t credit_cap, cost;
+               u_int64_t credit;
+               u_int64_t credit_cap, cost;
        } rateinfo;
        struct rcu_head rcu;
 };
@@ -98,7 +99,7 @@ struct xt_hashlimit_htable {
        u_int8_t family;
        bool rnd_initialized;
 
-       struct hashlimit_cfg1 cfg;      /* config */
+       struct hashlimit_cfg2 cfg;      /* config */
 
        /* used internally */
        spinlock_t lock;                /* lock for list_head */
@@ -114,6 +115,30 @@ struct xt_hashlimit_htable {
        struct hlist_head hash[0];      /* hashtable itself */
 };
 
+static int
+cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+{
+       if (revision == 1) {
+               struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
+
+               to->mode = cfg->mode;
+               to->avg = cfg->avg;
+               to->burst = cfg->burst;
+               to->size = cfg->size;
+               to->max = cfg->max;
+               to->gc_interval = cfg->gc_interval;
+               to->expire = cfg->expire;
+               to->srcmask = cfg->srcmask;
+               to->dstmask = cfg->dstmask;
+       } else if (revision == 2) {
+               memcpy(to, from, sizeof(struct hashlimit_cfg2));
+       } else {
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static DEFINE_MUTEX(hashlimit_mutex);  /* protects htables list */
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
@@ -215,16 +240,18 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(struct work_struct *work);
 
-static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
-                        u_int8_t family)
+static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+                        const char *name, u_int8_t family,
+                        struct xt_hashlimit_htable **out_hinfo,
+                        int revision)
 {
        struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
        struct xt_hashlimit_htable *hinfo;
-       unsigned int size;
-       unsigned int i;
+       unsigned int size, i;
+       int ret;
 
-       if (minfo->cfg.size) {
-               size = minfo->cfg.size;
+       if (cfg->size) {
+               size = cfg->size;
        } else {
                size = (totalram_pages << PAGE_SHIFT) / 16384 /
                       sizeof(struct list_head);
@@ -238,10 +265,14 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
                        sizeof(struct list_head) * size);
        if (hinfo == NULL)
                return -ENOMEM;
-       minfo->hinfo = hinfo;
+       *out_hinfo = hinfo;
 
        /* copy match config into hashtable config */
-       memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
+       ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+
+       if (ret)
+               return ret;
+
        hinfo->cfg.size = size;
        if (hinfo->cfg.max == 0)
                hinfo->cfg.max = 8 * hinfo->cfg.size;
@@ -255,17 +286,18 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
        hinfo->count = 0;
        hinfo->family = family;
        hinfo->rnd_initialized = false;
-       hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
+       hinfo->name = kstrdup(name, GFP_KERNEL);
        if (!hinfo->name) {
                vfree(hinfo);
                return -ENOMEM;
        }
        spin_lock_init(&hinfo->lock);
 
-       hinfo->pde = proc_create_data(minfo->name, 0,
+       hinfo->pde = proc_create_data(name, 0,
                (family == NFPROTO_IPV4) ?
                hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-               &dl_file_ops, hinfo);
+               (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
+               hinfo);
        if (hinfo->pde == NULL) {
                kfree(hinfo->name);
                vfree(hinfo);
@@ -398,7 +430,8 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
    (slowest userspace tool allows), which means
    CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
 */
-#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24))
 
 /* Repeated shift and or gives us all 1s, final shift and add 1 gives
  * us the power of 2 below the theoretical max, so GCC simply does a
@@ -408,9 +441,12 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
 #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
 #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
 #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32))
 #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1)
 
-#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ)
+#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1)
 
 /* in byte mode, the lowest possible rate is one packet/second.
  * credit_cap is used as a counter that tells us how many times we can
@@ -425,14 +461,24 @@ static u32 xt_hashlimit_len_to_chunks(u32 len)
 }
 
 /* Precision saver. */
-static u32 user2credits(u32 user)
+static u64 user2credits(u64 user, int revision)
 {
-       /* If multiplying would overflow... */
-       if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
-               /* Divide first. */
-               return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+       if (revision == 1) {
+               /* If multiplying would overflow... */
+               if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1))
+                       /* Divide first. */
+                       return (user / XT_HASHLIMIT_SCALE) *\
+                                               HZ * CREDITS_PER_JIFFY_v1;
+
+               return (user * HZ * CREDITS_PER_JIFFY_v1) \
+                                               / XT_HASHLIMIT_SCALE;
+       } else {
+               if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+                       return (user / XT_HASHLIMIT_SCALE_v2) *\
+                                               HZ * CREDITS_PER_JIFFY;
 
-       return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
+               return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2;
+       }
 }
 
 static u32 user2credits_byte(u32 user)
@@ -442,10 +488,11 @@ static u32 user2credits_byte(u32 user)
        return (u32) (us >> 32);
 }
 
-static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
+                           u32 mode, int revision)
 {
        unsigned long delta = now - dh->rateinfo.prev;
-       u32 cap;
+       u64 cap, cpj;
 
        if (delta == 0)
                return;
@@ -453,7 +500,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
        dh->rateinfo.prev = now;
 
        if (mode & XT_HASHLIMIT_BYTES) {
-               u32 tmp = dh->rateinfo.credit;
+               u64 tmp = dh->rateinfo.credit;
                dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
                cap = CREDITS_PER_JIFFY_BYTES * HZ;
                if (tmp >= dh->rateinfo.credit) {/* overflow */
@@ -461,7 +508,9 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
                        return;
                }
        } else {
-               dh->rateinfo.credit += delta * CREDITS_PER_JIFFY;
+               cpj = (revision == 1) ?
+                       CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
+               dh->rateinfo.credit += delta * cpj;
                cap = dh->rateinfo.credit_cap;
        }
        if (dh->rateinfo.credit > cap)
@@ -469,7 +518,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
 }
 
 static void rateinfo_init(struct dsthash_ent *dh,
-                         struct xt_hashlimit_htable *hinfo)
+                         struct xt_hashlimit_htable *hinfo, int revision)
 {
        dh->rateinfo.prev = jiffies;
        if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
@@ -478,8 +527,8 @@ static void rateinfo_init(struct dsthash_ent *dh,
                dh->rateinfo.credit_cap = hinfo->cfg.burst;
        } else {
                dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
-                                                  hinfo->cfg.burst);
-               dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+                                                  hinfo->cfg.burst, revision);
+               dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision);
                dh->rateinfo.credit_cap = dh->rateinfo.credit;
        }
 }
@@ -603,15 +652,15 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
+                   struct xt_hashlimit_htable *hinfo,
+                   const struct hashlimit_cfg2 *cfg, int revision)
 {
-       const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
-       struct xt_hashlimit_htable *hinfo = info->hinfo;
        unsigned long now = jiffies;
        struct dsthash_ent *dh;
        struct dsthash_dst dst;
        bool race = false;
-       u32 cost;
+       u64 cost;
 
        if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
                goto hotdrop;
@@ -626,18 +675,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
                } else if (race) {
                        /* Already got an entry, update expiration timeout */
                        dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-                       rateinfo_recalc(dh, now, hinfo->cfg.mode);
+                       rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
                } else {
                        dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-                       rateinfo_init(dh, hinfo);
+                       rateinfo_init(dh, hinfo, revision);
                }
        } else {
                /* update expiration timeout */
                dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-               rateinfo_recalc(dh, now, hinfo->cfg.mode);
+               rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
        }
 
-       if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+       if (cfg->mode & XT_HASHLIMIT_BYTES)
                cost = hashlimit_byte_cost(skb->len, dh);
        else
                cost = dh->rateinfo.cost;
@@ -647,84 +696,157 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
                dh->rateinfo.credit -= cost;
                spin_unlock(&dh->lock);
                rcu_read_unlock_bh();
-               return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
+               return !(cfg->mode & XT_HASHLIMIT_INVERT);
        }
 
        spin_unlock(&dh->lock);
        rcu_read_unlock_bh();
        /* default match is underlimit - so over the limit, we need to invert */
-       return info->cfg.mode & XT_HASHLIMIT_INVERT;
+       return cfg->mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
        par->hotdrop = true;
        return false;
 }
 
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static bool
+hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+       struct xt_hashlimit_htable *hinfo = info->hinfo;
+       struct hashlimit_cfg2 cfg = {};
+       int ret;
+
+       ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+       if (ret)
+               return ret;
+
+       return hashlimit_mt_common(skb, par, hinfo, &cfg, 1);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+       struct xt_hashlimit_htable *hinfo = info->hinfo;
+
+       return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+}
+
+static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
+                                    struct xt_hashlimit_htable **hinfo,
+                                    struct hashlimit_cfg2 *cfg,
+                                    const char *name, int revision)
 {
        struct net *net = par->net;
-       struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
        int ret;
 
-       if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
-               return -EINVAL;
-       if (info->name[sizeof(info->name)-1] != '\0')
+       if (cfg->gc_interval == 0 || cfg->expire == 0)
                return -EINVAL;
        if (par->family == NFPROTO_IPV4) {
-               if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
+               if (cfg->srcmask > 32 || cfg->dstmask > 32)
                        return -EINVAL;
        } else {
-               if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
+               if (cfg->srcmask > 128 || cfg->dstmask > 128)
                        return -EINVAL;
        }
 
-       if (info->cfg.mode & ~XT_HASHLIMIT_ALL) {
+       if (cfg->mode & ~XT_HASHLIMIT_ALL) {
                pr_info("Unknown mode mask %X, kernel too old?\n",
-                                               info->cfg.mode);
+                                               cfg->mode);
                return -EINVAL;
        }
 
        /* Check for overflow. */
-       if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
-               if (user2credits_byte(info->cfg.avg) == 0) {
-                       pr_info("overflow, rate too high: %u\n", info->cfg.avg);
+       if (cfg->mode & XT_HASHLIMIT_BYTES) {
+               if (user2credits_byte(cfg->avg) == 0) {
+                       pr_info("overflow, rate too high: %llu\n", cfg->avg);
                        return -EINVAL;
                }
-       } else if (info->cfg.burst == 0 ||
-                   user2credits(info->cfg.avg * info->cfg.burst) <
-                   user2credits(info->cfg.avg)) {
-                       pr_info("overflow, try lower: %u/%u\n",
-                               info->cfg.avg, info->cfg.burst);
+       } else if (cfg->burst == 0 ||
+                   user2credits(cfg->avg * cfg->burst, revision) <
+                   user2credits(cfg->avg, revision)) {
+                       pr_info("overflow, try lower: %llu/%llu\n",
+                               cfg->avg, cfg->burst);
                        return -ERANGE;
        }
 
        mutex_lock(&hashlimit_mutex);
-       info->hinfo = htable_find_get(net, info->name, par->family);
-       if (info->hinfo == NULL) {
-               ret = htable_create(net, info, par->family);
+       *hinfo = htable_find_get(net, name, par->family);
+       if (*hinfo == NULL) {
+               ret = htable_create(net, cfg, name, par->family,
+                                   hinfo, revision);
                if (ret < 0) {
                        mutex_unlock(&hashlimit_mutex);
                        return ret;
                }
        }
        mutex_unlock(&hashlimit_mutex);
+
        return 0;
 }
 
-static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
+static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
+{
+       struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+       struct hashlimit_cfg2 cfg = {};
+       int ret;
+
+       if (info->name[sizeof(info->name) - 1] != '\0')
+               return -EINVAL;
+
+       ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+       if (ret)
+               return ret;
+
+       return hashlimit_mt_check_common(par, &info->hinfo,
+                                        &cfg, info->name, 1);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+       struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+       if (info->name[sizeof(info->name) - 1] != '\0')
+               return -EINVAL;
+
+       return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
+                                        info->name, 2);
+}
+
+static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
 {
        const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 
        htable_put(info->hinfo);
 }
 
+static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
+{
+       const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+       htable_put(info->hinfo);
+}
+
 static struct xt_match hashlimit_mt_reg[] __read_mostly = {
        {
                .name           = "hashlimit",
                .revision       = 1,
                .family         = NFPROTO_IPV4,
-               .match          = hashlimit_mt,
+               .match          = hashlimit_mt_v1,
                .matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
+               .checkentry     = hashlimit_mt_check_v1,
+               .destroy        = hashlimit_mt_destroy_v1,
+               .me             = THIS_MODULE,
+       },
+       {
+               .name           = "hashlimit",
+               .revision       = 2,
+               .family         = NFPROTO_IPV4,
+               .match          = hashlimit_mt,
+               .matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
                .checkentry     = hashlimit_mt_check,
                .destroy        = hashlimit_mt_destroy,
                .me             = THIS_MODULE,
@@ -734,8 +856,18 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
                .name           = "hashlimit",
                .revision       = 1,
                .family         = NFPROTO_IPV6,
-               .match          = hashlimit_mt,
+               .match          = hashlimit_mt_v1,
                .matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
+               .checkentry     = hashlimit_mt_check_v1,
+               .destroy        = hashlimit_mt_destroy_v1,
+               .me             = THIS_MODULE,
+       },
+       {
+               .name           = "hashlimit",
+               .revision       = 2,
+               .family         = NFPROTO_IPV6,
+               .match          = hashlimit_mt,
+               .matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
                .checkentry     = hashlimit_mt_check,
                .destroy        = hashlimit_mt_destroy,
                .me             = THIS_MODULE,
@@ -786,18 +918,12 @@ static void dl_seq_stop(struct seq_file *s, void *v)
        spin_unlock_bh(&htable->lock);
 }
 
-static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
-                                  struct seq_file *s)
+static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
+                        struct seq_file *s)
 {
-       const struct xt_hashlimit_htable *ht = s->private;
-
-       spin_lock(&ent->lock);
-       /* recalculate to show accurate numbers */
-       rateinfo_recalc(ent, jiffies, ht->cfg.mode);
-
        switch (family) {
        case NFPROTO_IPV4:
-               seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
+               seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n",
                           (long)(ent->expires - jiffies)/HZ,
                           &ent->dst.ip.src,
                           ntohs(ent->dst.src_port),
@@ -808,7 +934,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
                break;
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        case NFPROTO_IPV6:
-               seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
+               seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n",
                           (long)(ent->expires - jiffies)/HZ,
                           &ent->dst.ip6.src,
                           ntohs(ent->dst.src_port),
@@ -821,10 +947,52 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
        default:
                BUG();
        }
+}
+
+static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
+                              struct seq_file *s)
+{
+       const struct xt_hashlimit_htable *ht = s->private;
+
+       spin_lock(&ent->lock);
+       /* recalculate to show accurate numbers */
+       rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1);
+
+       dl_seq_print(ent, family, s);
+
+       spin_unlock(&ent->lock);
+       return seq_has_overflowed(s);
+}
+
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
+                           struct seq_file *s)
+{
+       const struct xt_hashlimit_htable *ht = s->private;
+
+       spin_lock(&ent->lock);
+       /* recalculate to show accurate numbers */
+       rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+       dl_seq_print(ent, family, s);
+
        spin_unlock(&ent->lock);
        return seq_has_overflowed(s);
 }
 
+static int dl_seq_show_v1(struct seq_file *s, void *v)
+{
+       struct xt_hashlimit_htable *htable = s->private;
+       unsigned int *bucket = (unsigned int *)v;
+       struct dsthash_ent *ent;
+
+       if (!hlist_empty(&htable->hash[*bucket])) {
+               hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+                       if (dl_seq_real_show_v1(ent, htable->family, s))
+                               return -1;
+       }
+       return 0;
+}
+
 static int dl_seq_show(struct seq_file *s, void *v)
 {
        struct xt_hashlimit_htable *htable = s->private;
@@ -839,6 +1007,13 @@ static int dl_seq_show(struct seq_file *s, void *v)
        return 0;
 }
 
+static const struct seq_operations dl_seq_ops_v1 = {
+       .start = dl_seq_start,
+       .next  = dl_seq_next,
+       .stop  = dl_seq_stop,
+       .show  = dl_seq_show_v1
+};
+
 static const struct seq_operations dl_seq_ops = {
        .start = dl_seq_start,
        .next  = dl_seq_next,
@@ -846,17 +1021,37 @@ static const struct seq_operations dl_seq_ops = {
        .show  = dl_seq_show
 };
 
+static int dl_proc_open_v1(struct inode *inode, struct file *file)
+{
+       int ret = seq_open(file, &dl_seq_ops_v1);
+
+       if (!ret) {
+               struct seq_file *sf = file->private_data;
+               sf->private = PDE_DATA(inode);
+       }
+       return ret;
+}
+
 static int dl_proc_open(struct inode *inode, struct file *file)
 {
        int ret = seq_open(file, &dl_seq_ops);
 
        if (!ret) {
                struct seq_file *sf = file->private_data;
+
                sf->private = PDE_DATA(inode);
        }
        return ret;
 }
 
+static const struct file_operations dl_file_ops_v1 = {
+       .owner   = THIS_MODULE,
+       .open    = dl_proc_open_v1,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release
+};
+
 static const struct file_operations dl_file_ops = {
        .owner   = THIS_MODULE,
        .open    = dl_proc_open,
index 9f4ab00..f679dd4 100644 (file)
@@ -41,7 +41,7 @@ helper_mt(const struct sk_buff *skb, struct xt_action_param *par)
        if (!master_help)
                return ret;
 
-       /* rcu_read_lock()ed by nf_hook_slow */
+       /* rcu_read_lock()ed by nf_hook_thresh */
        helper = rcu_dereference(master_help->helper);
        if (!helper)
                return ret;
@@ -65,7 +65,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
                        par->family);
                return ret;
        }
-       info->name[29] = '\0';
+       info->name[sizeof(info->name) - 1] = '\0';
        return 0;
 }
 
index d725a27..e3b7a09 100644 (file)
@@ -110,7 +110,6 @@ static const struct file_operations recent_old_fops, recent_mt_fops;
 #endif
 
 static u_int32_t hash_rnd __read_mostly;
-static bool hash_rnd_inited __read_mostly;
 
 static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
 {
@@ -340,10 +339,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
        int ret = -EINVAL;
        size_t sz;
 
-       if (unlikely(!hash_rnd_inited)) {
-               get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-               hash_rnd_inited = true;
-       }
+       net_get_random_once(&hash_rnd, sizeof(hash_rnd));
+
        if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
                pr_info("Unsupported user space flags (%08x)\n",
                        info->check_set);
index 034f525..ca96e54 100644 (file)
@@ -142,10 +142,7 @@ struct rxrpc_host_header {
  */
 struct rxrpc_skb_priv {
        union {
-               unsigned long   resend_at;      /* time in jiffies at which to resend */
-               struct {
-                       u8      nr_jumbo;       /* Number of jumbo subpackets */
-               };
+               u8              nr_jumbo;       /* Number of jumbo subpackets */
        };
        union {
                unsigned int    offset;         /* offset into buffer of next read */
@@ -258,10 +255,12 @@ struct rxrpc_peer {
 
        /* calculated RTT cache */
 #define RXRPC_RTT_CACHE_SIZE 32
-       suseconds_t             rtt;            /* current RTT estimate (in uS) */
-       unsigned int            rtt_point;      /* next entry at which to insert */
-       unsigned int            rtt_usage;      /* amount of cache actually used */
-       suseconds_t             rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+       ktime_t                 rtt_last_req;   /* Time of last RTT request */
+       u64                     rtt;            /* Current RTT estimate (in nS) */
+       u64                     rtt_sum;        /* Sum of cache contents */
+       u64                     rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
+       u8                      rtt_cursor;     /* next entry at which to insert */
+       u8                      rtt_usage;      /* amount of cache actually used */
 };
 
 /*
@@ -385,10 +384,9 @@ struct rxrpc_connection {
        int                     debug_id;       /* debug ID for printks */
        atomic_t                serial;         /* packet serial number counter */
        unsigned int            hi_serial;      /* highest serial number received */
+       u32                     security_nonce; /* response re-use preventer */
        u8                      size_align;     /* data size alignment (for security) */
-       u8                      header_size;    /* rxrpc + security header size */
        u8                      security_size;  /* security header size */
-       u32                     security_nonce; /* response re-use preventer */
        u8                      security_ix;    /* security type */
        u8                      out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
 };
@@ -403,6 +401,8 @@ enum rxrpc_call_flag {
        RXRPC_CALL_EXPOSED,             /* The call was exposed to the world */
        RXRPC_CALL_RX_LAST,             /* Received the last packet (at rxtx_top) */
        RXRPC_CALL_TX_LAST,             /* Last packet in Tx buffer (at rxtx_top) */
+       RXRPC_CALL_PINGING,             /* Ping in process */
+       RXRPC_CALL_RETRANS_TIMEOUT,     /* Retransmission due to timeout occurred */
 };
 
 /*
@@ -447,6 +447,17 @@ enum rxrpc_call_completion {
        NR__RXRPC_CALL_COMPLETIONS
 };
 
+/*
+ * Call Tx congestion management modes.
+ */
+enum rxrpc_congest_mode {
+       RXRPC_CALL_SLOW_START,
+       RXRPC_CALL_CONGEST_AVOIDANCE,
+       RXRPC_CALL_PACKET_LOSS,
+       RXRPC_CALL_FAST_RETRANSMIT,
+       NR__RXRPC_CONGEST_MODES
+};
+
 /*
  * RxRPC call definition
  * - matched by { connection, call_id }
@@ -487,6 +498,8 @@ struct rxrpc_call {
        u32                     call_id;        /* call ID on connection  */
        u32                     cid;            /* connection ID plus channel index */
        int                     debug_id;       /* debug ID for printks */
+       unsigned short          rx_pkt_offset;  /* Current recvmsg packet offset */
+       unsigned short          rx_pkt_len;     /* Current recvmsg packet len */
 
        /* Rx/Tx circular buffer, depending on phase.
         *
@@ -506,6 +519,10 @@ struct rxrpc_call {
 #define RXRPC_TX_ANNO_UNACK    1
 #define RXRPC_TX_ANNO_NAK      2
 #define RXRPC_TX_ANNO_RETRANS  3
+#define RXRPC_TX_ANNO_MASK     0x03
+#define RXRPC_TX_ANNO_LAST     0x04
+#define RXRPC_TX_ANNO_RESENT   0x08
+
 #define RXRPC_RX_ANNO_JUMBO    0x3f            /* Jumbo subpacket number + 1 if not zero */
 #define RXRPC_RX_ANNO_JLAST    0x40            /* Set if last element of a jumbo packet */
 #define RXRPC_RX_ANNO_VERIFIED 0x80            /* Set if verified and decrypted */
@@ -513,6 +530,20 @@ struct rxrpc_call {
                                                 * not hard-ACK'd packet follows this.
                                                 */
        rxrpc_seq_t             tx_top;         /* Highest Tx slot allocated. */
+
+       /* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
+        * is fixed, we keep these numbers in terms of segments (ie. DATA
+        * packets) rather than bytes.
+        */
+#define RXRPC_TX_SMSS          RXRPC_JUMBO_DATALEN
+       u8                      cong_cwnd;      /* Congestion window size */
+       u8                      cong_extra;     /* Extra to send for congestion management */
+       u8                      cong_ssthresh;  /* Slow-start threshold */
+       enum rxrpc_congest_mode cong_mode:8;    /* Congestion management mode */
+       u8                      cong_dup_acks;  /* Count of ACKs showing missing packets */
+       u8                      cong_cumul_acks; /* Cumulative ACK count */
+       ktime_t                 cong_tstamp;    /* Last time cwnd was changed */
+
        rxrpc_seq_t             rx_hard_ack;    /* Dead slot in buffer; the first received but not
                                                 * consumed packet follows this.
                                                 */
@@ -528,11 +559,36 @@ struct rxrpc_call {
        u16                     ackr_skew;      /* skew on packet being ACK'd */
        rxrpc_serial_t          ackr_serial;    /* serial of packet being ACK'd */
        rxrpc_seq_t             ackr_prev_seq;  /* previous sequence number received */
-       unsigned short          rx_pkt_offset;  /* Current recvmsg packet offset */
-       unsigned short          rx_pkt_len;     /* Current recvmsg packet len */
+       rxrpc_seq_t             ackr_consumed;  /* Highest packet shown consumed */
+       rxrpc_seq_t             ackr_seen;      /* Highest packet shown seen */
+       rxrpc_serial_t          ackr_ping;      /* Last ping sent */
+       ktime_t                 ackr_ping_time; /* Time last ping sent */
 
        /* transmission-phase ACK management */
+       ktime_t                 acks_latest_ts; /* Timestamp of latest ACK received */
        rxrpc_serial_t          acks_latest;    /* serial number of latest ACK received */
+       rxrpc_seq_t             acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
+};
+
+/*
+ * Summary of a new ACK and the changes it made to the Tx buffer packet states.
+ */
+struct rxrpc_ack_summary {
+       u8                      ack_reason;
+       u8                      nr_acks;                /* Number of ACKs in packet */
+       u8                      nr_nacks;               /* Number of NACKs in packet */
+       u8                      nr_new_acks;            /* Number of new ACKs in packet */
+       u8                      nr_new_nacks;           /* Number of new NACKs in packet */
+       u8                      nr_rot_new_acks;        /* Number of rotated new ACKs */
+       bool                    new_low_nack;           /* T if new low NACK found */
+       bool                    retrans_timeo;          /* T if reTx due to timeout happened */
+       u8                      flight_size;            /* Number of unreceived transmissions */
+       /* Place to stash values for tracing */
+       enum rxrpc_congest_mode mode:8;
+       u8                      cwnd;
+       u8                      ssthresh;
+       u8                      dup_acks;
+       u8                      cumulative_acks;
 };
 
 enum rxrpc_skb_trace {
@@ -618,9 +674,10 @@ extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4];
 enum rxrpc_transmit_trace {
        rxrpc_transmit_wait,
        rxrpc_transmit_queue,
-       rxrpc_transmit_queue_reqack,
        rxrpc_transmit_queue_last,
        rxrpc_transmit_rotate,
+       rxrpc_transmit_rotate_last,
+       rxrpc_transmit_await_reply,
        rxrpc_transmit_end,
        rxrpc_transmit__nr_trace
 };
@@ -656,8 +713,75 @@ enum rxrpc_recvmsg_trace {
 
 extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5];
 
+enum rxrpc_rtt_tx_trace {
+       rxrpc_rtt_tx_ping,
+       rxrpc_rtt_tx_data,
+       rxrpc_rtt_tx__nr_trace
+};
+
+extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5];
+
+enum rxrpc_rtt_rx_trace {
+       rxrpc_rtt_rx_ping_response,
+       rxrpc_rtt_rx_requested_ack,
+       rxrpc_rtt_rx__nr_trace
+};
+
+extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5];
+
+enum rxrpc_timer_trace {
+       rxrpc_timer_begin,
+       rxrpc_timer_init_for_reply,
+       rxrpc_timer_expired,
+       rxrpc_timer_set_for_ack,
+       rxrpc_timer_set_for_resend,
+       rxrpc_timer_set_for_send,
+       rxrpc_timer__nr_trace
+};
+
+extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8];
+
+enum rxrpc_propose_ack_trace {
+       rxrpc_propose_ack_client_tx_end,
+       rxrpc_propose_ack_input_data,
+       rxrpc_propose_ack_ping_for_lost_ack,
+       rxrpc_propose_ack_ping_for_lost_reply,
+       rxrpc_propose_ack_ping_for_params,
+       rxrpc_propose_ack_respond_to_ack,
+       rxrpc_propose_ack_respond_to_ping,
+       rxrpc_propose_ack_retry_tx,
+       rxrpc_propose_ack_rotate_rx,
+       rxrpc_propose_ack_terminal_ack,
+       rxrpc_propose_ack__nr_trace
+};
+
+enum rxrpc_propose_ack_outcome {
+       rxrpc_propose_ack_use,
+       rxrpc_propose_ack_update,
+       rxrpc_propose_ack_subsume,
+       rxrpc_propose_ack__nr_outcomes
+};
+
+extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8];
+extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes];
+
+enum rxrpc_congest_change {
+       rxrpc_cong_begin_retransmission,
+       rxrpc_cong_cleared_nacks,
+       rxrpc_cong_new_low_nack,
+       rxrpc_cong_no_change,
+       rxrpc_cong_progress,
+       rxrpc_cong_retransmit_again,
+       rxrpc_cong_rtt_window_end,
+       rxrpc_cong_saw_nack,
+       rxrpc_congest__nr_change
+};
+
+extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
+extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];
+
 extern const char *const rxrpc_pkts[];
-extern const char *rxrpc_acks(u8 reason);
+extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
 
 #include <trace/events/rxrpc.h>
 
@@ -685,7 +809,9 @@ int rxrpc_reject_call(struct rxrpc_sock *);
 /*
  * call_event.c
  */
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool);
+void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace);
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+                      enum rxrpc_propose_ack_trace);
 void rxrpc_process_call(struct work_struct *);
 
 /*
@@ -739,6 +865,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
                call->error = error;
                call->completion = compl,
                call->state = RXRPC_CALL_COMPLETE;
+               wake_up(&call->waitq);
                return true;
        }
        return false;
@@ -946,7 +1073,7 @@ extern const s8 rxrpc_ack_priority[];
  * output.c
  */
 int rxrpc_send_call_packet(struct rxrpc_call *, u8);
-int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *);
+int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_reject_packets(struct rxrpc_local *);
 
 /*
@@ -954,6 +1081,8 @@ void rxrpc_reject_packets(struct rxrpc_local *);
  */
 void rxrpc_error_report(struct sock *);
 void rxrpc_peer_error_distributor(struct work_struct *);
+void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
+                       rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
 
 /*
  * peer_object.c
index 7d1b998..0e84780 100644 (file)
 /*
  * Set the timer
  */
-static void rxrpc_set_timer(struct rxrpc_call *call)
+void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why)
 {
        unsigned long t, now = jiffies;
 
-       _enter("{%ld,%ld,%ld:%ld}",
-              call->ack_at - now, call->resend_at - now, call->expire_at - now,
-              call->timer.expires - now);
-
        read_lock_bh(&call->state_lock);
 
        if (call->state < RXRPC_CALL_COMPLETE) {
-               t = call->ack_at;
-               if (time_before(call->resend_at, t))
+               t = call->expire_at;
+               if (time_before_eq(t, now))
+                       goto out;
+
+               if (time_after(call->resend_at, now) &&
+                   time_before(call->resend_at, t))
                        t = call->resend_at;
-               if (time_before(call->expire_at, t))
-                       t = call->expire_at;
-               if (!timer_pending(&call->timer) ||
-                   time_before(t, call->timer.expires)) {
-                       _debug("set timer %ld", t - now);
+
+               if (time_after(call->ack_at, now) &&
+                   time_before(call->ack_at, t))
+                       t = call->ack_at;
+
+               if (call->timer.expires != t || !timer_pending(&call->timer)) {
                        mod_timer(&call->timer, t);
+                       trace_rxrpc_timer(call, why, now);
                }
        }
+
+out:
        read_unlock_bh(&call->state_lock);
 }
 
@@ -54,14 +58,13 @@ static void rxrpc_set_timer(struct rxrpc_call *call)
  */
 static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
                                u16 skew, u32 serial, bool immediate,
-                               bool background)
+                               bool background,
+                               enum rxrpc_propose_ack_trace why)
 {
+       enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
        unsigned long now, ack_at, expiry = rxrpc_soft_ack_delay;
        s8 prior = rxrpc_ack_priority[ack_reason];
 
-       _enter("{%d},%s,%%%x,%u",
-              call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
-
        /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
         * numbers, but we don't alter the timeout.
         */
@@ -70,15 +73,18 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
               call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]);
        if (ack_reason == call->ackr_reason) {
                if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
+                       outcome = rxrpc_propose_ack_update;
                        call->ackr_serial = serial;
                        call->ackr_skew = skew;
                }
                if (!immediate)
-                       return;
+                       goto trace;
        } else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
                call->ackr_reason = ack_reason;
                call->ackr_serial = serial;
                call->ackr_skew = skew;
+       } else {
+               outcome = rxrpc_propose_ack_subsume;
        }
 
        switch (ack_reason) {
@@ -94,6 +100,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
                        expiry = rxrpc_soft_ack_delay;
                break;
 
+       case RXRPC_ACK_PING:
        case RXRPC_ACK_IDLE:
                if (rxrpc_idle_ack_delay < expiry)
                        expiry = rxrpc_idle_ack_delay;
@@ -117,38 +124,52 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
                _debug("deferred ACK %ld < %ld", expiry, call->ack_at - now);
                if (time_before(ack_at, call->ack_at)) {
                        call->ack_at = ack_at;
-                       rxrpc_set_timer(call);
+                       rxrpc_set_timer(call, rxrpc_timer_set_for_ack);
                }
        }
+
+trace:
+       trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate,
+                               background, outcome);
 }
 
 /*
  * propose an ACK be sent, locking the call structure
  */
 void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-                      u16 skew, u32 serial, bool immediate, bool background)
+                      u16 skew, u32 serial, bool immediate, bool background,
+                      enum rxrpc_propose_ack_trace why)
 {
        spin_lock_bh(&call->lock);
        __rxrpc_propose_ACK(call, ack_reason, skew, serial,
-                           immediate, background);
+                           immediate, background, why);
        spin_unlock_bh(&call->lock);
 }
 
+/*
+ * Handle congestion being detected by the retransmit timeout.
+ */
+static void rxrpc_congestion_timeout(struct rxrpc_call *call)
+{
+       set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
+}
+
 /*
  * Perform retransmission of NAK'd and unack'd packets.
  */
 static void rxrpc_resend(struct rxrpc_call *call)
 {
-       struct rxrpc_wire_header *whdr;
        struct rxrpc_skb_priv *sp;
        struct sk_buff *skb;
        rxrpc_seq_t cursor, seq, top;
-       unsigned long resend_at, now;
+       ktime_t now = ktime_get_real(), max_age, oldest, resend_at, ack_ts;
        int ix;
-       u8 annotation;
+       u8 annotation, anno_type, retrans = 0, unacked = 0;
 
        _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
 
+       max_age = ktime_sub_ms(now, rxrpc_resend_timeout);
+
        spin_lock_bh(&call->lock);
 
        cursor = call->tx_hard_ack;
@@ -161,31 +182,63 @@ static void rxrpc_resend(struct rxrpc_call *call)
         * the packets in the Tx buffer we're going to resend and what the new
         * resend timeout will be.
         */
-       now = jiffies;
-       resend_at = now + rxrpc_resend_timeout;
+       oldest = now;
        for (seq = cursor + 1; before_eq(seq, top); seq++) {
                ix = seq & RXRPC_RXTX_BUFF_MASK;
                annotation = call->rxtx_annotations[ix];
-               if (annotation == RXRPC_TX_ANNO_ACK)
+               anno_type = annotation & RXRPC_TX_ANNO_MASK;
+               annotation &= ~RXRPC_TX_ANNO_MASK;
+               if (anno_type == RXRPC_TX_ANNO_ACK)
                        continue;
 
                skb = call->rxtx_buffer[ix];
                rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
                sp = rxrpc_skb(skb);
 
-               if (annotation == RXRPC_TX_ANNO_UNACK) {
-                       if (time_after(sp->resend_at, now)) {
-                               if (time_before(sp->resend_at, resend_at))
-                                       resend_at = sp->resend_at;
+               if (anno_type == RXRPC_TX_ANNO_UNACK) {
+                       if (ktime_after(skb->tstamp, max_age)) {
+                               if (ktime_before(skb->tstamp, oldest))
+                                       oldest = skb->tstamp;
                                continue;
                        }
+                       if (!(annotation & RXRPC_TX_ANNO_RESENT))
+                               unacked++;
                }
 
                /* Okay, we need to retransmit a packet. */
-               call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+               call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation;
+               retrans++;
+               trace_rxrpc_retransmit(call, seq, annotation | anno_type,
+                                      ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
        }
 
-       call->resend_at = resend_at;
+       resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
+       call->resend_at = jiffies +
+               nsecs_to_jiffies(ktime_to_ns(ktime_sub(resend_at, now))) +
+               1; /* We have to make sure that the calculated jiffies value
+                   * falls at or after the nsec value, or we shall loop
+                   * ceaselessly because the timer times out, but we haven't
+                   * reached the nsec timeout yet.
+                   */
+
+       if (unacked)
+               rxrpc_congestion_timeout(call);
+
+       /* If there was nothing that needed retransmission then it's likely
+        * that an ACK got lost somewhere.  Send a ping to find out instead of
+        * retransmitting data.
+        */
+       if (!retrans) {
+               rxrpc_set_timer(call, rxrpc_timer_set_for_resend);
+               spin_unlock_bh(&call->lock);
+               ack_ts = ktime_sub(now, call->acks_latest_ts);
+               if (ktime_to_ns(ack_ts) < call->peer->rtt)
+                       goto out;
+               rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+                                 rxrpc_propose_ack_ping_for_lost_ack);
+               rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+               goto out;
+       }
 
        /* Now go through the Tx window and perform the retransmissions.  We
         * have to drop the lock for each send.  If an ACK comes in whilst the
@@ -195,29 +248,21 @@ static void rxrpc_resend(struct rxrpc_call *call)
        for (seq = cursor + 1; before_eq(seq, top); seq++) {
                ix = seq & RXRPC_RXTX_BUFF_MASK;
                annotation = call->rxtx_annotations[ix];
-               if (annotation != RXRPC_TX_ANNO_RETRANS)
+               anno_type = annotation & RXRPC_TX_ANNO_MASK;
+               if (anno_type != RXRPC_TX_ANNO_RETRANS)
                        continue;
 
                skb = call->rxtx_buffer[ix];
                rxrpc_get_skb(skb, rxrpc_skb_tx_got);
                spin_unlock_bh(&call->lock);
-               sp = rxrpc_skb(skb);
-
-               /* Each Tx packet needs a new serial number */
-               sp->hdr.serial = atomic_inc_return(&call->conn->serial);
 
-               whdr = (struct rxrpc_wire_header *)skb->head;
-               whdr->serial = htonl(sp->hdr.serial);
-
-               if (rxrpc_send_data_packet(call->conn, skb) < 0) {
-                       call->resend_at = now + 2;
+               if (rxrpc_send_data_packet(call, skb) < 0) {
                        rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
                        return;
                }
 
                if (rxrpc_is_client_call(call))
                        rxrpc_expose_client_call(call);
-               sp->resend_at = now + rxrpc_resend_timeout;
 
                rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
                spin_lock_bh(&call->lock);
@@ -227,10 +272,17 @@ static void rxrpc_resend(struct rxrpc_call *call)
                 * received and the packet might have been hard-ACK'd (in which
                 * case it will no longer be in the buffer).
                 */
-               if (after(seq, call->tx_hard_ack) &&
-                   (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_RETRANS ||
-                    call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK))
-                       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK;
+               if (after(seq, call->tx_hard_ack)) {
+                       annotation = call->rxtx_annotations[ix];
+                       anno_type = annotation & RXRPC_TX_ANNO_MASK;
+                       if (anno_type == RXRPC_TX_ANNO_RETRANS ||
+                           anno_type == RXRPC_TX_ANNO_NAK) {
+                               annotation &= ~RXRPC_TX_ANNO_MASK;
+                               annotation |= RXRPC_TX_ANNO_UNACK;
+                       }
+                       annotation |= RXRPC_TX_ANNO_RESENT;
+                       call->rxtx_annotations[ix] = annotation;
+               }
 
                if (after(call->tx_hard_ack, seq))
                        seq = call->tx_hard_ack;
@@ -238,6 +290,7 @@ static void rxrpc_resend(struct rxrpc_call *call)
 
 out_unlock:
        spin_unlock_bh(&call->lock);
+out:
        _leave("");
 }
 
@@ -271,6 +324,7 @@ recheck_state:
        if (time_after_eq(now, call->expire_at)) {
                rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
                set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+               goto recheck_state;
        }
 
        if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) ||
@@ -288,7 +342,7 @@ recheck_state:
                goto recheck_state;
        }
 
-       rxrpc_set_timer(call);
+       rxrpc_set_timer(call, rxrpc_timer_set_for_resend);
 
        /* other events may have been raised since we started checking */
        if (call->events && call->state < RXRPC_CALL_COMPLETE) {
index f50a609..d4b3293 100644 (file)
@@ -76,8 +76,10 @@ static void rxrpc_call_timer_expired(unsigned long _call)
 
        _enter("%d", call->debug_id);
 
-       if (call->state < RXRPC_CALL_COMPLETE)
+       if (call->state < RXRPC_CALL_COMPLETE) {
+               trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
                rxrpc_queue_call(call);
+       }
 }
 
 /*
@@ -158,6 +160,14 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
        call->rx_winsize = rxrpc_rx_window_size;
        call->tx_winsize = 16;
        call->rx_expect_next = 1;
+
+       if (RXRPC_TX_SMSS > 2190)
+               call->cong_cwnd = 2;
+       else if (RXRPC_TX_SMSS > 1095)
+               call->cong_cwnd = 3;
+       else
+               call->cong_cwnd = 4;
+       call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
        return call;
 
 nomem_2:
@@ -174,6 +184,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
                                                  gfp_t gfp)
 {
        struct rxrpc_call *call;
+       ktime_t now;
 
        _enter("");
 
@@ -183,6 +194,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
        call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
        call->service_id = srx->srx_service;
        call->tx_phase = true;
+       now = ktime_get_real();
+       call->acks_latest_ts = now;
+       call->cong_tstamp = now;
 
        _leave(" = %p", call);
        return call;
@@ -199,8 +213,8 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
        call->expire_at = expire_at;
        call->ack_at = expire_at;
        call->resend_at = expire_at;
-       call->timer.expires = expire_at;
-       add_timer(&call->timer);
+       call->timer.expires = expire_at + 1;
+       rxrpc_set_timer(call, rxrpc_timer_begin);
 }
 
 /*
@@ -323,6 +337,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
        call->state             = RXRPC_CALL_SERVER_ACCEPTING;
        if (sp->hdr.securityIndex > 0)
                call->state     = RXRPC_CALL_SERVER_SECURING;
+       call->cong_tstamp       = skb->tstamp;
 
        /* Set the channel for this call.  We don't get channel_lock as we're
         * only defending against the data_ready handler (which we're called
index 75a15a4..37609ce 100644 (file)
@@ -97,10 +97,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
                pkt.info.maxMTU         = htonl(mtu);
                pkt.info.rwind          = htonl(rxrpc_rx_window_size);
                pkt.info.jumbo_max      = htonl(rxrpc_rx_jumbo_max);
+               pkt.whdr.flags          |= RXRPC_SLOW_START_OK;
                len += sizeof(pkt.ack) + sizeof(pkt.info);
-
-               trace_rxrpc_tx_ack(NULL, chan->last_seq, 0,
-                                  RXRPC_ACK_DUPLICATE, 0);
                break;
        }
 
@@ -122,6 +120,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
                _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
                break;
        case RXRPC_PACKET_TYPE_ACK:
+               trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0,
+                                  RXRPC_ACK_DUPLICATE, 0);
                _proto("Tx ACK %%%u [re]", serial);
                break;
        }
index 3b55aee..e1e83af 100644 (file)
@@ -53,7 +53,6 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
                spin_lock_init(&conn->state_lock);
                conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
                conn->size_align = 4;
-               conn->header_size = sizeof(struct rxrpc_wire_header);
                conn->idle_timestamp = jiffies;
        }
 
index 7ac1edf..094720d 100644 (file)
@@ -36,13 +36,199 @@ static void rxrpc_proto_abort(const char *why,
        }
 }
 
+/*
+ * Do TCP-style congestion management [RFC 5681].
+ */
+static void rxrpc_congestion_management(struct rxrpc_call *call,
+                                       struct sk_buff *skb,
+                                       struct rxrpc_ack_summary *summary)
+{
+       enum rxrpc_congest_change change = rxrpc_cong_no_change;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       unsigned int cumulative_acks = call->cong_cumul_acks;
+       unsigned int cwnd = call->cong_cwnd;
+       bool resend = false;
+
+       summary->flight_size =
+               (call->tx_top - call->tx_hard_ack) - summary->nr_acks;
+
+       if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
+               summary->retrans_timeo = true;
+               call->cong_ssthresh = max_t(unsigned int,
+                                           summary->flight_size / 2, 2);
+               cwnd = 1;
+               if (cwnd > call->cong_ssthresh &&
+                   call->cong_mode == RXRPC_CALL_SLOW_START) {
+                       call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+                       call->cong_tstamp = skb->tstamp;
+                       cumulative_acks = 0;
+               }
+       }
+
+       cumulative_acks += summary->nr_new_acks;
+       cumulative_acks += summary->nr_rot_new_acks;
+       if (cumulative_acks > 255)
+               cumulative_acks = 255;
+
+       summary->mode = call->cong_mode;
+       summary->cwnd = call->cong_cwnd;
+       summary->ssthresh = call->cong_ssthresh;
+       summary->cumulative_acks = cumulative_acks;
+       summary->dup_acks = call->cong_dup_acks;
+
+       switch (call->cong_mode) {
+       case RXRPC_CALL_SLOW_START:
+               if (summary->nr_nacks > 0)
+                       goto packet_loss_detected;
+               if (summary->cumulative_acks > 0)
+                       cwnd += 1;
+               if (cwnd > call->cong_ssthresh) {
+                       call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+                       call->cong_tstamp = skb->tstamp;
+               }
+               goto out;
+
+       case RXRPC_CALL_CONGEST_AVOIDANCE:
+               if (summary->nr_nacks > 0)
+                       goto packet_loss_detected;
+
+               /* We analyse the number of packets that get ACK'd per RTT
+                * period and increase the window if we managed to fill it.
+                */
+               if (call->peer->rtt_usage == 0)
+                       goto out;
+               if (ktime_before(skb->tstamp,
+                                ktime_add_ns(call->cong_tstamp,
+                                             call->peer->rtt)))
+                       goto out_no_clear_ca;
+               change = rxrpc_cong_rtt_window_end;
+               call->cong_tstamp = skb->tstamp;
+               if (cumulative_acks >= cwnd)
+                       cwnd++;
+               goto out;
+
+       case RXRPC_CALL_PACKET_LOSS:
+               if (summary->nr_nacks == 0)
+                       goto resume_normality;
+
+               if (summary->new_low_nack) {
+                       change = rxrpc_cong_new_low_nack;
+                       call->cong_dup_acks = 1;
+                       if (call->cong_extra > 1)
+                               call->cong_extra = 1;
+                       goto send_extra_data;
+               }
+
+               call->cong_dup_acks++;
+               if (call->cong_dup_acks < 3)
+                       goto send_extra_data;
+
+               change = rxrpc_cong_begin_retransmission;
+               call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
+               call->cong_ssthresh = max_t(unsigned int,
+                                           summary->flight_size / 2, 2);
+               cwnd = call->cong_ssthresh + 3;
+               call->cong_extra = 0;
+               call->cong_dup_acks = 0;
+               resend = true;
+               goto out;
+
+       case RXRPC_CALL_FAST_RETRANSMIT:
+               if (!summary->new_low_nack) {
+                       if (summary->nr_new_acks == 0)
+                               cwnd += 1;
+                       call->cong_dup_acks++;
+                       if (call->cong_dup_acks == 2) {
+                               change = rxrpc_cong_retransmit_again;
+                               call->cong_dup_acks = 0;
+                               resend = true;
+                       }
+               } else {
+                       change = rxrpc_cong_progress;
+                       cwnd = call->cong_ssthresh;
+                       if (summary->nr_nacks == 0)
+                               goto resume_normality;
+               }
+               goto out;
+
+       default:
+               BUG();
+               goto out;
+       }
+
+resume_normality:
+       change = rxrpc_cong_cleared_nacks;
+       call->cong_dup_acks = 0;
+       call->cong_extra = 0;
+       call->cong_tstamp = skb->tstamp;
+       if (cwnd <= call->cong_ssthresh)
+               call->cong_mode = RXRPC_CALL_SLOW_START;
+       else
+               call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+out:
+       cumulative_acks = 0;
+out_no_clear_ca:
+       if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1)
+               cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
+       call->cong_cwnd = cwnd;
+       call->cong_cumul_acks = cumulative_acks;
+       trace_rxrpc_congest(call, summary, sp->hdr.serial, change);
+       if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+               rxrpc_queue_call(call);
+       return;
+
+packet_loss_detected:
+       change = rxrpc_cong_saw_nack;
+       call->cong_mode = RXRPC_CALL_PACKET_LOSS;
+       call->cong_dup_acks = 0;
+       goto send_extra_data;
+
+send_extra_data:
+       /* Send some previously unsent DATA if we have some to advance the ACK
+        * state.
+        */
+       if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
+           RXRPC_TX_ANNO_LAST ||
+           summary->nr_acks != call->tx_top - call->tx_hard_ack) {
+               call->cong_extra++;
+               wake_up(&call->waitq);
+       }
+       goto out_no_clear_ca;
+}
+
+/*
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
+ */
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
+                           int skew)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       ktime_t now = skb->tstamp;
+
+       if (call->peer->rtt_usage < 3 ||
+           ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
+               rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+                                 true, true,
+                                 rxrpc_propose_ack_ping_for_params);
+}
+
 /*
  * Apply a hard ACK by advancing the Tx window.
  */
-static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to)
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
+                                  struct rxrpc_ack_summary *summary)
 {
        struct sk_buff *skb, *list = NULL;
        int ix;
+       u8 annotation;
+
+       if (call->acks_lowest_nak == call->tx_hard_ack) {
+               call->acks_lowest_nak = to;
+       } else if (before_eq(call->acks_lowest_nak, to)) {
+               summary->new_low_nack = true;
+               call->acks_lowest_nak = to;
+       }
 
        spin_lock(&call->lock);
 
@@ -50,16 +236,24 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to)
                call->tx_hard_ack++;
                ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
                skb = call->rxtx_buffer[ix];
+               annotation = call->rxtx_annotations[ix];
                rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
                call->rxtx_buffer[ix] = NULL;
                call->rxtx_annotations[ix] = 0;
                skb->next = list;
                list = skb;
+
+               if (annotation & RXRPC_TX_ANNO_LAST)
+                       set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+               if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
+                       summary->nr_rot_new_acks++;
        }
 
        spin_unlock(&call->lock);
 
-       trace_rxrpc_transmit(call, rxrpc_transmit_rotate);
+       trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ?
+                                   rxrpc_transmit_rotate_last :
+                                   rxrpc_transmit_rotate));
        wake_up(&call->waitq);
 
        while (list) {
@@ -76,42 +270,77 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to)
  * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
  * or a final ACK packet.
  */
-static bool rxrpc_end_tx_phase(struct rxrpc_call *call, const char *abort_why)
+static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+                              const char *abort_why)
 {
-       _enter("");
-
-       switch (call->state) {
-       case RXRPC_CALL_CLIENT_RECV_REPLY:
-               return true;
-       case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-       case RXRPC_CALL_SERVER_AWAIT_ACK:
-               break;
-       default:
-               rxrpc_proto_abort(abort_why, call, call->tx_top);
-               return false;
-       }
 
-       rxrpc_rotate_tx_window(call, call->tx_top);
+       ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
 
        write_lock(&call->state_lock);
 
        switch (call->state) {
-       default:
-               break;
+       case RXRPC_CALL_CLIENT_SEND_REQUEST:
        case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-               call->tx_phase = false;
-               call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+               if (reply_begun)
+                       call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+               else
+                       call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
                break;
+
        case RXRPC_CALL_SERVER_AWAIT_ACK:
                __rxrpc_call_completed(call);
                rxrpc_notify_socket(call);
                break;
+
+       default:
+               goto bad_state;
        }
 
        write_unlock(&call->state_lock);
-       trace_rxrpc_transmit(call, rxrpc_transmit_end);
+       if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
+               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
+                                 rxrpc_propose_ack_client_tx_end);
+               trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
+       } else {
+               trace_rxrpc_transmit(call, rxrpc_transmit_end);
+       }
        _leave(" = ok");
        return true;
+
+bad_state:
+       write_unlock(&call->state_lock);
+       kdebug("end_tx %s", rxrpc_call_states[call->state]);
+       rxrpc_proto_abort(abort_why, call, call->tx_top);
+       return false;
+}
+
+/*
+ * Begin the reply reception phase of a call.
+ */
+static bool rxrpc_receiving_reply(struct rxrpc_call *call)
+{
+       struct rxrpc_ack_summary summary = { 0 };
+       rxrpc_seq_t top = READ_ONCE(call->tx_top);
+
+       if (call->ackr_reason) {
+               spin_lock_bh(&call->lock);
+               call->ackr_reason = 0;
+               call->resend_at = call->expire_at;
+               call->ack_at = call->expire_at;
+               spin_unlock_bh(&call->lock);
+               rxrpc_set_timer(call, rxrpc_timer_init_for_reply);
+       }
+
+       if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+               rxrpc_rotate_tx_window(call, top, &summary);
+       if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+               rxrpc_proto_abort("TXL", call, top);
+               return false;
+       }
+       if (!rxrpc_end_tx_phase(call, true, "ETD"))
+               return false;
+       call->tx_phase = false;
+       return true;
 }
 
 /*
@@ -210,8 +439,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
        /* Received data implicitly ACKs all of the request packets we sent
         * when we're acting as a client.
         */
-       if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY &&
-           !rxrpc_end_tx_phase(call, "ETD"))
+       if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+            call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+           !rxrpc_receiving_reply(call))
                return;
 
        call->ackr_prev_seq = seq;
@@ -283,8 +513,16 @@ next_subpacket:
        call->rxtx_annotations[ix] = annotation;
        smp_wmb();
        call->rxtx_buffer[ix] = skb;
-       if (after(seq, call->rx_top))
+       if (after(seq, call->rx_top)) {
                smp_store_release(&call->rx_top, seq);
+       } else if (before(seq, call->rx_top)) {
+               /* Send an immediate ACK if we fill in a hole */
+               if (!ack) {
+                       ack = RXRPC_ACK_DELAY;
+                       ack_serial = serial;
+               }
+               immediate_ack = true;
+       }
        if (flags & RXRPC_LAST_PACKET) {
                set_bit(RXRPC_CALL_RX_LAST, &call->flags);
                trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
@@ -335,13 +573,72 @@ skip:
 ack:
        if (ack)
                rxrpc_propose_ACK(call, ack, skew, ack_serial,
-                                 immediate_ack, true);
+                                 immediate_ack, true,
+                                 rxrpc_propose_ack_input_data);
 
        if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
                rxrpc_notify_socket(call);
        _leave(" [queued]");
 }
 
+/*
+ * Process a requested ACK.
+ */
+static void rxrpc_input_requested_ack(struct rxrpc_call *call,
+                                     ktime_t resp_time,
+                                     rxrpc_serial_t orig_serial,
+                                     rxrpc_serial_t ack_serial)
+{
+       struct rxrpc_skb_priv *sp;
+       struct sk_buff *skb;
+       ktime_t sent_at;
+       int ix;
+
+       for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) {
+               skb = call->rxtx_buffer[ix];
+               if (!skb)
+                       continue;
+
+               sp = rxrpc_skb(skb);
+               if (sp->hdr.serial != orig_serial)
+                       continue;
+               smp_rmb();
+               sent_at = skb->tstamp;
+               goto found;
+       }
+       return;
+
+found:
+       rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack,
+                          orig_serial, ack_serial, sent_at, resp_time);
+}
+
+/*
+ * Process a ping response.
+ */
+static void rxrpc_input_ping_response(struct rxrpc_call *call,
+                                     ktime_t resp_time,
+                                     rxrpc_serial_t orig_serial,
+                                     rxrpc_serial_t ack_serial)
+{
+       rxrpc_serial_t ping_serial;
+       ktime_t ping_time;
+
+       ping_time = call->ackr_ping_time;
+       smp_rmb();
+       ping_serial = call->ackr_ping;
+
+       if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
+           before(orig_serial, ping_serial))
+               return;
+       clear_bit(RXRPC_CALL_PINGING, &call->flags);
+       if (after(orig_serial, ping_serial))
+               return;
+
+       rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response,
+                          orig_serial, ack_serial, ping_time, resp_time);
+}
+
 /*
  * Process the extra information that may be appended to an ACK packet
  */
@@ -384,31 +681,45 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
  * the time the ACK was sent.
  */
 static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
-                                 rxrpc_seq_t seq, int nr_acks)
+                                 rxrpc_seq_t seq, int nr_acks,
+                                 struct rxrpc_ack_summary *summary)
 {
-       bool resend = false;
        int ix;
+       u8 annotation, anno_type;
 
        for (; nr_acks > 0; nr_acks--, seq++) {
                ix = seq & RXRPC_RXTX_BUFF_MASK;
+               annotation = call->rxtx_annotations[ix];
+               anno_type = annotation & RXRPC_TX_ANNO_MASK;
+               annotation &= ~RXRPC_TX_ANNO_MASK;
                switch (*acks++) {
                case RXRPC_ACK_TYPE_ACK:
-                       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_ACK;
+                       summary->nr_acks++;
+                       if (anno_type == RXRPC_TX_ANNO_ACK)
+                               continue;
+                       summary->nr_new_acks++;
+                       call->rxtx_annotations[ix] =
+                               RXRPC_TX_ANNO_ACK | annotation;
                        break;
                case RXRPC_ACK_TYPE_NACK:
-                       if (call->rxtx_annotations[ix] == RXRPC_TX_ANNO_NAK)
+                       if (!summary->nr_nacks &&
+                           call->acks_lowest_nak != seq) {
+                               call->acks_lowest_nak = seq;
+                               summary->new_low_nack = true;
+                       }
+                       summary->nr_nacks++;
+                       if (anno_type == RXRPC_TX_ANNO_NAK)
+                               continue;
+                       summary->nr_new_nacks++;
+                       if (anno_type == RXRPC_TX_ANNO_RETRANS)
                                continue;
-                       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_NAK;
-                       resend = true;
+                       call->rxtx_annotations[ix] =
+                               RXRPC_TX_ANNO_NAK | annotation;
                        break;
                default:
                        return rxrpc_proto_abort("SFT", call, 0);
                }
        }
-
-       if (resend &&
-           !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
-               rxrpc_queue_call(call);
 }
 
 /*
@@ -424,12 +735,14 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
 static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
                            u16 skew)
 {
+       struct rxrpc_ack_summary summary = { 0 };
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        union {
                struct rxrpc_ackpacket ack;
                struct rxrpc_ackinfo info;
                u8 acks[RXRPC_MAXACKS];
        } buf;
+       rxrpc_serial_t acked_serial;
        rxrpc_seq_t first_soft_ack, hard_ack;
        int nr_acks, offset;
 
@@ -441,28 +754,40 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
        }
        sp->offset += sizeof(buf.ack);
 
+       acked_serial = ntohl(buf.ack.serial);
        first_soft_ack = ntohl(buf.ack.firstPacket);
        hard_ack = first_soft_ack - 1;
        nr_acks = buf.ack.nAcks;
+       summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
+                             buf.ack.reason : RXRPC_ACK__INVALID);
 
-       trace_rxrpc_rx_ack(call, first_soft_ack, buf.ack.reason, nr_acks);
+       trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks);
 
        _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
               sp->hdr.serial,
               ntohs(buf.ack.maxSkew),
               first_soft_ack,
               ntohl(buf.ack.previousPacket),
-              ntohl(buf.ack.serial),
-              rxrpc_acks(buf.ack.reason),
+              acked_serial,
+              rxrpc_ack_names[summary.ack_reason],
               buf.ack.nAcks);
 
+       if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
+               rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
+                                         sp->hdr.serial);
+       if (buf.ack.reason == RXRPC_ACK_REQUESTED)
+               rxrpc_input_requested_ack(call, skb->tstamp, acked_serial,
+                                         sp->hdr.serial);
+
        if (buf.ack.reason == RXRPC_ACK_PING) {
                _proto("Rx ACK %%%u PING Request", sp->hdr.serial);
                rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
-                                 skew, sp->hdr.serial, true, true);
+                                 skew, sp->hdr.serial, true, true,
+                                 rxrpc_propose_ack_respond_to_ping);
        } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
                rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
-                                 skew, sp->hdr.serial, true, true);
+                                 skew, sp->hdr.serial, true, true,
+                                 rxrpc_propose_ack_respond_to_ack);
        }
 
        offset = sp->offset + nr_acks + 3;
@@ -487,34 +812,43 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
        }
 
        /* Discard any out-of-order or duplicate ACKs. */
-       if ((int)sp->hdr.serial - (int)call->acks_latest <= 0) {
+       if (before_eq(sp->hdr.serial, call->acks_latest)) {
                _debug("discard ACK %d <= %d",
                       sp->hdr.serial, call->acks_latest);
                return;
        }
+       call->acks_latest_ts = skb->tstamp;
        call->acks_latest = sp->hdr.serial;
 
-       if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
-           hard_ack == call->tx_top) {
-               rxrpc_end_tx_phase(call, "ETA");
-               return;
-       }
-
        if (before(hard_ack, call->tx_hard_ack) ||
            after(hard_ack, call->tx_top))
                return rxrpc_proto_abort("AKW", call, 0);
+       if (nr_acks > call->tx_top - hard_ack)
+               return rxrpc_proto_abort("AKN", call, 0);
 
        if (after(hard_ack, call->tx_hard_ack))
-               rxrpc_rotate_tx_window(call, hard_ack);
+               rxrpc_rotate_tx_window(call, hard_ack, &summary);
 
-       if (after(first_soft_ack, call->tx_top))
+       if (nr_acks > 0) {
+               if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0)
+                       return rxrpc_proto_abort("XSA", call, 0);
+               rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
+                                     &summary);
+       }
+
+       if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+               rxrpc_end_tx_phase(call, false, "ETA");
                return;
+       }
+
+       if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
+           RXRPC_TX_ANNO_LAST &&
+           summary.nr_acks == call->tx_top - hard_ack)
+               rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+                                 false, true,
+                                 rxrpc_propose_ack_ping_for_lost_reply);
 
-       if (nr_acks > call->tx_top - first_soft_ack + 1)
-               nr_acks = first_soft_ack - call->tx_top + 1;
-       if (skb_copy_bits(skb, sp->offset, buf.acks, nr_acks) < 0)
-               return rxrpc_proto_abort("XSA", call, 0);
-       rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks);
+       return rxrpc_congestion_management(call, skb, &summary);
 }
 
 /*
@@ -522,11 +856,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
  */
 static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
 {
+       struct rxrpc_ack_summary summary = { 0 };
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
        _proto("Rx ACKALL %%%u", sp->hdr.serial);
 
-       rxrpc_end_tx_phase(call, "ETL");
+       rxrpc_rotate_tx_window(call, call->tx_top, &summary);
+       if (test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+               rxrpc_end_tx_phase(call, false, "ETL");
 }
 
 /*
@@ -712,20 +1049,19 @@ void rxrpc_data_ready(struct sock *udp_sk)
        skb_orphan(skb);
        sp = rxrpc_skb(skb);
 
+       /* dig out the RxRPC connection details */
+       if (rxrpc_extract_header(sp, skb) < 0)
+               goto bad_message;
+
        if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
                static int lose;
                if ((lose++ & 7) == 7) {
+                       trace_rxrpc_rx_lose(sp);
                        rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
                        return;
                }
        }
 
-       _net("Rx UDP packet from %08x:%04hu",
-            ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
-
-       /* dig out the RxRPC connection details */
-       if (rxrpc_extract_header(sp, skb) < 0)
-               goto bad_message;
        trace_rxrpc_rx_packet(sp);
 
        _net("Rx RxRPC %s ep=%x call=%x:%x",
@@ -822,6 +1158,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
                        rcu_read_unlock();
                        goto reject_packet;
                }
+               rxrpc_send_ping(call, skb, skew);
        }
 
        rxrpc_input_call_packet(call, skb, skew);
index 026e1f2..aedb897 100644 (file)
@@ -68,9 +68,9 @@ unsigned int rxrpc_rx_mtu = 5692;
 unsigned int rxrpc_rx_jumbo_max = 4;
 
 /*
- * Time till packet resend (in jiffies).
+ * Time till packet resend (in milliseconds).
  */
-unsigned int rxrpc_resend_timeout = 4 * HZ;
+unsigned int rxrpc_resend_timeout = 4 * 1000;
 
 const char *const rxrpc_pkts[] = {
        "?00",
@@ -83,24 +83,18 @@ const s8 rxrpc_ack_priority[] = {
        [RXRPC_ACK_DELAY]               = 1,
        [RXRPC_ACK_REQUESTED]           = 2,
        [RXRPC_ACK_IDLE]                = 3,
-       [RXRPC_ACK_PING_RESPONSE]       = 4,
-       [RXRPC_ACK_DUPLICATE]           = 5,
-       [RXRPC_ACK_OUT_OF_SEQUENCE]     = 6,
-       [RXRPC_ACK_EXCEEDS_WINDOW]      = 7,
-       [RXRPC_ACK_NOSPACE]             = 8,
+       [RXRPC_ACK_DUPLICATE]           = 4,
+       [RXRPC_ACK_OUT_OF_SEQUENCE]     = 5,
+       [RXRPC_ACK_EXCEEDS_WINDOW]      = 6,
+       [RXRPC_ACK_NOSPACE]             = 7,
+       [RXRPC_ACK_PING_RESPONSE]       = 8,
+       [RXRPC_ACK_PING]                = 9,
 };
 
-const char *rxrpc_acks(u8 reason)
-{
-       static const char *const str[] = {
-               "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
-               "IDL", "-?-"
-       };
-
-       if (reason >= ARRAY_SIZE(str))
-               reason = ARRAY_SIZE(str) - 1;
-       return str[reason];
-}
+const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
+       "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
+       "IDL", "-?-"
+};
 
 const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = {
        [rxrpc_skb_rx_cleaned]          = "Rx CLN",
@@ -154,9 +148,10 @@ const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = {
 const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = {
        [rxrpc_transmit_wait]           = "WAI",
        [rxrpc_transmit_queue]          = "QUE",
-       [rxrpc_transmit_queue_reqack]   = "QRA",
        [rxrpc_transmit_queue_last]     = "QLS",
        [rxrpc_transmit_rotate]         = "ROT",
+       [rxrpc_transmit_rotate_last]    = "RLS",
+       [rxrpc_transmit_await_reply]    = "AWR",
        [rxrpc_transmit_end]            = "END",
 };
 
@@ -182,3 +177,59 @@ const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = {
        [rxrpc_recvmsg_to_be_accepted]  = "TBAC",
        [rxrpc_recvmsg_return]          = "RETN",
 };
+
+const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = {
+       [rxrpc_rtt_tx_ping]             = "PING",
+       [rxrpc_rtt_tx_data]             = "DATA",
+};
+
+const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = {
+       [rxrpc_rtt_rx_ping_response]    = "PONG",
+       [rxrpc_rtt_rx_requested_ack]    = "RACK",
+};
+
+const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = {
+       [rxrpc_timer_begin]                     = "Begin ",
+       [rxrpc_timer_expired]                   = "*EXPR*",
+       [rxrpc_timer_init_for_reply]            = "IniRpl",
+       [rxrpc_timer_set_for_ack]               = "SetAck",
+       [rxrpc_timer_set_for_send]              = "SetTx ",
+       [rxrpc_timer_set_for_resend]            = "SetRTx",
+};
+
+const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = {
+       [rxrpc_propose_ack_client_tx_end]       = "ClTxEnd",
+       [rxrpc_propose_ack_input_data]          = "DataIn ",
+       [rxrpc_propose_ack_ping_for_lost_ack]   = "LostAck",
+       [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl",
+       [rxrpc_propose_ack_ping_for_params]     = "Params ",
+       [rxrpc_propose_ack_respond_to_ack]      = "Rsp2Ack",
+       [rxrpc_propose_ack_respond_to_ping]     = "Rsp2Png",
+       [rxrpc_propose_ack_retry_tx]            = "RetryTx",
+       [rxrpc_propose_ack_rotate_rx]           = "RxAck  ",
+       [rxrpc_propose_ack_terminal_ack]        = "ClTerm ",
+};
+
+const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = {
+       [rxrpc_propose_ack_use]                 = "",
+       [rxrpc_propose_ack_update]              = " Update",
+       [rxrpc_propose_ack_subsume]             = " Subsume",
+};
+
+const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = {
+       [RXRPC_CALL_SLOW_START]         = "SlowStart",
+       [RXRPC_CALL_CONGEST_AVOIDANCE]  = "CongAvoid",
+       [RXRPC_CALL_PACKET_LOSS]        = "PktLoss  ",
+       [RXRPC_CALL_FAST_RETRANSMIT]    = "FastReTx ",
+};
+
+const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = {
+       [rxrpc_cong_begin_retransmission]       = " Retrans",
+       [rxrpc_cong_cleared_nacks]              = " Cleared",
+       [rxrpc_cong_new_low_nack]               = " NewLowN",
+       [rxrpc_cong_no_change]                  = "",
+       [rxrpc_cong_progress]                   = " Progres",
+       [rxrpc_cong_retransmit_again]           = " ReTxAgn",
+       [rxrpc_cong_rtt_window_end]             = " RttWinE",
+       [rxrpc_cong_saw_nack]                   = " SawNack",
+};
index 16e18a9..cf43a71 100644 (file)
@@ -36,7 +36,9 @@ struct rxrpc_pkt_buffer {
  * Fill out an ACK packet.
  */
 static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
-                                struct rxrpc_pkt_buffer *pkt)
+                                struct rxrpc_pkt_buffer *pkt,
+                                rxrpc_seq_t *_hard_ack,
+                                rxrpc_seq_t *_top)
 {
        rxrpc_serial_t serial;
        rxrpc_seq_t hard_ack, top, seq;
@@ -48,6 +50,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
        serial = call->ackr_serial;
        hard_ack = READ_ONCE(call->rx_hard_ack);
        top = smp_load_acquire(&call->rx_top);
+       *_hard_ack = hard_ack;
+       *_top = top;
 
        pkt->ack.bufferSpace    = htons(8);
        pkt->ack.maxSkew        = htons(call->ackr_skew);
@@ -57,6 +61,9 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
        pkt->ack.reason         = call->ackr_reason;
        pkt->ack.nAcks          = top - hard_ack;
 
+       if (pkt->ack.reason == RXRPC_ACK_PING)
+               pkt->whdr.flags |= RXRPC_REQUEST_ACK;
+
        if (after(top, hard_ack)) {
                seq = hard_ack + 1;
                do {
@@ -77,9 +84,6 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
        pkt->ackinfo.rwind      = htonl(call->rx_winsize);
        pkt->ackinfo.jumbo_max  = htonl(jmax);
 
-       trace_rxrpc_tx_ack(call, hard_ack + 1, serial, call->ackr_reason,
-                          top - hard_ack);
-
        *ackp++ = 0;
        *ackp++ = 0;
        *ackp++ = 0;
@@ -96,7 +100,9 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
        struct msghdr msg;
        struct kvec iov[2];
        rxrpc_serial_t serial;
+       rxrpc_seq_t hard_ack, top;
        size_t len, n;
+       bool ping = false;
        int ioc, ret;
        u32 abort_code;
 
@@ -115,8 +121,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
                return -ENOMEM;
        }
 
-       serial = atomic_inc_return(&conn->serial);
-
        msg.msg_name    = &call->peer->srx.transport;
        msg.msg_namelen = call->peer->srx.transport_len;
        msg.msg_control = NULL;
@@ -127,7 +131,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
        pkt->whdr.cid           = htonl(call->cid);
        pkt->whdr.callNumber    = htonl(call->call_id);
        pkt->whdr.seq           = 0;
-       pkt->whdr.serial        = htonl(serial);
        pkt->whdr.type          = type;
        pkt->whdr.flags         = conn->out_clientflag;
        pkt->whdr.userStatus    = 0;
@@ -147,19 +150,14 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
                        ret = 0;
                        goto out;
                }
-               n = rxrpc_fill_out_ack(call, pkt);
+               ping = (call->ackr_reason == RXRPC_ACK_PING);
+               n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top);
                call->ackr_reason = 0;
 
                spin_unlock_bh(&call->lock);
 
-               _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-                      serial,
-                      ntohs(pkt->ack.maxSkew),
-                      ntohl(pkt->ack.firstPacket),
-                      ntohl(pkt->ack.previousPacket),
-                      ntohl(pkt->ack.serial),
-                      rxrpc_acks(pkt->ack.reason),
-                      pkt->ack.nAcks);
+
+               pkt->whdr.flags |= RXRPC_SLOW_START_OK;
 
                iov[0].iov_len += sizeof(pkt->ack) + n;
                iov[1].iov_base = &pkt->ackinfo;
@@ -171,7 +169,6 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
        case RXRPC_PACKET_TYPE_ABORT:
                abort_code = call->abort_code;
                pkt->abort_code = htonl(abort_code);
-               _proto("Tx ABORT %%%u { %d }", serial, abort_code);
                iov[0].iov_len += sizeof(pkt->abort_code);
                len += sizeof(pkt->abort_code);
                ioc = 1;
@@ -183,19 +180,52 @@ int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
                goto out;
        }
 
+       serial = atomic_inc_return(&conn->serial);
+       pkt->whdr.serial = htonl(serial);
+       switch (type) {
+       case RXRPC_PACKET_TYPE_ACK:
+               trace_rxrpc_tx_ack(call, serial,
+                                  ntohl(pkt->ack.firstPacket),
+                                  ntohl(pkt->ack.serial),
+                                  pkt->ack.reason, pkt->ack.nAcks);
+               break;
+       }
+
+       if (ping) {
+               call->ackr_ping = serial;
+               smp_wmb();
+               /* We need to stick a time in before we send the packet in case
+                * the reply gets back before kernel_sendmsg() completes - but
+                * asking UDP to send the packet can take a relatively long
+                * time, so we update the time after, on the assumption that
+                * the packet transmission is more likely to happen towards the
+                * end of the kernel_sendmsg() call.
+                */
+               call->ackr_ping_time = ktime_get_real();
+               set_bit(RXRPC_CALL_PINGING, &call->flags);
+               trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
+       }
        ret = kernel_sendmsg(conn->params.local->socket,
                             &msg, iov, ioc, len);
+       if (ping)
+               call->ackr_ping_time = ktime_get_real();
 
-       if (ret < 0 && call->state < RXRPC_CALL_COMPLETE) {
-               switch (type) {
-               case RXRPC_PACKET_TYPE_ACK:
+       if (type == RXRPC_PACKET_TYPE_ACK &&
+           call->state < RXRPC_CALL_COMPLETE) {
+               if (ret < 0) {
+                       clear_bit(RXRPC_CALL_PINGING, &call->flags);
                        rxrpc_propose_ACK(call, pkt->ack.reason,
                                          ntohs(pkt->ack.maxSkew),
                                          ntohl(pkt->ack.serial),
-                                         true, true);
-                       break;
-               case RXRPC_PACKET_TYPE_ABORT:
-                       break;
+                                         true, true,
+                                         rxrpc_propose_ack_retry_tx);
+               } else {
+                       spin_lock_bh(&call->lock);
+                       if (after(hard_ack, call->ackr_consumed))
+                               call->ackr_consumed = hard_ack;
+                       if (after(top, call->ackr_seen))
+                               call->ackr_seen = top;
+                       spin_unlock_bh(&call->lock);
                }
        }
 
@@ -208,52 +238,100 @@ out:
 /*
  * send a packet through the transport endpoint
  */
-int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb)
 {
-       struct kvec iov[1];
+       struct rxrpc_connection *conn = call->conn;
+       struct rxrpc_wire_header whdr;
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        struct msghdr msg;
+       struct kvec iov[2];
+       rxrpc_serial_t serial;
+       size_t len;
        int ret, opt;
 
        _enter(",{%d}", skb->len);
 
-       iov[0].iov_base = skb->head;
-       iov[0].iov_len = skb->len;
+       /* Each transmission of a Tx packet needs a new serial number */
+       serial = atomic_inc_return(&conn->serial);
+
+       whdr.epoch      = htonl(conn->proto.epoch);
+       whdr.cid        = htonl(call->cid);
+       whdr.callNumber = htonl(call->call_id);
+       whdr.seq        = htonl(sp->hdr.seq);
+       whdr.serial     = htonl(serial);
+       whdr.type       = RXRPC_PACKET_TYPE_DATA;
+       whdr.flags      = sp->hdr.flags;
+       whdr.userStatus = 0;
+       whdr.securityIndex = call->security_ix;
+       whdr._rsvd      = htons(sp->hdr._rsvd);
+       whdr.serviceId  = htons(call->service_id);
+
+       iov[0].iov_base = &whdr;
+       iov[0].iov_len = sizeof(whdr);
+       iov[1].iov_base = skb->head;
+       iov[1].iov_len = skb->len;
+       len = iov[0].iov_len + iov[1].iov_len;
 
-       msg.msg_name = &conn->params.peer->srx.transport;
-       msg.msg_namelen = conn->params.peer->srx.transport_len;
+       msg.msg_name = &call->peer->srx.transport;
+       msg.msg_namelen = call->peer->srx.transport_len;
        msg.msg_control = NULL;
        msg.msg_controllen = 0;
        msg.msg_flags = 0;
 
+       /* If our RTT cache needs working on, request an ACK.  Also request
+        * ACKs if a DATA packet appears to have been lost.
+        */
+       if (call->cong_mode == RXRPC_CALL_FAST_RETRANSMIT ||
+           (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+           ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+                        ktime_get_real()))
+               whdr.flags |= RXRPC_REQUEST_ACK;
+
        if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
                static int lose;
                if ((lose++ & 7) == 7) {
+                       trace_rxrpc_tx_data(call, sp->hdr.seq, serial,
+                                           whdr.flags, true);
                        rxrpc_lose_skb(skb, rxrpc_skb_tx_lost);
                        _leave(" = 0 [lose]");
                        return 0;
                }
        }
 
+       _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
+
        /* send the packet with the don't fragment bit set if we currently
         * think it's small enough */
-       if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) {
-               down_read(&conn->params.local->defrag_sem);
-               /* send the packet by UDP
-                * - returns -EMSGSIZE if UDP would have to fragment the packet
-                *   to go out of the interface
-                *   - in which case, we'll have processed the ICMP error
-                *     message and update the peer record
-                */
-               ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
-                                    iov[0].iov_len);
-
-               up_read(&conn->params.local->defrag_sem);
-               if (ret == -EMSGSIZE)
-                       goto send_fragmentable;
-
-               _leave(" = %d [%u]", ret, conn->params.peer->maxdata);
-               return ret;
+       if (iov[1].iov_len >= call->peer->maxdata)
+               goto send_fragmentable;
+
+       down_read(&conn->params.local->defrag_sem);
+       /* send the packet by UDP
+        * - returns -EMSGSIZE if UDP would have to fragment the packet
+        *   to go out of the interface
+        *   - in which case, we'll have processed the ICMP error
+        *     message and update the peer record
+        */
+       ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+
+       up_read(&conn->params.local->defrag_sem);
+       if (ret == -EMSGSIZE)
+               goto send_fragmentable;
+
+done:
+       trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, false);
+       if (ret >= 0) {
+               ktime_t now = ktime_get_real();
+               skb->tstamp = now;
+               smp_wmb();
+               sp->hdr.serial = serial;
+               if (whdr.flags & RXRPC_REQUEST_ACK) {
+                       call->peer->rtt_last_req = now;
+                       trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
+               }
        }
+       _leave(" = %d [%u]", ret, call->peer->maxdata);
+       return ret;
 
 send_fragmentable:
        /* attempt to send this message with fragmentation enabled */
@@ -268,8 +346,8 @@ send_fragmentable:
                                        SOL_IP, IP_MTU_DISCOVER,
                                        (char *)&opt, sizeof(opt));
                if (ret == 0) {
-                       ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
-                                            iov[0].iov_len);
+                       ret = kernel_sendmsg(conn->params.local->socket, &msg,
+                                            iov, 2, len);
 
                        opt = IP_PMTUDISC_DO;
                        kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -298,8 +376,7 @@ send_fragmentable:
        }
 
        up_write(&conn->params.local->defrag_sem);
-       _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata);
-       return ret;
+       goto done;
 }
 
 /*
index 18276e7..bf13b84 100644 (file)
@@ -305,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
        rxrpc_put_peer(peer);
        _leave("");
 }
+
+/*
+ * Add RTT information to cache.  This is called in softirq mode and has
+ * exclusive access to the peer RTT data.
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+                       rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+                       ktime_t send_time, ktime_t resp_time)
+{
+       struct rxrpc_peer *peer = call->peer;
+       s64 rtt;
+       u64 sum = peer->rtt_sum, avg;
+       u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;
+
+       rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
+       if (rtt < 0)
+               return;
+
+       /* Replace the oldest datum in the RTT buffer */
+       sum -= peer->rtt_cache[cursor];
+       sum += rtt;
+       peer->rtt_cache[cursor] = rtt;
+       peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
+       peer->rtt_sum = sum;
+       if (usage < RXRPC_RTT_CACHE_SIZE) {
+               usage++;
+               peer->rtt_usage = usage;
+       }
+
+       /* Now recalculate the average */
+       if (usage == RXRPC_RTT_CACHE_SIZE) {
+               avg = sum / RXRPC_RTT_CACHE_SIZE;
+       } else {
+               avg = sum;
+               do_div(avg, usage);
+       }
+
+       peer->rtt = avg;
+       trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
+                          usage, avg);
+}
index f3e5766..941b724 100644 (file)
@@ -244,6 +244,7 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
        peer->hash_key = hash_key;
        rxrpc_assess_MTU_size(peer);
        peer->mtu = peer->if_mtu;
+       peer->rtt_last_req = ktime_get_real();
 
        switch (peer->srx.transport.family) {
        case AF_INET:
index 6ba4af5..038ae62 100644 (file)
@@ -133,7 +133,7 @@ static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx,
 /*
  * End the packet reception phase.
  */
-static void rxrpc_end_rx_phase(struct rxrpc_call *call)
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
 {
        _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
 
@@ -141,10 +141,9 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call)
        ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
 
        if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
-               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, true, false);
+               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
+                                 rxrpc_propose_ack_terminal_ack);
                rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
-       } else {
-               rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, false);
        }
 
        write_lock_bh(&call->state_lock);
@@ -202,8 +201,19 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
 
        _debug("%u,%u,%02x", hard_ack, top, flags);
        trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
-       if (flags & RXRPC_LAST_PACKET)
-               rxrpc_end_rx_phase(call);
+       if (flags & RXRPC_LAST_PACKET) {
+               rxrpc_end_rx_phase(call, serial);
+       } else {
+               /* Check to see if there's an ACK that needs sending. */
+               if (after_eq(hard_ack, call->ackr_consumed + 2) ||
+                   after_eq(top, call->ackr_seen + 2) ||
+                   (hard_ack == top && after(hard_ack, call->ackr_consumed)))
+                       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
+                                         true, false,
+                                         rxrpc_propose_ack_rotate_rx);
+               if (call->ackr_reason)
+                       rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+       }
 }
 
 /*
index ae39255..88d080a 100644 (file)
@@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
        case RXRPC_SECURITY_AUTH:
                conn->size_align = 8;
                conn->security_size = sizeof(struct rxkad_level1_hdr);
-               conn->header_size += sizeof(struct rxkad_level1_hdr);
                break;
        case RXRPC_SECURITY_ENCRYPT:
                conn->size_align = 8;
                conn->security_size = sizeof(struct rxkad_level2_hdr);
-               conn->header_size += sizeof(struct rxkad_level2_hdr);
                break;
        default:
                ret = -EKEYREJECTED;
@@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
 
        _enter("");
 
-       check = sp->hdr.seq ^ sp->hdr.callNumber;
+       check = sp->hdr.seq ^ call->call_id;
        data_size |= (u32)check << 16;
 
        hdr.data_size = htonl(data_size);
@@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 
        _enter("");
 
-       check = sp->hdr.seq ^ sp->hdr.callNumber;
+       check = sp->hdr.seq ^ call->call_id;
 
        rxkhdr.data_size = htonl(data_size | (u32)check << 16);
        rxkhdr.checksum = 0;
@@ -277,7 +275,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
        /* calculate the security checksum */
        x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
        x |= sp->hdr.seq & 0x3fffffff;
-       call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+       call->crypto_buf[0] = htonl(call->call_id);
        call->crypto_buf[1] = htonl(x);
 
        sg_init_one(&sg, call->crypto_buf, 8);
index 6a39ee9..1f8040d 100644 (file)
@@ -45,7 +45,9 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                ret = 0;
-               if (call->tx_top - call->tx_hard_ack < call->tx_winsize)
+               if (call->tx_top - call->tx_hard_ack <
+                   min_t(unsigned int, call->tx_winsize,
+                         call->cong_cwnd + call->cong_extra))
                        break;
                if (call->state >= RXRPC_CALL_COMPLETE) {
                        ret = -call->error;
@@ -94,25 +96,30 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        rxrpc_seq_t seq = sp->hdr.seq;
        int ret, ix;
+       u8 annotation = RXRPC_TX_ANNO_UNACK;
 
        _net("queue skb %p [%d]", skb, seq);
 
        ASSERTCMP(seq, ==, call->tx_top + 1);
 
+       if (last)
+               annotation |= RXRPC_TX_ANNO_LAST;
+
+       /* We have to set the timestamp before queueing as the retransmit
+        * algorithm can see the packet as soon as we queue it.
+        */
+       skb->tstamp = ktime_get_real();
+
        ix = seq & RXRPC_RXTX_BUFF_MASK;
        rxrpc_get_skb(skb, rxrpc_skb_tx_got);
-       call->rxtx_annotations[ix] = RXRPC_TX_ANNO_UNACK;
+       call->rxtx_annotations[ix] = annotation;
        smp_wmb();
        call->rxtx_buffer[ix] = skb;
        call->tx_top = seq;
-       if (last) {
-               set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+       if (last)
                trace_rxrpc_transmit(call, rxrpc_transmit_queue_last);
-       } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
-               trace_rxrpc_transmit(call, rxrpc_transmit_queue_reqack);
-       } else {
+       else
                trace_rxrpc_transmit(call, rxrpc_transmit_queue);
-       }
 
        if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
                _debug("________awaiting reply/ACK__________");
@@ -134,45 +141,28 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
                write_unlock_bh(&call->state_lock);
        }
 
-       _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
        if (seq == 1 && rxrpc_is_client_call(call))
                rxrpc_expose_client_call(call);
 
-       sp->resend_at = jiffies + rxrpc_resend_timeout;
-       ret = rxrpc_send_data_packet(call->conn, skb);
+       ret = rxrpc_send_data_packet(call, skb);
        if (ret < 0) {
                _debug("need instant resend %d", ret);
                rxrpc_instant_resend(call, ix);
+       } else {
+               unsigned long resend_at;
+
+               resend_at = jiffies + msecs_to_jiffies(rxrpc_resend_timeout);
+
+               if (time_before(resend_at, call->resend_at)) {
+                       call->resend_at = resend_at;
+                       rxrpc_set_timer(call, rxrpc_timer_set_for_send);
+               }
        }
 
        rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
        _leave("");
 }
 
-/*
- * Convert a host-endian header into a network-endian header.
- */
-static void rxrpc_insert_header(struct sk_buff *skb)
-{
-       struct rxrpc_wire_header whdr;
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-       whdr.epoch      = htonl(sp->hdr.epoch);
-       whdr.cid        = htonl(sp->hdr.cid);
-       whdr.callNumber = htonl(sp->hdr.callNumber);
-       whdr.seq        = htonl(sp->hdr.seq);
-       whdr.serial     = htonl(sp->hdr.serial);
-       whdr.type       = sp->hdr.type;
-       whdr.flags      = sp->hdr.flags;
-       whdr.userStatus = sp->hdr.userStatus;
-       whdr.securityIndex = sp->hdr.securityIndex;
-       whdr._rsvd      = htons(sp->hdr._rsvd);
-       whdr.serviceId  = htons(sp->hdr.serviceId);
-
-       memcpy(skb->head, &whdr, sizeof(whdr));
-}
-
 /*
  * send data through a socket
  * - must be called in process context
@@ -205,13 +195,18 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
        copied = 0;
        do {
+               /* Check to see if there's a ping ACK to reply to. */
+               if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
+                       rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+
                if (!skb) {
                        size_t size, chunk, max, space;
 
                        _debug("alloc");
 
                        if (call->tx_top - call->tx_hard_ack >=
-                           call->tx_winsize) {
+                           min_t(unsigned int, call->tx_winsize,
+                                 call->cong_cwnd + call->cong_extra)) {
                                ret = -EAGAIN;
                                if (msg->msg_flags & MSG_DONTWAIT)
                                        goto maybe_error;
@@ -232,7 +227,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
                        space = chunk + call->conn->size_align;
                        space &= ~(call->conn->size_align - 1UL);
 
-                       size = space + call->conn->header_size;
+                       size = space + call->conn->security_size;
 
                        _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
 
@@ -248,9 +243,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
                        ASSERTCMP(skb->mark, ==, 0);
 
-                       _debug("HS: %u", call->conn->header_size);
-                       skb_reserve(skb, call->conn->header_size);
-                       skb->len += call->conn->header_size;
+                       _debug("HS: %u", call->conn->security_size);
+                       skb_reserve(skb, call->conn->security_size);
+                       skb->len += call->conn->security_size;
 
                        sp = rxrpc_skb(skb);
                        sp->remain = chunk;
@@ -312,33 +307,21 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
                        seq = call->tx_top + 1;
 
-                       sp->hdr.epoch   = conn->proto.epoch;
-                       sp->hdr.cid     = call->cid;
-                       sp->hdr.callNumber = call->call_id;
                        sp->hdr.seq     = seq;
-                       sp->hdr.serial  = atomic_inc_return(&conn->serial);
-                       sp->hdr.type    = RXRPC_PACKET_TYPE_DATA;
-                       sp->hdr.userStatus = 0;
-                       sp->hdr.securityIndex = call->security_ix;
                        sp->hdr._rsvd   = 0;
-                       sp->hdr.serviceId = call->service_id;
+                       sp->hdr.flags   = conn->out_clientflag;
 
-                       sp->hdr.flags = conn->out_clientflag;
                        if (msg_data_left(msg) == 0 && !more)
                                sp->hdr.flags |= RXRPC_LAST_PACKET;
                        else if (call->tx_top - call->tx_hard_ack <
                                 call->tx_winsize)
                                sp->hdr.flags |= RXRPC_MORE_PACKETS;
-                       if (more && seq & 1)
-                               sp->hdr.flags |= RXRPC_REQUEST_ACK;
 
                        ret = conn->security->secure_packet(
-                               call, skb, skb->mark,
-                               skb->head + sizeof(struct rxrpc_wire_header));
+                               call, skb, skb->mark, skb->head);
                        if (ret < 0)
                                goto out;
 
-                       rxrpc_insert_header(skb);
                        rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
                        skb = NULL;
                }
index a03c61c..13d1df0 100644 (file)
@@ -59,7 +59,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
                .data           = &rxrpc_resend_timeout,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_ms_jiffies,
+               .proc_handler   = proc_dointvec,
                .extra1         = (void *)&one,
        },
        {
index a4ce39b..455fc8f 100644 (file)
@@ -559,7 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        return 0;
 
 errout:
-       tcf_exts_destroy(&f->exts);
+       if (f)
+               tcf_exts_destroy(&f->exts);
        kfree(f);
        return err;
 }
index 5dd929c..18e7524 100644 (file)
@@ -86,6 +86,7 @@ struct fq_sched_data {
 
        struct rb_root  delayed;        /* for rate limited flows */
        u64             time_next_delayed_flow;
+       unsigned long   unthrottle_latency_ns;
 
        struct fq_flow  internal;       /* for non classified or high prio packets */
        u32             quantum;
@@ -408,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
+       unsigned long sample;
        struct rb_node *p;
 
        if (q->time_next_delayed_flow > now)
                return;
 
+       /* Update unthrottle latency EWMA.
+        * This is cheap and can help diagnosing timer/latency problems.
+        */
+       sample = (unsigned long)(now - q->time_next_delayed_flow);
+       q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+       q->unthrottle_latency_ns += sample >> 3;
+
        q->time_next_delayed_flow = ~0ULL;
        while ((p = rb_first(&q->delayed)) != NULL) {
                struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
@@ -515,7 +524,12 @@ begin:
                        len = NSEC_PER_SEC;
                        q->stat_pkts_too_long++;
                }
-
+               /* Account for schedule/timers drifts.
+                * f->time_next_packet was set when prior packet was sent,
+                * and current time (@now) can be too late by tens of us.
+                */
+               if (f->time_next_packet)
+                       len -= min(len/2, now - f->time_next_packet);
                f->time_next_packet = now + len;
        }
 out:
@@ -787,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
        q->initial_quantum      = 10 * psched_mtu(qdisc_dev(sch));
        q->flow_refill_delay    = msecs_to_jiffies(40);
        q->flow_max_rate        = ~0U;
+       q->time_next_delayed_flow = ~0ULL;
        q->rate_enable          = 1;
        q->new_flows.first      = NULL;
        q->old_flows.first      = NULL;
@@ -854,8 +869,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
        st.flows                  = q->flows;
        st.inactive_flows         = q->inactive_flows;
        st.throttled_flows        = q->throttled_flows;
-       st.pad                    = 0;
-
+       st.unthrottle_latency_ns  = min_t(unsigned long,
+                                         q->unthrottle_latency_ns, ~0U);
        sch_tree_unlock(sch);
 
        return gnet_stats_copy_app(d, &st, sizeof(st));
index a1d8506..a2ea1d1 100644 (file)
@@ -796,27 +796,34 @@ struct sctp_hash_cmp_arg {
 static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
                                const void *ptr)
 {
+       struct sctp_transport *t = (struct sctp_transport *)ptr;
        const struct sctp_hash_cmp_arg *x = arg->key;
-       const struct sctp_transport *t = ptr;
-       struct sctp_association *asoc = t->asoc;
-       const struct net *net = x->net;
+       struct sctp_association *asoc;
+       int err = 1;
 
        if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
-               return 1;
-       if (!net_eq(sock_net(asoc->base.sk), net))
-               return 1;
+               return err;
+       if (!sctp_transport_hold(t))
+               return err;
+
+       asoc = t->asoc;
+       if (!net_eq(sock_net(asoc->base.sk), x->net))
+               goto out;
        if (x->ep) {
                if (x->ep != asoc->ep)
-                       return 1;
+                       goto out;
        } else {
                if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
-                       return 1;
+                       goto out;
                if (!sctp_bind_addr_match(&asoc->base.bind_addr,
                                          x->laddr, sctp_sk(asoc->base.sk)))
-                       return 1;
+                       goto out;
        }
 
-       return 0;
+       err = 0;
+out:
+       sctp_transport_put(t);
+       return err;
 }
 
 static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
index 8c3f446..3ec6da8 100644 (file)
@@ -1719,7 +1719,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
 {
        int i;
        sctp_sack_variable_t *frags;
-       __u16 gap;
+       __u16 tsn_offset, blocks;
        __u32 ctsn = ntohl(sack->cum_tsn_ack);
 
        if (TSN_lte(tsn, ctsn))
@@ -1738,10 +1738,11 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn)
         */
 
        frags = sack->variable;
-       gap = tsn - ctsn;
-       for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) {
-               if (TSN_lte(ntohs(frags[i].gab.start), gap) &&
-                   TSN_lte(gap, ntohs(frags[i].gab.end)))
+       blocks = ntohs(sack->num_gap_ack_blocks);
+       tsn_offset = tsn - ctsn;
+       for (i = 0; i < blocks; ++i) {
+               if (tsn_offset >= ntohs(frags[i].gab.start) &&
+                   tsn_offset <= ntohs(frags[i].gab.end))
                        goto pass;
        }
 
index 1d28181..d858202 100644 (file)
@@ -569,9 +569,10 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle)
        struct rsc *found;
 
        memset(&rsci, 0, sizeof(rsci));
-       rsci.handle.data = handle->data;
-       rsci.handle.len = handle->len;
+       if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
+               return NULL;
        found = rsc_lookup(cd, &rsci);
+       rsc_free(&rsci);
        if (!found)
                return NULL;
        if (cache_check(cd, &found->h, NULL))
index 536d0be..799cce6 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
 #include <linux/module.h> /* try_module_get()/module_put() */
 
@@ -923,7 +924,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
        }
 
        INIT_LIST_HEAD(&buf->rb_recv_bufs);
-       for (i = 0; i < buf->rb_max_requests; i++) {
+       for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
                struct rpcrdma_rep *rep;
 
                rep = rpcrdma_create_rep(r_xprt);
@@ -1018,6 +1019,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                rep = rpcrdma_buffer_get_rep_locked(buf);
                rpcrdma_destroy_rep(ia, rep);
        }
+       buf->rb_send_count = 0;
 
        spin_lock(&buf->rb_reqslock);
        while (!list_empty(&buf->rb_allreqs)) {
@@ -1032,6 +1034,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                spin_lock(&buf->rb_reqslock);
        }
        spin_unlock(&buf->rb_reqslock);
+       buf->rb_recv_count = 0;
 
        rpcrdma_destroy_mrs(buf);
 }
@@ -1074,8 +1077,27 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
        spin_unlock(&buf->rb_mwlock);
 }
 
+static struct rpcrdma_rep *
+rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
+{
+       /* If an RPC previously completed without a reply (say, a
+        * credential problem or a soft timeout occurs) then hold off
+        * on supplying more Receive buffers until the number of new
+        * pending RPCs catches up to the number of posted Receives.
+        */
+       if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
+               return NULL;
+
+       if (unlikely(list_empty(&buffers->rb_recv_bufs)))
+               return NULL;
+       buffers->rb_recv_count++;
+       return rpcrdma_buffer_get_rep_locked(buffers);
+}
+
 /*
  * Get a set of request/reply buffers.
+ *
+ * Reply buffer (if available) is attached to send buffer upon return.
  */
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
@@ -1085,21 +1107,15 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        spin_lock(&buffers->rb_lock);
        if (list_empty(&buffers->rb_send_bufs))
                goto out_reqbuf;
+       buffers->rb_send_count++;
        req = rpcrdma_buffer_get_req_locked(buffers);
-       if (list_empty(&buffers->rb_recv_bufs))
-               goto out_repbuf;
-       req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+       req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
        return req;
 
 out_reqbuf:
        spin_unlock(&buffers->rb_lock);
-       pr_warn("rpcrdma: out of request buffers (%p)\n", buffers);
-       return NULL;
-out_repbuf:
-       list_add(&req->rl_free, &buffers->rb_send_bufs);
-       spin_unlock(&buffers->rb_lock);
-       pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers);
+       pr_warn("RPC:       %s: out of request buffers\n", __func__);
        return NULL;
 }
 
@@ -1117,9 +1133,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
        req->rl_reply = NULL;
 
        spin_lock(&buffers->rb_lock);
+       buffers->rb_send_count--;
        list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
-       if (rep)
+       if (rep) {
+               buffers->rb_recv_count--;
                list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+       }
        spin_unlock(&buffers->rb_lock);
 }
 
@@ -1133,8 +1152,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
        struct rpcrdma_buffer *buffers = req->rl_buffer;
 
        spin_lock(&buffers->rb_lock);
-       if (!list_empty(&buffers->rb_recv_bufs))
-               req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+       req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
 }
 
@@ -1148,6 +1166,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
        struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
 
        spin_lock(&buffers->rb_lock);
+       buffers->rb_recv_count--;
        list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
        spin_unlock(&buffers->rb_lock);
 }
index 670fad5..a71b0f5 100644 (file)
@@ -321,6 +321,7 @@ struct rpcrdma_buffer {
        char                    *rb_pool;
 
        spinlock_t              rb_lock;        /* protect buf lists */
+       int                     rb_send_count, rb_recv_count;
        struct list_head        rb_send_bufs;
        struct list_head        rb_recv_bufs;
        u32                     rb_max_requests;
index 8ede3bc..bf16883 100644 (file)
@@ -1074,7 +1074,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
                skb = skb_recv_datagram(sk, 0, 1, &err);
                if (skb != NULL) {
                        xs_udp_data_read_skb(&transport->xprt, sk, skb);
-                       skb_free_datagram(sk, skb);
+                       skb_free_datagram_locked(sk, skb);
                        continue;
                }
                if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
index 887c4c1..fd111e2 100644 (file)
@@ -7014,7 +7014,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 
                params.n_counter_offsets_presp = len / sizeof(u16);
                if (rdev->wiphy.max_num_csa_counters &&
-                   (params.n_counter_offsets_beacon >
+                   (params.n_counter_offsets_presp >
                     rdev->wiphy.max_num_csa_counters))
                        return -EINVAL;
 
index ba8bf51..419bf5d 100644 (file)
@@ -350,6 +350,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
        tasklet_hrtimer_cancel(&x->mtimer);
        del_timer_sync(&x->rtimer);
+       kfree(x->aead);
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
@@ -1431,9 +1432,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32
 {
        struct xfrm_state *x;
 
-       spin_lock_bh(&net->xfrm.xfrm_state_lock);
+       rcu_read_lock();
        x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
-       spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+       rcu_read_unlock();
        return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
index cb65d91..0889209 100644 (file)
@@ -581,9 +581,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
        if (err)
                goto error;
 
-       if (attrs[XFRMA_SEC_CTX] &&
-           security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
-               goto error;
+       if (attrs[XFRMA_SEC_CTX]) {
+               err = security_xfrm_state_alloc(x,
+                                               nla_data(attrs[XFRMA_SEC_CTX]));
+               if (err)
+                       goto error;
+       }
 
        if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
                                               attrs[XFRMA_REPLAY_ESN_VAL])))
diff --git a/scripts/faddr2line b/scripts/faddr2line
new file mode 100755 (executable)
index 0000000..450b332
--- /dev/null
@@ -0,0 +1,177 @@
+#!/bin/bash
+#
+# Translate stack dump function offsets.
+#
+# addr2line doesn't work with KASLR addresses.  This works similarly to
+# addr2line, but instead takes the 'func+0x123' format as input:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux meminfo_proc_show+0x5/0x568
+#   meminfo_proc_show+0x5/0x568:
+#   meminfo_proc_show at fs/proc/meminfo.c:27
+#
+# If the address is part of an inlined function, the full inline call chain is
+# printed:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux native_write_msr+0x6/0x27
+#   native_write_msr+0x6/0x27:
+#   arch_static_branch at arch/x86/include/asm/msr.h:121
+#    (inlined by) static_key_false at include/linux/jump_label.h:125
+#    (inlined by) native_write_msr at arch/x86/include/asm/msr.h:125
+#
+# The function size after the '/' in the input is optional, but recommended.
+# It's used to help disambiguate any duplicate symbol names, which can occur
+# rarely.  If the size is omitted for a duplicate symbol then it's possible for
+# multiple code sites to be printed:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux raw_ioctl+0x5
+#   raw_ioctl+0x5/0x20:
+#   raw_ioctl at drivers/char/raw.c:122
+#
+#   raw_ioctl+0x5/0xb1:
+#   raw_ioctl at net/ipv4/raw.c:876
+#
+# Multiple addresses can be specified on a single command line:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux type_show+0x10/45 free_reserved_area+0x90
+#   type_show+0x10/0x2d:
+#   type_show at drivers/video/backlight/backlight.c:213
+#
+#   free_reserved_area+0x90/0x123:
+#   free_reserved_area at mm/page_alloc.c:6429 (discriminator 2)
+
+
+set -o errexit
+set -o nounset
+
+command -v awk >/dev/null 2>&1 || die "awk isn't installed"
+command -v readelf >/dev/null 2>&1 || die "readelf isn't installed"
+command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed"
+
+usage() {
+       echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
+       exit 1
+}
+
+warn() {
+       echo "$1" >&2
+}
+
+die() {
+       echo "ERROR: $1" >&2
+       exit 1
+}
+
+# Try to figure out the source directory prefix so we can remove it from the
+# addr2line output.  HACK ALERT: This assumes that start_kernel() is in
+# kernel/init.c!  This only works for vmlinux.  Otherwise it falls back to
+# printing the absolute path.
+find_dir_prefix() {
+       local objfile=$1
+
+       local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
+       [[ -z $start_kernel_addr ]] && return
+
+       local file_line=$(addr2line -e $objfile $start_kernel_addr)
+       [[ -z $file_line ]] && return
+
+       local prefix=${file_line%init/main.c:*}
+       if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then
+               return
+       fi
+
+       DIR_PREFIX=$prefix
+       return 0
+}
+
+__faddr2line() {
+       local objfile=$1
+       local func_addr=$2
+       local dir_prefix=$3
+       local print_warnings=$4
+
+       local func=${func_addr%+*}
+       local offset=${func_addr#*+}
+       offset=${offset%/*}
+       local size=
+       [[ $func_addr =~ "/" ]] && size=${func_addr#*/}
+
+       if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then
+               warn "bad func+offset $func_addr"
+               DONE=1
+               return
+       fi
+
+       # Go through each of the object's symbols which match the func name.
+       # In rare cases there might be duplicates.
+       while read symbol; do
+               local fields=($symbol)
+               local sym_base=0x${fields[1]}
+               local sym_size=${fields[2]}
+               local sym_type=${fields[3]}
+
+               # calculate the address
+               local addr=$(($sym_base + $offset))
+               if [[ -z $addr ]] || [[ $addr = 0 ]]; then
+                       warn "bad address: $sym_base + $offset"
+                       DONE=1
+                       return
+               fi
+               local hexaddr=0x$(printf %x $addr)
+
+               # weed out non-function symbols
+               if [[ $sym_type != "FUNC" ]]; then
+                       [[ $print_warnings = 1 ]] &&
+                               echo "skipping $func address at $hexaddr due to non-function symbol"
+                       continue
+               fi
+
+               # if the user provided a size, make sure it matches the symbol's size
+               if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
+                       [[ $print_warnings = 1 ]] &&
+                               echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
+                       continue;
+               fi
+
+               # make sure the provided offset is within the symbol's range
+               if [[ $offset -gt $sym_size ]]; then
+                       [[ $print_warnings = 1 ]] &&
+                               echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
+                       continue
+               fi
+
+               # separate multiple entries with a blank line
+               [[ $FIRST = 0 ]] && echo
+               FIRST=0
+
+               local hexsize=0x$(printf %x $sym_size)
+               echo "$func+$offset/$hexsize:"
+               addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
+               DONE=1
+
+       done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
+}
+
+[[ $# -lt 2 ]] && usage
+
+objfile=$1
+[[ ! -f $objfile ]] && die "can't find objfile $objfile"
+shift
+
+DIR_PREFIX=supercalifragilisticexpialidocious
+find_dir_prefix $objfile
+
+FIRST=1
+while [[ $# -gt 0 ]]; do
+       func_addr=$1
+       shift
+
+       # print any matches found
+       DONE=0
+       __faddr2line $objfile $func_addr $DIR_PREFIX 0
+
+       # if no match was found, print warnings
+       if [[ $DONE = 0 ]]; then
+               __faddr2line $objfile $func_addr $DIR_PREFIX 1
+               warn "no match for $func_addr"
+       fi
+done
index d9836c5..11c8d9b 100644 (file)
@@ -3266,6 +3266,9 @@ int main(int argc, char *argv[])
                }
        }
 
+       /* If we exit via err(), this kills all the threads, restores tty. */
+       atexit(cleanup_devices);
+
        /* We always have a console device, and it's always device 1. */
        setup_console();
 
@@ -3369,9 +3372,6 @@ int main(int argc, char *argv[])
        /* Ensure that we terminate if a device-servicing child dies. */
        signal(SIGCHLD, kill_launcher);
 
-       /* If we exit via err(), this kills all the threads, restores tty. */
-       atexit(cleanup_devices);
-
        /* If requested, chroot to a directory */
        if (chroot_path) {
                if (chroot(chroot_path) != 0)