F: include/linux/altera_uart.h
F: include/linux/altera_jtaguart.h
+AMAZON ETHERNET DRIVERS
+M: Netanel Belgazal <netanel@annapurnalabs.com>
+R: Saeed Bishara <saeed@annapurnalabs.com>
+R: Zorik Machulsky <zorik@annapurnalabs.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: Documentation/networking/ena.txt
+F: drivers/net/ethernet/amazon/
+
AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
M: Gary Hook <gary.hook@amd.com>
ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
M: Kukjin Kim <kgene@kernel.org>
M: Krzysztof Kozlowski <krzk@kernel.org>
+R: Javier Martinez Canillas <javier@osg.samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
S: Maintained
F: kernel/bpf/
BROADCOM B44 10/100 ETHERNET DRIVER
-M: Gary Zambrano <zambrano@broadcom.com>
+M: Michael Chan <michael.chan@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/broadcom/b44.*
F: drivers/cpufreq/intel_pstate.c
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M: Maik Broemme <mbroemme@plusserver.de>
+M: Maik Broemme <mbroemme@libmpq.org>
L: linux-fbdev@vger.kernel.org
S: Maintained
F: Documentation/fb/intelfb.txt
W: https://fedorahosted.org/dropwatch/
F: net/core/drop_monitor.c
+NETWORKING [DSA]
+M: Andrew Lunn <andrew@lunn.ch>
+M: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+M: Florian Fainelli <f.fainelli@gmail.com>
+S: Maintained
+F: net/dsa/
+F: include/net/dsa.h
+F: drivers/net/dsa/
+
NETWORKING [GENERAL]
M: "David S. Miller" <davem@davemloft.net>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/wireless/ath/ath10k/
+QUALCOMM EMAC GIGABIT ETHERNET DRIVER
+M: Timur Tabi <timur@codeaurora.org>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/qualcomm/emac/
+
QUALCOMM HEXAGON ARCHITECTURE
M: Richard Kuo <rkuo@codeaurora.org>
L: linux-hexagon@vger.kernel.org
RHASHTABLE
M: Thomas Graf <tgraf@suug.ch>
+M: Herbert Xu <herbert@gondor.apana.org.au>
L: netdev@vger.kernel.org
S: Maintained
F: lib/rhashtable.c
USB SMSC95XX ETHERNET DRIVER
M: Steve Glendinning <steve.glendinning@shawell.net>
+M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/usb/smsc95xx.*
F: net/8021q/
VLYNQ BUS
-M: Florian Fainelli <florian@openwrt.org>
+M: Florian Fainelli <f.fainelli@gmail.com>
L: openwrt-devel@lists.openwrt.org (subscribers-only)
S: Maintained
F: drivers/vlynq/vlynq.c
timer {
compatible = "arm,armv8-timer";
- interrupts = <1 0 0xff01>, /* Secure Phys IRQ */
- <1 13 0xff01>, /* Non-secure Phys IRQ */
- <1 14 0xff01>, /* Virt IRQ */
- <1 15 0xff01>; /* Hyp IRQ */
+ interrupts = <1 0 0xff08>, /* Secure Phys IRQ */
+ <1 13 0xff08>, /* Non-secure Phys IRQ */
+ <1 14 0xff08>, /* Virt IRQ */
+ <1 15 0xff08>; /* Hyp IRQ */
clock-frequency = <50000000>;
};
/* mac address will be overwritten by the bootloader */
local-mac-address = [00 00 00 00 00 00];
phy-connection-type = "rgmii";
- phy-handle = <&menet0phy>,<&menetphy>;
+ phy-handle = <&menetphy>,<&menet0phy>;
mdio {
compatible = "apm,xgene-mdio";
#address-cells = <1>;
#include <rdma/ib_addr.h>
+#include <libcxgb_cm.h>
#include "iw_cxgb4.h"
#include "clip_tbl.h"
static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
{
- struct cpl_tid_release *req;
+ u32 len = roundup(sizeof(struct cpl_tid_release), 16);
- skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ skb = get_skb(skb, len, GFP_KERNEL);
if (!skb)
return;
- req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
- INIT_TP_WR(req, hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+
+ cxgb_mk_tid_release(skb, len, hwtid, 0);
c4iw_ofld_send(rdev, skb);
return;
}
spin_lock_irqsave(&ep->com.dev->lock, flags);
_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+ if (idr_is_empty(&ep->com.dev->hwtid_idr))
+ wake_up(&ep->com.dev->wait);
spin_unlock_irqrestore(&ep->com.dev->lock, flags);
}
return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
}
-static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
-{
- int i;
-
- egress_dev = get_real_dev(egress_dev);
- for (i = 0; i < dev->rdev.lldi.nports; i++)
- if (dev->rdev.lldi.ports[i] == egress_dev)
- return 1;
- return 0;
-}
-
-static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
- __u8 *peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos,
- __u32 sin6_scope_id)
-{
- struct dst_entry *dst = NULL;
-
- if (IS_ENABLED(CONFIG_IPV6)) {
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- memcpy(&fl6.daddr, peer_ip, 16);
- memcpy(&fl6.saddr, local_ip, 16);
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = sin6_scope_id;
- dst = ip6_route_output(&init_net, NULL, &fl6);
- if (!dst)
- goto out;
- if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
- !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
- dst_release(dst);
- dst = NULL;
- }
- }
-
-out:
- return dst;
-}
-
-static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
- __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos)
-{
- struct rtable *rt;
- struct flowi4 fl4;
- struct neighbour *n;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- peer_port, local_port, IPPROTO_TCP,
- tos, 0);
- if (IS_ERR(rt))
- return NULL;
- n = dst_neigh_lookup(&rt->dst, &peer_ip);
- if (!n)
- return NULL;
- if (!our_interface(dev, n->dev) &&
- !(n->dev->flags & IFF_LOOPBACK)) {
- neigh_release(n);
- dst_release(&rt->dst);
- return NULL;
- }
- neigh_release(n);
- return &rt->dst;
-}
-
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
pr_err(MOD "ARP failure\n");
static int send_halfclose(struct c4iw_ep *ep)
{
- struct cpl_close_con_req *req;
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
- ep->hwtid));
+ cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
+ NULL, arp_failure_discard);
+
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
{
- struct cpl_abort_req *req;
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
- set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
- req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- req->cmd = CPL_ABORT_SEND_RST;
- return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
-}
+ cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
+ ep, abort_arp_failure);
-static void best_mtu(const unsigned short *mtus, unsigned short mtu,
- unsigned int *idx, int use_ts, int ipv6)
-{
- unsigned short hdr_size = (ipv6 ?
- sizeof(struct ipv6hdr) :
- sizeof(struct iphdr)) +
- sizeof(struct tcphdr) +
- (use_ts ?
- round_up(TCPOLEN_TIMESTAMP, 4) : 0);
- unsigned short data_size = mtu - hdr_size;
-
- cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+ return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static int send_connect(struct c4iw_ep *ep)
u64 opt0;
u32 opt2;
unsigned int mtu_idx;
- int wscale;
+ u32 wscale;
int win, sizev4, sizev6, wrlen;
struct sockaddr_in *la = (struct sockaddr_in *)
&ep->com.local_addr;
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
{
- struct cpl_rx_data_ack *req;
struct sk_buff *skb;
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
+ u32 credit_dack;
PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
- req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
- ep->hwtid));
- req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
- RX_DACK_CHANGE_F |
- RX_DACK_MODE_V(dack_mode));
- set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+ credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
+ RX_DACK_MODE_V(dack_mode);
+
+ cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
+ credit_dack);
+
c4iw_ofld_send(&ep->com.dev->rdev, skb);
return credits;
}
struct sk_buff *skb;
struct fw_ofld_connection_wr *req;
unsigned int mtu_idx;
- int wscale;
+ u32 wscale;
struct sockaddr_in *sin;
int win;
htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
status != CPL_ERR_CONN_EXIST);
}
-/* Returns whether a CPL status conveys negative advice.
- */
-static int is_neg_adv(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE ||
- status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
static char *neg_adv_str(unsigned int status)
{
switch (status) {
}
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, rt_tos2priority(tos));
- if (!ep->l2t)
+ if (!ep->l2t) {
+ dev_put(pdev);
goto out;
+ }
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
/* find a route */
if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
- ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, ep->com.cm_id->tos);
+ ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
+ laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr,
+ laddr->sin_port,
+ raddr->sin_port, ep->com.cm_id->tos);
iptype = 4;
ra = (__u8 *)&raddr->sin_addr;
} else {
- ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
- raddr6->sin6_addr.s6_addr,
- laddr6->sin6_port, raddr6->sin6_port, 0,
- raddr6->sin6_scope_id);
+ ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
+ get_real_dev,
+ laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port,
+ raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
}
PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
status, status2errno(status));
- if (is_neg_adv(status)) {
+ if (cxgb_is_neg_adv(status)) {
PDBG("%s Connection problems for atid %u status %u (%s)\n",
__func__, atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
unsigned int mtu_idx;
u64 opt0;
u32 opt2;
- int wscale;
+ u32 wscale;
struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
ep->hwtid));
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps && req->tcpopt.tstamp,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
return;
}
-static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
- int *iptype, __u8 *local_ip, __u8 *peer_ip,
- __be16 *local_port, __be16 *peer_port)
-{
- int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
- struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
- struct tcphdr *tcp = (struct tcphdr *)
- ((u8 *)(req + 1) + eth_len + ip_len);
-
- if (ip->version == 4) {
- PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
- ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
- ntohs(tcp->dest));
- *iptype = 4;
- memcpy(peer_ip, &ip->saddr, 4);
- memcpy(local_ip, &ip->daddr, 4);
- } else {
- PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
- ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
- ntohs(tcp->dest));
- *iptype = 6;
- memcpy(peer_ip, ip6->saddr.s6_addr, 16);
- memcpy(local_ip, ip6->daddr.s6_addr, 16);
- }
- *peer_port = tcp->source;
- *local_port = tcp->dest;
-
- return;
-}
-
static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct c4iw_ep *child_ep = NULL, *parent_ep;
goto reject;
}
- get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
- local_ip, peer_ip, &local_port, &peer_port);
+ cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
+ &iptype, local_ip, peer_ip, &local_port, &peer_port);
/* Find output route */
if (iptype == 4) {
, __func__, parent_ep, hwtid,
local_ip, peer_ip, ntohs(local_port),
ntohs(peer_port), peer_mss);
- dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
- local_port, peer_port,
- tos);
+ dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ *(__be32 *)local_ip, *(__be32 *)peer_ip,
+ local_port, peer_port, tos);
} else {
PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
, __func__, parent_ep, hwtid,
local_ip, peer_ip, ntohs(local_port),
ntohs(peer_port), peer_mss);
- dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
- PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
- ((struct sockaddr_in6 *)
- &parent_ep->com.local_addr)->sin6_scope_id);
+ dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ local_ip, peer_ip, local_port, peer_port,
+ PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+ ((struct sockaddr_in6 *)
+ &parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
{
struct cpl_abort_req_rss *req = cplhdr(skb);
struct c4iw_ep *ep;
- struct cpl_abort_rpl *rpl;
struct sk_buff *rpl_skb;
struct c4iw_qp_attributes attrs;
int ret;
int release = 0;
unsigned int tid = GET_TID(req);
+ u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- if (is_neg_adv(req->status)) {
+ if (cxgb_is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
release = 1;
goto out;
}
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- rpl->cmd = CPL_ABORT_NO_RST;
+
+ cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
+
c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
out:
if (release)
PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
__func__, &laddr->sin_addr, ntohs(laddr->sin_port),
ra, ntohs(raddr->sin_port));
- ep->dst = find_route(dev, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, cm_id->tos);
+ ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr,
+ laddr->sin_port,
+ raddr->sin_port, cm_id->tos);
} else {
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
__func__, laddr6->sin6_addr.s6_addr,
ntohs(laddr6->sin6_port),
raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
- ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
- raddr6->sin6_addr.s6_addr,
- laddr6->sin6_port, raddr6->sin6_port, 0,
- raddr6->sin6_scope_id);
+ ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port,
+ raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
}
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
ntohs(tcph->source), iph->tos);
- dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
- iph->tos);
+ dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ iph->daddr, iph->saddr, tcph->dest,
+ tcph->source, iph->tos);
if (!dst) {
pr_err("%s - failed to find dst entry!\n",
__func__);
kfree_skb(skb);
return 0;
}
- if (is_neg_adv(req->status)) {
+ if (cxgb_is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
static void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
idr_destroy(&ctx->dev->cqidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
idr_destroy(&ctx->dev->qpidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
idr_destroy(&ctx->dev->mmidr);
+ wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
idr_destroy(&ctx->dev->hwtid_idr);
idr_destroy(&ctx->dev->stid_idr);
idr_destroy(&ctx->dev->atid_idr);
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
INIT_LIST_HEAD(&devp->db_fc_list);
+ init_waitqueue_head(&devp->wait);
devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
if (c4iw_debugfs_root) {
static struct cxgb4_uld_info c4iw_uld_info = {
.name = DRV_NAME,
+ .nrxq = MAX_ULD_QSETS,
+ .rxq_size = 511,
+ .ciq = true,
+ .lro = false,
.add = c4iw_uld_add,
.rx_handler = c4iw_uld_rx_handler,
.state_change = c4iw_uld_state_change,
struct idr stid_idr;
struct list_head db_fc_list;
u32 avail_ird;
+ wait_queue_head_t wait;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
return cm_id->provider_data;
}
-static inline int compute_wscale(int win)
-{
- int wscale = 0;
-
- while (wscale < 14 && (65535<<wscale) < win)
- wscale++;
- return wscale;
-}
-
static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
{
#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
if (ll != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
- memset(in, 0, sizeof(in));
-
ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
MLX5_SET(set_roce_address_in, in, roce_address_index, index);
MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-
- memset(out, 0, sizeof(out));
return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
- return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+ return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ return 0;
}
enum {
&props->active_width);
if (err)
goto out;
- err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
- port);
+ err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
if (err)
goto out;
dmac_47_16),
ib_spec->eth.val.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
vlan_tag, 1);
#include <linux/log2.h>
#include <linux/aer.h>
-#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+#if IS_ENABLED(CONFIG_CNIC)
#define BCM_CNIC 1
#include "cnic_if.h"
#endif
struct bnx2 *bp = netdev_priv(dev);
int rc;
- rc = bnx2_request_firmware(bp);
- if (rc < 0)
- goto out;
-
netif_carrier_off(dev);
bnx2_disable_int(bp);
bnx2_free_irq(bp);
bnx2_free_mem(bp);
bnx2_del_napi(bp);
- bnx2_release_firmware(bp);
goto out;
}
pci_set_drvdata(pdev, dev);
+ rc = bnx2_request_firmware(bp);
+ if (rc < 0)
+ goto error;
+
+ bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
return 0;
error:
+ bnx2_release_firmware(bp);
pci_iounmap(pdev, bp->regview);
pci_release_regions(pdev);
pci_disable_device(pdev);
/*
* This file is part of the Chelsio T4 Ethernet driver for Linux.
*
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
#include "cxgb4_uld.h"
#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
+extern struct list_head adapter_list;
+extern struct mutex uld_mutex;
enum {
MAX_NPORTS = 4, /* max # of ports */
enum chip_type chip; /* chip code */
struct arch_specific_params arch; /* chip specific params */
unsigned char offload;
+ unsigned char crypto; /* HW capability for crypto */
unsigned char bypass;
unsigned int ofldq_wr_cred;
bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */
+ unsigned int nsched_cls; /* number of traffic classes */
unsigned int max_ordird_qp; /* Max read depth per RDMA QP */
unsigned int max_ird_adapter; /* Max read depth per adapter */
};
struct fw_hdr fw_hdr;
};
-
struct trace_params {
u32 data[TRACE_LEN / 4];
u32 mask[TRACE_LEN / 4];
unsigned short supported; /* link capabilities */
unsigned short advertising; /* advertised capabilities */
unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned short requested_speed; /* speed user has requested */
- unsigned short speed; /* actual link speed */
+ unsigned int requested_speed; /* speed user has requested */
+ unsigned int speed; /* actual link speed */
unsigned char requested_fc; /* flow control user has requested */
unsigned char fc; /* actual link flow control */
unsigned char autoneg; /* autonegotiating? */
MAX_ETH_QSETS = 32, /* # of Ethernet Tx/Rx queue sets */
MAX_OFLD_QSETS = 16, /* # of offload Tx, iscsi Rx queue sets */
MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */
- MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */
- MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */
-
- /* # of streaming iSCSIT Rx queues */
- MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
};
enum {
enum {
INGQ_EXTRAS = 2, /* firmware event queue and */
/* forwarded interrupts */
- MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
- MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
+ MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
};
struct adapter;
#endif /* CONFIG_CHELSIO_T4_FCOE */
bool rxtstamp; /* Enable TS */
struct hwtstamp_config tstamp_config;
+ struct sched_table *sched_tbl;
};
struct dentry;
FW_OFLD_CONN = (1 << 9),
};
+enum {
+ ULP_CRYPTO_LOOKASIDE = 1 << 0,
+};
+
struct rx_sw_desc;
struct sge_fl { /* SGE free-buffer queue state */
u8 full; /* the Tx ring is full */
} ____cacheline_aligned_in_smp;
+struct sge_uld_rxq_info {
+ char name[IFNAMSIZ]; /* name of ULD driver */
+ struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
+ u16 *msix_tbl; /* msix_tbl for uld */
+ u16 *rspq_id; /* response queue id's of rxq */
+ u16 nrxq; /* # of ingress uld queues */
+ u16 nciq; /* # of completion queues */
+ u8 uld; /* uld type */
+};
+
struct sge {
struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS];
struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
- struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
- struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
- struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
- struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
+ struct sge_uld_rxq_info **uld_rxq_info;
struct sge_rspq intrq ____cacheline_aligned_in_smp;
spinlock_t intrq_lock;
u16 max_ethqsets; /* # of available Ethernet queue sets */
u16 ethqsets; /* # of active Ethernet queue sets */
u16 ethtxq_rover; /* Tx queue to clean up next */
- u16 iscsiqsets; /* # of active iSCSI queue sets */
- u16 niscsitq; /* # of available iSCST Rx queues */
- u16 rdmaqs; /* # of available RDMA Rx queues */
- u16 rdmaciqs; /* # of available RDMA concentrator IQs */
- u16 iscsi_rxq[MAX_OFLD_QSETS];
- u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
- u16 rdma_rxq[MAX_RDMA_QUEUES];
- u16 rdma_ciq[MAX_RDMA_CIQS];
+ u16 ofldqsets; /* # of active ofld queue sets */
+ u16 nqs_per_uld; /* # of Rx queues per ULD */
u16 timer_val[SGE_NTIMERS];
u8 counter_val[SGE_NCOUNTERS];
u32 fl_pg_order; /* large page allocation size */
};
#define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
-#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
-#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
-#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
-#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
+#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++)
struct l2t_data;
u8 addr[ETH_ALEN];
};
+struct uld_msix_bmap {
+ unsigned long *msix_bmap;
+ unsigned int mapsize;
+ spinlock_t lock; /* lock for acquiring bitmap */
+};
+
+struct uld_msix_info {
+ unsigned short vec;
+ char desc[IFNAMSIZ + 10];
+ unsigned int idx;
+};
+
+struct vf_info {
+ unsigned char vf_mac_addr[ETH_ALEN];
+ bool pf_set_mac;
+};
+
struct adapter {
void __iomem *regs;
void __iomem *bar2;
unsigned int mbox;
unsigned int pf;
unsigned int flags;
+ unsigned int adap_idx;
enum chip_type chip;
int msg_enable;
unsigned short vec;
char desc[IFNAMSIZ + 10];
} msix_info[MAX_INGQ + 1];
+ struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
+ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
+ int msi_idx;
struct doorbell_stats db_stats;
struct sge sge;
struct net_device *port[MAX_NPORTS];
u8 chan_map[NCHAN]; /* channel -> port map */
+ struct vf_info *vfinfo;
+ u8 num_vfs;
+
u32 filter_mode;
unsigned int l2t_start;
unsigned int l2t_end;
unsigned int clipt_start;
unsigned int clipt_end;
struct clip_tbl *clipt;
+ struct cxgb4_uld_info *uld;
void *uld_handle[CXGB4_ULD_MAX];
+ unsigned int num_uld;
+ unsigned int num_ofld_uld;
struct list_head list_node;
struct list_head rcu_node;
struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
#define T4_OS_LOG_MBOX_CMDS 256
struct mbox_cmd_log *mbox_log;
+ struct mutex uld_mutex;
+
struct dentry *debugfs_root;
bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */
bool trace_rss; /* 1 implies that different RSS flit per filter is
spinlock_t stats_lock;
spinlock_t win0_lock ____cacheline_aligned_in_smp;
+
+ /* TC u32 offload */
+ struct cxgb4_tc_u32_table *tc_u32;
+};
+
+/* Support for "sched-class" command to allow a TX Scheduling Class to be
+ * programmed with various parameters.
+ */
+struct ch_sched_params {
+ s8 type; /* packet or flow */
+ union {
+ struct {
+ s8 level; /* scheduler hierarchy level */
+ s8 mode; /* per-class or per-flow */
+ s8 rateunit; /* bit or packet rate */
+ s8 ratemode; /* %port relative or kbps absolute */
+ s8 channel; /* scheduler channel [0..N] */
+ s8 class; /* scheduler class [0..N] */
+ s32 minrate; /* minimum rate */
+ s32 maxrate; /* maximum rate */
+ s16 weight; /* percent weight */
+ s16 pktsize; /* average packet size */
+ } params;
+ } u;
+};
+
+enum {
+ SCHED_CLASS_TYPE_PACKET = 0, /* class type */
+};
+
+enum {
+ SCHED_CLASS_LEVEL_CL_RL = 0, /* class rate limiter */
+};
+
+enum {
+ SCHED_CLASS_MODE_CLASS = 0, /* per-class scheduling */
+};
+
+enum {
+ SCHED_CLASS_RATEUNIT_BITS = 0, /* bit rate scheduling */
+};
+
+enum {
+ SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */
+};
+
+/* Support for "sched_queue" command to allow one or more NIC TX Queues
+ * to be bound to a TX Scheduling Class.
+ */
+struct ch_sched_queue {
+ s8 queue; /* queue index */
+ s8 class; /* class index */
};
/* Defined bit width of user definable filter tuples
VLAN_REWRITE
};
+/* Host shadow copy of ingress filter entry. This is in host native format
+ * and doesn't match the ordering or bit order, etc. of the hardware or the
+ * firmware command. The use of bit-field structure elements is purely to
+ * remind ourselves of the field size limitations and save memory in the case
+ * where the filter table is large.
+ */
+struct filter_entry {
+ /* Administrative fields for filter. */
+ u32 valid:1; /* filter allocated and valid */
+ u32 locked:1; /* filter is administratively locked */
+
+ u32 pending:1; /* filter action is pending firmware reply */
+ u32 smtidx:8; /* Source MAC Table index for smac */
+ struct filter_ctx *ctx; /* Caller's completion hook */
+ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */
+ struct net_device *dev; /* Associated net device */
+ u32 tid; /* This will store the actual tid */
+
+ /* The filter itself. Most of this is a straight copy of information
+ * provided by the extended ioctl(). Some fields are translated to
+ * internal forms -- for instance the Ingress Queue ID passed in from
+ * the ioctl() is translated into the Absolute Ingress Queue ID.
+ */
+ struct ch_filter_specification fs;
+};
+
static inline int is_offload(const struct adapter *adap)
{
return adap->params.offload;
}
+static inline int is_pci_uld(const struct adapter *adap)
+{
+ return adap->params.crypto;
+}
+
+static inline int is_uld(const struct adapter *adap)
+{
+ return (adap->params.offload || adap->params.crypto);
+}
+
static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
{
return readl(adap->regs + reg_addr);
int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
struct net_device *dev, unsigned int iqid,
unsigned int cmplqid);
+int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
+ unsigned int cmplqid);
int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
struct net_device *dev, unsigned int iqid);
irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
void t4_sge_start(struct adapter *adap);
void t4_sge_stop(struct adapter *adap);
int cxgb_busy_poll(struct napi_struct *napi);
-int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
- unsigned int cnt);
void cxgb4_set_ethtool_ops(struct net_device *netdev);
int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
extern int dbfifo_int_thresh;
return a & 0x3f;
}
+int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
+ unsigned int cnt);
+static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
+ unsigned int us, unsigned int cnt,
+ unsigned int size, unsigned int iqe_size)
+{
+ q->adap = adap;
+ cxgb4_set_rspq_intr_params(q, us, cnt);
+ q->iqe_len = iqe_size;
+ q->size = size;
+}
+
void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
unsigned int data_reg, const u32 *vals,
unsigned int nregs, unsigned int start_idx);
int filter_index, int *enabled);
int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
u32 addr, u32 val);
+int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+ int rateunit, int ratemode, int channel, int class,
+ int minrate, int maxrate, int weight, int pktsize);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
void t4_free_mem(void *addr);
void t4_idma_monitor_init(struct adapter *adapter,
void t4_idma_monitor(struct adapter *adapter,
struct sge_idma_monitor_state *idma,
int hz, int ticks);
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+ unsigned int naddr, u8 *addr);
+void t4_uld_mem_free(struct adapter *adap);
+int t4_uld_mem_alloc(struct adapter *adap);
+void t4_uld_clean_up(struct adapter *adap);
+void t4_register_netevent_notifier(void);
+void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
#endif /* __CXGB4_H__ */
/*
* This file is part of the Chelsio T4 Ethernet driver for Linux.
*
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
#include <linux/crash_dump.h>
#include "cxgb4.h"
+#include "cxgb4_filter.h"
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"
+#include "sched.h"
+#include "cxgb4_tc_u32.h"
char cxgb4_driver_name[] = KBUILD_MODNAME;
const char cxgb4_driver_version[] = DRV_VERSION;
#define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
-/* Host shadow copy of ingress filter entry. This is in host native format
- * and doesn't match the ordering or bit order, etc. of the hardware of the
- * firmware command. The use of bit-field structure elements is purely to
- * remind ourselves of the field size limitations and save memory in the case
- * where the filter table is large.
- */
-struct filter_entry {
- /* Administrative fields for filter.
- */
- u32 valid:1; /* filter allocated and valid */
- u32 locked:1; /* filter is administratively locked */
-
- u32 pending:1; /* filter action is pending firmware reply */
- u32 smtidx:8; /* Source MAC Table index for smac */
- struct l2t_entry *l2t; /* Layer Two Table entry for dmac */
-
- /* The filter itself. Most of this is a straight copy of information
- * provided by the extended ioctl(). Some fields are translated to
- * internal forms -- for instance the Ingress Queue ID passed in from
- * the ioctl() is translated into the Absolute Ingress Queue ID.
- */
- struct ch_filter_specification fs;
-};
-
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
static struct dentry *cxgb4_debugfs_root;
-static LIST_HEAD(adapter_list);
-static DEFINE_MUTEX(uld_mutex);
-/* Adapter list to be accessed from atomic context */
-static LIST_HEAD(adap_rcu_list);
-static DEFINE_SPINLOCK(adap_rcu_lock);
-static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
-static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
+LIST_HEAD(adapter_list);
+DEFINE_MUTEX(uld_mutex);
static void link_report(struct net_device *dev)
{
}
#endif /* CONFIG_CHELSIO_T4_DCB */
-/* Clear a filter and release any of its resources that we own. This also
- * clears the filter's "pending" status.
- */
-static void clear_filter(struct adapter *adap, struct filter_entry *f)
-{
- /* If the new or old filter have loopback rewriteing rules then we'll
- * need to free any existing Layer Two Table (L2T) entries of the old
- * filter rule. The firmware will handle freeing up any Source MAC
- * Table (SMT) entries used for rewriting Source MAC Addresses in
- * loopback rules.
- */
- if (f->l2t)
- cxgb4_l2t_release(f->l2t);
-
- /* The zeroing of the filter rule below clears the filter valid,
- * pending, locked flags, l2t pointer, etc. so it's all we need for
- * this operation.
- */
- memset(f, 0, sizeof(*f));
-}
-
-/* Handle a filter write/deletion reply.
- */
-static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
-{
- unsigned int idx = GET_TID(rpl);
- unsigned int nidx = idx - adap->tids.ftid_base;
- unsigned int ret;
- struct filter_entry *f;
-
- if (idx >= adap->tids.ftid_base && nidx <
- (adap->tids.nftids + adap->tids.nsftids)) {
- idx = nidx;
- ret = TCB_COOKIE_G(rpl->cookie);
- f = &adap->tids.ftid_tab[idx];
-
- if (ret == FW_FILTER_WR_FLT_DELETED) {
- /* Clear the filter when we get confirmation from the
- * hardware that the filter has been deleted.
- */
- clear_filter(adap, f);
- } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
- dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
- idx);
- clear_filter(adap, f);
- } else if (ret == FW_FILTER_WR_FLT_ADDED) {
- f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
- f->pending = 0; /* asynchronous setup completed */
- f->valid = 1;
- } else {
- /* Something went wrong. Issue a warning about the
- * problem and clear everything out.
- */
- dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
- idx, ret);
- clear_filter(adap, f);
- }
- }
-}
-
/* Response queue handler for the FW event queue.
*/
static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
return 0;
}
-/* Flush the aggregated lro sessions */
-static void uldrx_flush_handler(struct sge_rspq *q)
-{
- if (ulds[q->uld].lro_flush)
- ulds[q->uld].lro_flush(&q->lro_mgr);
-}
-
-/**
- * uldrx_handler - response queue handler for ULD queues
- * @q: the response queue that received the packet
- * @rsp: the response queue descriptor holding the offload message
- * @gl: the gather list of packet fragments
- *
- * Deliver an ingress offload packet to a ULD. All processing is done by
- * the ULD, we just maintain statistics.
- */
-static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
- const struct pkt_gl *gl)
-{
- struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
- int ret;
-
- /* FW can send CPLs encapsulated in a CPL_FW4_MSG.
- */
- if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
- ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
- rsp += 2;
-
- if (q->flush_handler)
- ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
- rsp, gl, &q->lro_mgr,
- &q->napi);
- else
- ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
- rsp, gl);
-
- if (ret) {
- rxq->stats.nomem++;
- return -1;
- }
-
- if (gl == NULL)
- rxq->stats.imm++;
- else if (gl == CXGB4_MSG_AN)
- rxq->stats.an++;
- else
- rxq->stats.pkts++;
- return 0;
-}
-
static void disable_msi(struct adapter *adapter)
{
if (adapter->flags & USING_MSIX) {
snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
d->name, i);
}
-
- /* offload queues */
- for_each_iscsirxq(&adap->sge, i)
- snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
- adap->port[0]->name, i);
-
- for_each_iscsitrxq(&adap->sge, i)
- snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
- adap->port[0]->name, i);
-
- for_each_rdmarxq(&adap->sge, i)
- snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
- adap->port[0]->name, i);
-
- for_each_rdmaciq(&adap->sge, i)
- snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
- adap->port[0]->name, i);
}
static int request_msix_queue_irqs(struct adapter *adap)
{
struct sge *s = &adap->sge;
- int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
- int iscsitqidx = 0;
+ int err, ethqidx;
int msi_index = 2;
err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
goto unwind;
msi_index++;
}
- for_each_iscsirxq(s, iscsiqidx) {
- err = request_irq(adap->msix_info[msi_index].vec,
- t4_sge_intr_msix, 0,
- adap->msix_info[msi_index].desc,
- &s->iscsirxq[iscsiqidx].rspq);
- if (err)
- goto unwind;
- msi_index++;
- }
- for_each_iscsitrxq(s, iscsitqidx) {
- err = request_irq(adap->msix_info[msi_index].vec,
- t4_sge_intr_msix, 0,
- adap->msix_info[msi_index].desc,
- &s->iscsitrxq[iscsitqidx].rspq);
- if (err)
- goto unwind;
- msi_index++;
- }
- for_each_rdmarxq(s, rdmaqidx) {
- err = request_irq(adap->msix_info[msi_index].vec,
- t4_sge_intr_msix, 0,
- adap->msix_info[msi_index].desc,
- &s->rdmarxq[rdmaqidx].rspq);
- if (err)
- goto unwind;
- msi_index++;
- }
- for_each_rdmaciq(s, rdmaciqqidx) {
- err = request_irq(adap->msix_info[msi_index].vec,
- t4_sge_intr_msix, 0,
- adap->msix_info[msi_index].desc,
- &s->rdmaciq[rdmaciqqidx].rspq);
- if (err)
- goto unwind;
- msi_index++;
- }
return 0;
unwind:
- while (--rdmaciqqidx >= 0)
- free_irq(adap->msix_info[--msi_index].vec,
- &s->rdmaciq[rdmaciqqidx].rspq);
- while (--rdmaqidx >= 0)
- free_irq(adap->msix_info[--msi_index].vec,
- &s->rdmarxq[rdmaqidx].rspq);
- while (--iscsitqidx >= 0)
- free_irq(adap->msix_info[--msi_index].vec,
- &s->iscsitrxq[iscsitqidx].rspq);
- while (--iscsiqidx >= 0)
- free_irq(adap->msix_info[--msi_index].vec,
- &s->iscsirxq[iscsiqidx].rspq);
while (--ethqidx >= 0)
free_irq(adap->msix_info[--msi_index].vec,
&s->ethrxq[ethqidx].rspq);
free_irq(adap->msix_info[1].vec, &s->fw_evtq);
for_each_ethrxq(s, i)
free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
- for_each_iscsirxq(s, i)
- free_irq(adap->msix_info[msi_index++].vec,
- &s->iscsirxq[i].rspq);
- for_each_iscsitrxq(s, i)
- free_irq(adap->msix_info[msi_index++].vec,
- &s->iscsitrxq[i].rspq);
- for_each_rdmarxq(s, i)
- free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
- for_each_rdmaciq(s, i)
- free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
}
/**
}
}
-static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
- unsigned int nq, unsigned int per_chan, int msi_idx,
- u16 *ids, bool lro)
+
+static int setup_fw_sge_queues(struct adapter *adap)
{
- int i, err;
+ struct sge *s = &adap->sge;
+ int err = 0;
+
+ bitmap_zero(s->starving_fl, s->egr_sz);
+ bitmap_zero(s->txq_maperr, s->egr_sz);
- for (i = 0; i < nq; i++, q++) {
- if (msi_idx > 0)
- msi_idx++;
- err = t4_sge_alloc_rxq(adap, &q->rspq, false,
- adap->port[i / per_chan],
- msi_idx, q->fl.size ? &q->fl : NULL,
- uldrx_handler,
- lro ? uldrx_flush_handler : NULL,
- 0);
+ if (adap->flags & USING_MSIX)
+ adap->msi_idx = 1; /* vector 0 is for non-queue interrupts */
+ else {
+ err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
+ NULL, NULL, NULL, -1);
if (err)
return err;
- memset(&q->stats, 0, sizeof(q->stats));
- if (ids)
- ids[i] = q->rspq.abs_id;
+ adap->msi_idx = -((int)s->intrq.abs_id + 1);
}
- return 0;
+
+ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
+ adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
+ if (err)
+ t4_free_sge_resources(adap);
+ return err;
}
/**
*/
static int setup_sge_queues(struct adapter *adap)
{
- int err, msi_idx, i, j;
+ int err, i, j;
struct sge *s = &adap->sge;
-
- bitmap_zero(s->starving_fl, s->egr_sz);
- bitmap_zero(s->txq_maperr, s->egr_sz);
-
- if (adap->flags & USING_MSIX)
- msi_idx = 1; /* vector 0 is for non-queue interrupts */
- else {
- err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
- NULL, NULL, NULL, -1);
- if (err)
- return err;
- msi_idx = -((int)s->intrq.abs_id + 1);
- }
-
- /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
- * don't forget to update the following which need to be
- * synchronized to and changes here.
- *
- * 1. The calculations of MAX_INGQ in cxgb4.h.
- *
- * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
- * to accommodate any new/deleted Ingress Queues
- * which need MSI-X Vectors.
- *
- * 3. Update sge_qinfo_show() to include information on the
- * new/deleted queues.
- */
- err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
- msi_idx, NULL, fwevtq_handler, NULL, -1);
- if (err) {
-freeout: t4_free_sge_resources(adap);
- return err;
- }
+ struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
+ unsigned int cmplqid = 0;
for_each_port(adap, i) {
struct net_device *dev = adap->port[i];
struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
for (j = 0; j < pi->nqsets; j++, q++) {
- if (msi_idx > 0)
- msi_idx++;
+ if (adap->msi_idx > 0)
+ adap->msi_idx++;
err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
- msi_idx, &q->fl,
+ adap->msi_idx, &q->fl,
t4_ethrx_handler,
NULL,
t4_get_mps_bg_map(adap,
}
}
- j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */
- for_each_iscsirxq(s, i) {
+ j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
+ for_each_ofldtxq(s, i) {
err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
adap->port[i / j],
s->fw_evtq.cntxt_id);
goto freeout;
}
-#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
- err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \
- if (err) \
- goto freeout; \
- if (msi_idx > 0) \
- msi_idx += nq; \
-} while (0)
-
- ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
- ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
- ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
- j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
- ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
-
-#undef ALLOC_OFLD_RXQS
-
for_each_port(adap, i) {
- /*
- * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
+ /* Note that cmplqid below is 0 if we don't
* have RDMA queues, and that's the right value.
*/
+ if (rxq_info)
+ cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id;
+
err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
- s->fw_evtq.cntxt_id,
- s->rdmarxq[i].rspq.cntxt_id);
+ s->fw_evtq.cntxt_id, cmplqid);
if (err)
goto freeout;
}
RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
return 0;
+freeout:
+ t4_free_sge_resources(adap);
+ return err;
}
/*
kvfree(addr);
}
-/* Send a Work Request to write the filter at a specified index. We construct
- * a Firmware Filter Work Request to have the work done and put the indicated
- * filter into "pending" mode which will prevent any further actions against
- * it till we get a reply from the firmware on the completion status of the
- * request.
- */
-static int set_filter_wr(struct adapter *adapter, int fidx)
-{
- struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- struct sk_buff *skb;
- struct fw_filter_wr *fwr;
- unsigned int ftid;
-
- skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- /* If the new filter requires loopback Destination MAC and/or VLAN
- * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
- * the filter.
- */
- if (f->fs.newdmac || f->fs.newvlan) {
- /* allocate L2T entry for new filter */
- f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
- f->fs.eport, f->fs.dmac);
- if (f->l2t == NULL) {
- kfree_skb(skb);
- return -ENOMEM;
- }
- }
-
- ftid = adapter->tids.ftid_base + fidx;
-
- fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
- memset(fwr, 0, sizeof(*fwr));
-
- /* It would be nice to put most of the following in t4_hw.c but most
- * of the work is translating the cxgbtool ch_filter_specification
- * into the Work Request and the definition of that structure is
- * currently in cxgbtool.h which isn't appropriate to pull into the
- * common code. We may eventually try to come up with a more neutral
- * filter specification structure but for now it's easiest to simply
- * put this fairly direct code in line ...
- */
- fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
- fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
- fwr->tid_to_iq =
- htonl(FW_FILTER_WR_TID_V(ftid) |
- FW_FILTER_WR_RQTYPE_V(f->fs.type) |
- FW_FILTER_WR_NOREPLY_V(0) |
- FW_FILTER_WR_IQ_V(f->fs.iq));
- fwr->del_filter_to_l2tix =
- htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
- FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
- FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
- FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
- FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
- FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
- FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
- FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
- FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
- f->fs.newvlan == VLAN_REWRITE) |
- FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
- f->fs.newvlan == VLAN_REWRITE) |
- FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
- FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
- FW_FILTER_WR_PRIO_V(f->fs.prio) |
- FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
- fwr->ethtype = htons(f->fs.val.ethtype);
- fwr->ethtypem = htons(f->fs.mask.ethtype);
- fwr->frag_to_ovlan_vldm =
- (FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
- FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
- FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
- FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
- FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
- FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
- fwr->smac_sel = 0;
- fwr->rx_chan_rx_rpl_iq =
- htons(FW_FILTER_WR_RX_CHAN_V(0) |
- FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
- fwr->maci_to_matchtypem =
- htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
- FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
- FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
- FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
- FW_FILTER_WR_PORT_V(f->fs.val.iport) |
- FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
- FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
- FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
- fwr->ptcl = f->fs.val.proto;
- fwr->ptclm = f->fs.mask.proto;
- fwr->ttyp = f->fs.val.tos;
- fwr->ttypm = f->fs.mask.tos;
- fwr->ivlan = htons(f->fs.val.ivlan);
- fwr->ivlanm = htons(f->fs.mask.ivlan);
- fwr->ovlan = htons(f->fs.val.ovlan);
- fwr->ovlanm = htons(f->fs.mask.ovlan);
- memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
- memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
- memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
- memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
- fwr->lp = htons(f->fs.val.lport);
- fwr->lpm = htons(f->fs.mask.lport);
- fwr->fp = htons(f->fs.val.fport);
- fwr->fpm = htons(f->fs.mask.fport);
- if (f->fs.newsmac)
- memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
-
- /* Mark the filter as "pending" and ship off the Filter Work Request.
- * When we get the Work Request Reply we'll clear the pending status.
- */
- f->pending = 1;
- set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
- t4_ofld_send(adapter, skb);
- return 0;
-}
-
-/* Delete the filter at a specified index.
- */
-static int del_filter_wr(struct adapter *adapter, int fidx)
-{
- struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- struct sk_buff *skb;
- struct fw_filter_wr *fwr;
- unsigned int len, ftid;
-
- len = sizeof(*fwr);
- ftid = adapter->tids.ftid_base + fidx;
-
- skb = alloc_skb(len, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- fwr = (struct fw_filter_wr *)__skb_put(skb, len);
- t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
-
- /* Mark the filter as "pending" and ship off the Filter Work Request.
- * When we get the Work Request Reply we'll clear the pending status.
- */
- f->pending = 1;
- t4_mgmt_tx(adapter, skb);
- return 0;
-}
-
static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
*/
static int tid_init(struct tid_info *t)
{
- size_t size;
- unsigned int stid_bmap_size;
- unsigned int natids = t->natids;
struct adapter *adap = container_of(t, struct adapter, tids);
+ unsigned int max_ftids = t->nftids + t->nsftids;
+ unsigned int natids = t->natids;
+ unsigned int stid_bmap_size;
+ unsigned int ftid_bmap_size;
+ size_t size;
stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
+ ftid_bmap_size = BITS_TO_LONGS(t->nftids);
size = t->ntids * sizeof(*t->tid_tab) +
natids * sizeof(*t->atid_tab) +
t->nstids * sizeof(*t->stid_tab) +
t->nsftids * sizeof(*t->stid_tab) +
stid_bmap_size * sizeof(long) +
- t->nftids * sizeof(*t->ftid_tab) +
- t->nsftids * sizeof(*t->ftid_tab);
+ max_ftids * sizeof(*t->ftid_tab) +
+ ftid_bmap_size * sizeof(long);
t->tid_tab = t4_alloc_mem(size);
if (!t->tid_tab)
t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+ t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
spin_lock_init(&t->stid_lock);
spin_lock_init(&t->atid_lock);
+ spin_lock_init(&t->ftid_lock);
t->stids_in_use = 0;
t->sftids_in_use = 0;
t->atid_tab[natids - 1].next = &t->atid_tab[natids];
t->afree = t->atid_tab;
}
- bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
- /* Reserve stid 0 for T4/T5 adapters */
- if (!t->stid_base &&
- (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5))
- __set_bit(0, t->stid_bmap);
+ if (is_offload(adap)) {
+ bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
+ /* Reserve stid 0 for T4/T5 adapters */
+ if (!t->stid_base &&
+ CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+ __set_bit(0, t->stid_bmap);
+ }
+
+ bitmap_zero(t->ftid_bmap, t->nftids);
return 0;
}
for_each_ethrxq(&adap->sge, i)
disable_txq_db(&adap->sge.ethtxq[i].q);
- for_each_iscsirxq(&adap->sge, i)
+ for_each_ofldtxq(&adap->sge, i)
disable_txq_db(&adap->sge.ofldtxq[i].q);
for_each_port(adap, i)
disable_txq_db(&adap->sge.ctrlq[i].q);
for_each_ethrxq(&adap->sge, i)
enable_txq_db(adap, &adap->sge.ethtxq[i].q);
- for_each_iscsirxq(&adap->sge, i)
+ for_each_ofldtxq(&adap->sge, i)
enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
for_each_port(adap, i)
enable_txq_db(adap, &adap->sge.ctrlq[i].q);
static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
{
- if (adap->uld_handle[CXGB4_ULD_RDMA])
- ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
- cmd);
+ enum cxgb4_uld type = CXGB4_ULD_RDMA;
+
+ if (adap->uld && adap->uld[type].handle)
+ adap->uld[type].control(adap->uld[type].handle, cmd);
}
static void process_db_full(struct work_struct *work)
if (ret)
CH_WARN(adap, "DB drop recovery failed.\n");
}
+
static void recover_all_queues(struct adapter *adap)
{
int i;
for_each_ethrxq(&adap->sge, i)
sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
- for_each_iscsirxq(&adap->sge, i)
+ for_each_ofldtxq(&adap->sge, i)
sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
for_each_port(adap, i)
sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
queue_work(adap->workq, &adap->db_drop_task);
}
-static void uld_attach(struct adapter *adap, unsigned int uld)
-{
- void *handle;
- struct cxgb4_lld_info lli;
- unsigned short i;
-
- lli.pdev = adap->pdev;
- lli.pf = adap->pf;
- lli.l2t = adap->l2t;
- lli.tids = &adap->tids;
- lli.ports = adap->port;
- lli.vr = &adap->vres;
- lli.mtus = adap->params.mtus;
- if (uld == CXGB4_ULD_RDMA) {
- lli.rxq_ids = adap->sge.rdma_rxq;
- lli.ciq_ids = adap->sge.rdma_ciq;
- lli.nrxq = adap->sge.rdmaqs;
- lli.nciq = adap->sge.rdmaciqs;
- } else if (uld == CXGB4_ULD_ISCSI) {
- lli.rxq_ids = adap->sge.iscsi_rxq;
- lli.nrxq = adap->sge.iscsiqsets;
- } else if (uld == CXGB4_ULD_ISCSIT) {
- lli.rxq_ids = adap->sge.iscsit_rxq;
- lli.nrxq = adap->sge.niscsitq;
- }
- lli.ntxq = adap->sge.iscsiqsets;
- lli.nchan = adap->params.nports;
- lli.nports = adap->params.nports;
- lli.wr_cred = adap->params.ofldq_wr_cred;
- lli.adapter_type = adap->params.chip;
- lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
- lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
- lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
- lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
- lli.iscsi_ppm = &adap->iscsi_ppm;
- lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
- lli.udb_density = 1 << adap->params.sge.eq_qpp;
- lli.ucq_density = 1 << adap->params.sge.iq_qpp;
- lli.filt_mode = adap->params.tp.vlan_pri_map;
- /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
- for (i = 0; i < NCHAN; i++)
- lli.tx_modq[i] = i;
- lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
- lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
- lli.fw_vers = adap->params.fw_vers;
- lli.dbfifo_int_thresh = dbfifo_int_thresh;
- lli.sge_ingpadboundary = adap->sge.fl_align;
- lli.sge_egrstatuspagesize = adap->sge.stat_len;
- lli.sge_pktshift = adap->sge.pktshift;
- lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
- lli.max_ordird_qp = adap->params.max_ordird_qp;
- lli.max_ird_adapter = adap->params.max_ird_adapter;
- lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
- lli.nodeid = dev_to_node(adap->pdev_dev);
-
- handle = ulds[uld].add(&lli);
- if (IS_ERR(handle)) {
- dev_warn(adap->pdev_dev,
- "could not attach to the %s driver, error %ld\n",
- uld_str[uld], PTR_ERR(handle));
- return;
- }
-
- adap->uld_handle[uld] = handle;
-
+void t4_register_netevent_notifier(void)
+{
if (!netevent_registered) {
register_netevent_notifier(&cxgb4_netevent_nb);
netevent_registered = true;
}
-
- if (adap->flags & FULL_INIT_DONE)
- ulds[uld].state_change(handle, CXGB4_STATE_UP);
-}
-
-static void attach_ulds(struct adapter *adap)
-{
- unsigned int i;
-
- spin_lock(&adap_rcu_lock);
- list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
- spin_unlock(&adap_rcu_lock);
-
- mutex_lock(&uld_mutex);
- list_add_tail(&adap->list_node, &adapter_list);
- for (i = 0; i < CXGB4_ULD_MAX; i++)
- if (ulds[i].add)
- uld_attach(adap, i);
- mutex_unlock(&uld_mutex);
}
static void detach_ulds(struct adapter *adap)
mutex_lock(&uld_mutex);
list_del(&adap->list_node);
for (i = 0; i < CXGB4_ULD_MAX; i++)
- if (adap->uld_handle[i]) {
- ulds[i].state_change(adap->uld_handle[i],
+ if (adap->uld && adap->uld[i].handle) {
+ adap->uld[i].state_change(adap->uld[i].handle,
CXGB4_STATE_DETACH);
- adap->uld_handle[i] = NULL;
+ adap->uld[i].handle = NULL;
}
if (netevent_registered && list_empty(&adapter_list)) {
unregister_netevent_notifier(&cxgb4_netevent_nb);
netevent_registered = false;
}
mutex_unlock(&uld_mutex);
-
- spin_lock(&adap_rcu_lock);
- list_del_rcu(&adap->rcu_node);
- spin_unlock(&adap_rcu_lock);
}
static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
mutex_lock(&uld_mutex);
for (i = 0; i < CXGB4_ULD_MAX; i++)
- if (adap->uld_handle[i])
- ulds[i].state_change(adap->uld_handle[i], new_state);
+ if (adap->uld && adap->uld[i].handle)
+ adap->uld[i].state_change(adap->uld[i].handle,
+ new_state);
mutex_unlock(&uld_mutex);
}
-/**
- * cxgb4_register_uld - register an upper-layer driver
- * @type: the ULD type
- * @p: the ULD methods
- *
- * Registers an upper-layer driver with this driver and notifies the ULD
- * about any presently available devices that support its type. Returns
- * %-EBUSY if a ULD of the same type is already registered.
- */
-int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
-{
- int ret = 0;
- struct adapter *adap;
-
- if (type >= CXGB4_ULD_MAX)
- return -EINVAL;
- mutex_lock(&uld_mutex);
- if (ulds[type].add) {
- ret = -EBUSY;
- goto out;
- }
- ulds[type] = *p;
- list_for_each_entry(adap, &adapter_list, list_node)
- uld_attach(adap, type);
-out: mutex_unlock(&uld_mutex);
- return ret;
-}
-EXPORT_SYMBOL(cxgb4_register_uld);
-
-/**
- * cxgb4_unregister_uld - unregister an upper-layer driver
- * @type: the ULD type
- *
- * Unregisters an existing upper-layer driver.
- */
-int cxgb4_unregister_uld(enum cxgb4_uld type)
-{
- struct adapter *adap;
-
- if (type >= CXGB4_ULD_MAX)
- return -EINVAL;
- mutex_lock(&uld_mutex);
- list_for_each_entry(adap, &adapter_list, list_node)
- adap->uld_handle[type] = NULL;
- ulds[type].add = NULL;
- mutex_unlock(&uld_mutex);
- return 0;
-}
-EXPORT_SYMBOL(cxgb4_unregister_uld);
-
#if IS_ENABLED(CONFIG_IPV6)
static int cxgb4_inet6addr_handler(struct notifier_block *this,
unsigned long event, void *data)
adap->msix_info[0].desc, adap);
if (err)
goto irq_err;
-
err = request_msix_queue_irqs(adap);
if (err) {
free_irq(adap->msix_info[0].vec, adap);
return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
}
-/* Return an error number if the indicated filter isn't writable ...
- */
-static int writable_filter(struct filter_entry *f)
-{
- if (f->locked)
- return -EPERM;
- if (f->pending)
- return -EBUSY;
-
- return 0;
-}
-
-/* Delete the filter at the specified index (if valid). The checks for all
- * the common problems with doing this like the filter being locked, currently
- * pending in another operation, etc.
- */
-static int delete_filter(struct adapter *adapter, unsigned int fidx)
-{
- struct filter_entry *f;
- int ret;
-
- if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
- return -EINVAL;
-
- f = &adapter->tids.ftid_tab[fidx];
- ret = writable_filter(f);
- if (ret)
- return ret;
- if (f->valid)
- return del_filter_wr(adapter, fidx);
-
- return 0;
-}
-
int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
__be32 sip, __be16 sport, __be16 vlan,
unsigned int queue, unsigned char port, unsigned char mask)
int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
unsigned int queue, bool ipv6)
{
- int ret;
struct filter_entry *f;
struct adapter *adap;
/* Unlock the filter */
f->locked = 0;
- ret = delete_filter(adap, stid);
- if (ret)
- return ret;
-
- return 0;
+ return delete_filter(adap, stid);
}
EXPORT_SYMBOL(cxgb4_remove_server_filter);
return ret;
}
+#ifdef CONFIG_PCI_IOV
+static int dummy_open(struct net_device *dev)
+{
+ /* Turn carrier off since we don't have to transmit anything on this
+ * interface.
+ */
+ netif_carrier_off(dev);
+ return 0;
+}
+
+/* Fill MAC address that will be assigned by the FW */
+static void fill_vf_station_mac_addr(struct adapter *adap)
+{
+ unsigned int i;
+ u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
+ int err;
+ u8 *na;
+ u16 a, b;
+
+ err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
+ if (!err) {
+ na = adap->params.vpd.na;
+ for (i = 0; i < ETH_ALEN; i++)
+ hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
+ hex2val(na[2 * i + 1]));
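+		/* Build a locally administered, unicast base MAC address by
+		 * mixing the leading bytes of the VPD network address.
+		 */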
+ a = (hw_addr[0] << 8) | hw_addr[1];
+ b = (hw_addr[1] << 8) | hw_addr[2];
+ a ^= b;
+ a |= 0x0200; /* locally assigned Ethernet MAC address */
+ a &= ~0x0100; /* not a multicast Ethernet MAC address */
+ macaddr[0] = a >> 8;
+ macaddr[1] = a & 0xff;
+
+ for (i = 2; i < 5; i++)
+ macaddr[i] = hw_addr[i + 1];
+
+ for (i = 0; i < adap->num_vfs; i++) {
+ macaddr[5] = adap->pf * 16 + i;
+ ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr);
+ }
+ }
+}
+
+static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ int ret;
+
+ /* verify MAC addr is valid */
+ if (!is_valid_ether_addr(mac)) {
+ dev_err(pi->adapter->pdev_dev,
+ "Invalid Ethernet address %pM for VF %d\n",
+ mac, vf);
+ return -EINVAL;
+ }
+
+ dev_info(pi->adapter->pdev_dev,
+ "Setting MAC %pM on VF %d\n", mac, vf);
+ ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+ if (!ret)
+ ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
+ return ret;
+}
+
+static int cxgb_get_vf_config(struct net_device *dev,
+ int vf, struct ifla_vf_info *ivi)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+
+ if (vf >= adap->num_vfs)
+ return -EINVAL;
+ ivi->vf = vf;
+ ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+ return 0;
+}
+#endif
+
static int cxgb_set_mac_addr(struct net_device *dev, void *p)
{
int ret;
}
#endif
+static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ struct sched_class *e;
+ struct ch_sched_params p;
+ struct ch_sched_queue qe;
+ u32 req_rate;
+ int err = 0;
+
+ if (!can_sched(dev))
+ return -ENOTSUPP;
+
+ if (index < 0 || index > pi->nqsets - 1)
+ return -EINVAL;
+
+ if (!(adap->flags & FULL_INIT_DONE)) {
+ dev_err(adap->pdev_dev,
+ "Failed to rate limit on queue %d. Link Down?\n",
+ index);
+ return -EINVAL;
+ }
+
+ /* Convert from Mbps to Kbps */
+ req_rate = rate << 10;
+
+ /* Max rate is 10 Gbps */
+ if (req_rate >= SCHED_MAX_RATE_KBPS) {
+ dev_err(adap->pdev_dev,
+			"Invalid rate %u Mbps, Max rate is %u Kbps\n",
+ rate, SCHED_MAX_RATE_KBPS);
+ return -ERANGE;
+ }
+
+ /* First unbind the queue from any existing class */
+ memset(&qe, 0, sizeof(qe));
+ qe.queue = index;
+ qe.class = SCHED_CLS_NONE;
+
+ err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE);
+ if (err) {
+ dev_err(adap->pdev_dev,
+ "Unbinding Queue %d on port %d fail. Err: %d\n",
+ index, pi->port_id, err);
+ return err;
+ }
+
+ /* Queue already unbound */
+ if (!req_rate)
+ return 0;
+
+ /* Fetch any available unused or matching scheduling class */
+ memset(&p, 0, sizeof(p));
+ p.type = SCHED_CLASS_TYPE_PACKET;
+ p.u.params.level = SCHED_CLASS_LEVEL_CL_RL;
+ p.u.params.mode = SCHED_CLASS_MODE_CLASS;
+ p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
+ p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
+ p.u.params.channel = pi->tx_chan;
+ p.u.params.class = SCHED_CLS_NONE;
+ p.u.params.minrate = 0;
+ p.u.params.maxrate = req_rate;
+ p.u.params.weight = 0;
+ p.u.params.pktsize = dev->mtu;
+
+ e = cxgb4_sched_class_alloc(dev, &p);
+ if (!e)
+ return -ENOMEM;
+
+ /* Bind the queue to a scheduling class */
+ memset(&qe, 0, sizeof(qe));
+ qe.queue = index;
+ qe.class = e->idx;
+
+ err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE);
+ if (err)
+ dev_err(adap->pdev_dev,
+ "Queue rate limiting failed. Err: %d\n", err);
+ return err;
+}
+
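+/* ndo_setup_tc handler: offload ingress u32 classifier rules via
+ * cxgb4_config_knode()/cxgb4_delete_knode(); anything else returns
+ * -EOPNOTSUPP and stays on the software path.
+ */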
+int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+ struct tc_to_netdev *tc)
+{
+ struct port_info *pi = netdev2pinfo(dev);
+ struct adapter *adap = netdev2adap(dev);
+
+ if (!(adap->flags & FULL_INIT_DONE)) {
+ dev_err(adap->pdev_dev,
+ "Failed to setup tc on port %d. Link Down?\n",
+ pi->port_id);
+ return -EINVAL;
+ }
+
+ if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
+ tc->type == TC_SETUP_CLSU32) {
+ switch (tc->cls_u32->command) {
+ case TC_CLSU32_NEW_KNODE:
+ case TC_CLSU32_REPLACE_KNODE:
+ return cxgb4_config_knode(dev, proto, tc->cls_u32);
+ case TC_CLSU32_DELETE_KNODE:
+ return cxgb4_delete_knode(dev, proto, tc->cls_u32);
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return -EOPNOTSUPP;
+}
+
static const struct net_device_ops cxgb4_netdev_ops = {
.ndo_open = cxgb_open,
.ndo_stop = cxgb_close,
#ifdef CONFIG_NET_RX_BUSY_POLL
.ndo_busy_poll = cxgb_busy_poll,
#endif
+ .ndo_set_tx_maxrate = cxgb_set_tx_maxrate,
+ .ndo_setup_tc = cxgb_setup_tc,
+};
+#ifdef CONFIG_PCI_IOV
+static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
+ .ndo_open = dummy_open,
+ .ndo_set_vf_mac = cxgb_set_vf_mac,
+ .ndo_get_vf_config = cxgb_get_vf_config,
+};
+#endif
+
+static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+ struct adapter *adapter = netdev2adap(dev);
+
+ strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver));
+ strlcpy(info->version, cxgb4_driver_version,
+ sizeof(info->version));
+ strlcpy(info->bus_info, pci_name(adapter->pdev),
+ sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
+ .get_drvinfo = get_drvinfo,
};
void t4_fatal_err(struct adapter *adap)
adap->clipt_start = val[0];
adap->clipt_end = val[1];
+	/* We don't yet have a PARAMS call to retrieve the number of Traffic
+ * Classes supported by the hardware/firmware so we hard code it here
+ * for now.
+ */
+ adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+
/* query params related to active filter region */
params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
adap->params.ofldq_wr_cred = val[5];
adap->params.offload = 1;
+ adap->num_ofld_uld += 1;
}
if (caps_cmd.rdmacaps) {
params[0] = FW_PARAM_PFVF(STAG_START);
"max_ordird_qp %d max_ird_adapter %d\n",
adap->params.max_ordird_qp,
adap->params.max_ird_adapter);
+ adap->num_ofld_uld += 2;
}
if (caps_cmd.iscsicaps) {
params[0] = FW_PARAM_PFVF(ISCSI_START);
goto bye;
adap->vres.iscsi.start = val[0];
adap->vres.iscsi.size = val[1] - val[0] + 1;
+		/* LIO target and cxgb4i initiator */
+ adap->num_ofld_uld += 2;
+ }
+ if (caps_cmd.cryptocaps) {
+ /* Should query params here...TODO */
+ adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+ adap->num_uld += 1;
}
#undef FW_PARAM_PFVF
#undef FW_PARAM_DEV
.resume = eeh_resume,
};
+ /* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
static inline bool is_x_10g_port(const struct link_config *lc)
{
- return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
- (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+ unsigned int speeds, high_speeds;
+
+ speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+ high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+ return high_speeds != 0;
}
-static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
- unsigned int us, unsigned int cnt,
- unsigned int size, unsigned int iqe_size)
-{
- q->adap = adap;
- cxgb4_set_rspq_intr_params(q, us, cnt);
- q->iqe_len = iqe_size;
- q->size = size;
-}
-
/*
* Perform default configuration of DMA queues depending on the number and type
* of ports we found and the number of available CPUs. Most settings can be
#ifndef CONFIG_CHELSIO_T4_DCB
int q10g = 0;
#endif
- int ciq_size;
/* Reduce memory usage in kdump environment, disable all offload.
*/
- if (is_kdump_kernel())
+ if (is_kdump_kernel()) {
+ adap->params.offload = 0;
+ adap->params.crypto = 0;
+ } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) {
adap->params.offload = 0;
+ adap->params.crypto = 0;
+ }
for_each_port(adap, i)
n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
s->ethqsets = qidx;
s->max_ethqsets = qidx; /* MSI-X may lower it later */
- if (is_offload(adap)) {
+ if (is_uld(adap)) {
/*
* For offload we use 1 queue/channel if all ports are up to 1G,
* otherwise we divide all available queues amongst the channels
* capped by the number of available cores.
*/
if (n10g) {
- i = min_t(int, ARRAY_SIZE(s->iscsirxq),
- num_online_cpus());
- s->iscsiqsets = roundup(i, adap->params.nports);
- } else
- s->iscsiqsets = adap->params.nports;
- /* For RDMA one Rx queue per channel suffices */
- s->rdmaqs = adap->params.nports;
- /* Try and allow at least 1 CIQ per cpu rounding down
- * to the number of ports, with a minimum of 1 per port.
- * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
- * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
- * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
- */
- s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
- s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
- adap->params.nports;
- s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
-
- if (!is_t4(adap->params.chip))
- s->niscsitq = s->iscsiqsets;
+ i = num_online_cpus();
+ s->ofldqsets = roundup(i, adap->params.nports);
+ } else {
+ s->ofldqsets = adap->params.nports;
+ }
}
for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
s->ofldtxq[i].q.size = 1024;
- for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) {
- struct sge_ofld_rxq *r = &s->iscsirxq[i];
-
- init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
- r->rspq.uld = CXGB4_ULD_ISCSI;
- r->fl.size = 72;
- }
-
- if (!is_t4(adap->params.chip)) {
- for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
- struct sge_ofld_rxq *r = &s->iscsitrxq[i];
-
- init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
- r->rspq.uld = CXGB4_ULD_ISCSIT;
- r->fl.size = 72;
- }
- }
-
- for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
- struct sge_ofld_rxq *r = &s->rdmarxq[i];
-
- init_rspq(adap, &r->rspq, 5, 1, 511, 64);
- r->rspq.uld = CXGB4_ULD_RDMA;
- r->fl.size = 72;
- }
-
- ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
- if (ciq_size > SGE_MAX_IQ_SIZE) {
- CH_WARN(adap, "CIQ size too small for available IQs\n");
- ciq_size = SGE_MAX_IQ_SIZE;
- }
-
- for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
- struct sge_ofld_rxq *r = &s->rdmaciq[i];
-
- init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
- r->rspq.uld = CXGB4_ULD_RDMA;
- }
-
init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
- init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
+ init_rspq(adap, &s->intrq, 0, 1, 512, 64);
}
/*
}
}
+static int get_msix_info(struct adapter *adap)
+{
+ struct uld_msix_info *msix_info;
+ unsigned int max_ingq = 0;
+
+ if (is_offload(adap))
+ max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
+ if (is_pci_uld(adap))
+ max_ingq += MAX_OFLD_QSETS * adap->num_uld;
+
+ if (!max_ingq)
+ goto out;
+
+ msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
+ if (!msix_info)
+ return -ENOMEM;
+
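+	/* Bitmap from which the ULDs claim and release their MSI-X vectors
+	 * at run time.
+	 */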
+ adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq),
+ sizeof(long), GFP_KERNEL);
+ if (!adap->msix_bmap_ulds.msix_bmap) {
+ kfree(msix_info);
+ return -ENOMEM;
+ }
+ spin_lock_init(&adap->msix_bmap_ulds.lock);
+ adap->msix_info_ulds = msix_info;
+out:
+ return 0;
+}
+
+static void free_msix_info(struct adapter *adap)
+{
+ if (!(adap->num_uld && adap->num_ofld_uld))
+ return;
+
+ kfree(adap->msix_info_ulds);
+ kfree(adap->msix_bmap_ulds.msix_bmap);
+}
+
/* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
#define EXTRA_VECS 2
static int enable_msix(struct adapter *adap)
{
- int ofld_need = 0;
- int i, want, need, allocated;
+ int ofld_need = 0, uld_need = 0;
+ int i, j, want, need, allocated;
struct sge *s = &adap->sge;
unsigned int nchan = adap->params.nports;
struct msix_entry *entries;
+ int max_ingq = MAX_INGQ;
- entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1),
+ if (is_pci_uld(adap))
+ max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
+ if (is_offload(adap))
+ max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
+ entries = kmalloc(sizeof(*entries) * (max_ingq + 1),
GFP_KERNEL);
if (!entries)
return -ENOMEM;
- for (i = 0; i < MAX_INGQ + 1; ++i)
+ /* map for msix */
+ if (get_msix_info(adap)) {
+ adap->params.offload = 0;
+ adap->params.crypto = 0;
+ }
+
+ for (i = 0; i < max_ingq + 1; ++i)
entries[i].entry = i;
want = s->max_ethqsets + EXTRA_VECS;
if (is_offload(adap)) {
- want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets +
- s->niscsitq;
- /* need nchan for each possible ULD */
- if (is_t4(adap->params.chip))
- ofld_need = 3 * nchan;
- else
- ofld_need = 4 * nchan;
+ want += adap->num_ofld_uld * s->ofldqsets;
+ ofld_need = adap->num_ofld_uld * nchan;
+ }
+ if (is_pci_uld(adap)) {
+ want += adap->num_uld * s->ofldqsets;
+ uld_need = adap->num_uld * nchan;
}
#ifdef CONFIG_CHELSIO_T4_DCB
/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
* each port.
*/
- need = 8 * adap->params.nports + EXTRA_VECS + ofld_need;
+ need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
#else
- need = adap->params.nports + EXTRA_VECS + ofld_need;
+ need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
#endif
allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
if (allocated < 0) {
* Every group gets its minimum requirement and NIC gets top
* priority for leftovers.
*/
- i = allocated - EXTRA_VECS - ofld_need;
+ i = allocated - EXTRA_VECS - ofld_need - uld_need;
if (i < s->max_ethqsets) {
s->max_ethqsets = i;
if (i < s->ethqsets)
reduce_ethqs(adap, i);
}
- if (is_offload(adap)) {
- if (allocated < want) {
- s->rdmaqs = nchan;
- s->rdmaciqs = nchan;
+ if (is_uld(adap)) {
+ if (allocated < want)
+ s->nqs_per_uld = nchan;
+ else
+ s->nqs_per_uld = s->ofldqsets;
+ }
- if (!is_t4(adap->params.chip))
- s->niscsitq = nchan;
+ for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
+ adap->msix_info[i].vec = entries[i].vector;
+ if (is_uld(adap)) {
+ for (j = 0 ; i < allocated; ++i, j++) {
+ adap->msix_info_ulds[j].vec = entries[i].vector;
+ adap->msix_info_ulds[j].idx = i;
}
-
- /* leftovers go to OFLD */
- i = allocated - EXTRA_VECS - s->max_ethqsets -
- s->rdmaqs - s->rdmaciqs - s->niscsitq;
- s->iscsiqsets = (i / nchan) * nchan; /* round down */
-
+ adap->msix_bmap_ulds.mapsize = j;
}
- for (i = 0; i < allocated; ++i)
- adap->msix_info[i].vec = entries[i].vector;
dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
- "nic %d iscsi %d rdma cpl %d rdma ciq %d\n",
- allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs,
- s->rdmaciqs);
+ "nic %d per uld %d\n",
+ allocated, s->max_ethqsets, s->nqs_per_uld);
kfree(entries);
return 0;
bufp += sprintf(bufp, "1000/");
if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
bufp += sprintf(bufp, "10G/");
+ if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
+ bufp += sprintf(bufp, "25G/");
if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
bufp += sprintf(bufp, "40G/");
+ if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
+ bufp += sprintf(bufp, "100G/");
if (bufp != buf)
--bufp;
sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
unsigned int i;
t4_free_mem(adapter->l2t);
+ t4_cleanup_sched(adapter);
t4_free_mem(adapter->tids.tid_tab);
+ cxgb4_cleanup_tc_u32(adapter);
kfree(adapter->sge.egr_map);
kfree(adapter->sge.ingr_map);
kfree(adapter->sge.starving_fl);
}
#ifdef CONFIG_PCI_IOV
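+/* The PF "management" netdev never carries traffic; it only exists so that
+ * the ndo_set_vf_mac/ndo_get_vf_config callbacks can be exposed to the host.
+ */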
+static void dummy_setup(struct net_device *dev)
+{
+ dev->type = ARPHRD_NONE;
+ dev->mtu = 0;
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->tx_queue_len = 0;
+ dev->flags |= IFF_NOARP;
+ dev->priv_flags |= IFF_NO_QUEUE;
+
+ /* Initialize the device structure. */
+ dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
+ dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
+ dev->destructor = free_netdev;
+}
+
+static int config_mgmt_dev(struct pci_dev *pdev)
+{
+ struct adapter *adap = pci_get_drvdata(pdev);
+ struct net_device *netdev;
+ struct port_info *pi;
+ char name[IFNAMSIZ];
+ int err;
+
+ snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf);
+ netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup);
+ if (!netdev)
+ return -ENOMEM;
+
+ pi = netdev_priv(netdev);
+ pi->adapter = adap;
+ SET_NETDEV_DEV(netdev, &pdev->dev);
+
+ adap->port[0] = netdev;
+
+ err = register_netdev(adap->port[0]);
+ if (err) {
+ pr_info("Unable to register VF mgmt netdev %s\n", name);
+ free_netdev(adap->port[0]);
+ adap->port[0] = NULL;
+ return err;
+ }
+ return 0;
+}
+
static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
{
+ struct adapter *adap = pci_get_drvdata(pdev);
int err = 0;
int current_vfs = pci_num_vf(pdev);
u32 pcie_fw;
- void __iomem *regs;
-
- regs = pci_ioremap_bar(pdev, 0);
- if (!regs) {
- dev_err(&pdev->dev, "cannot map device registers\n");
- return -ENOMEM;
- }
- pcie_fw = readl(regs + PCIE_FW_A);
- iounmap(regs);
+ pcie_fw = readl(adap->regs + PCIE_FW_A);
/* Check if cxgb4 is the MASTER and fw is initialized */
if (!(pcie_fw & PCIE_FW_INIT_F) ||
!(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
*/
if (!num_vfs) {
pci_disable_sriov(pdev);
+ if (adap->port[0]) {
+ unregister_netdev(adap->port[0]);
+ adap->port[0] = NULL;
+ }
+ /* free VF resources */
+ kfree(adap->vfinfo);
+ adap->vfinfo = NULL;
+ adap->num_vfs = 0;
return num_vfs;
}
err = pci_enable_sriov(pdev, num_vfs);
if (err)
return err;
+
+ adap->num_vfs = num_vfs;
+ err = config_mgmt_dev(pdev);
+ if (err)
+ return err;
}
+
+ adap->vfinfo = kcalloc(adap->num_vfs,
+ sizeof(struct vf_info), GFP_KERNEL);
+ if (adap->vfinfo)
+ fill_vf_station_mac_addr(adap);
return num_vfs;
}
#endif
struct port_info *pi;
bool highdma = false;
struct adapter *adapter = NULL;
+ struct net_device *netdev;
void __iomem *regs;
u32 whoami, pl_rev;
enum chip_type chip;
+ static int adap_idx = 1;
printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
if (func != ent->driver_data) {
+#ifndef CONFIG_PCI_IOV
iounmap(regs);
+#endif
pci_disable_device(pdev);
pci_save_state(pdev); /* to restore SR-IOV later */
goto sriov;
err = -ENOMEM;
goto out_unmap_bar0;
}
+ adap_idx++;
adapter->workq = create_singlethread_workqueue("cxgb4");
if (!adapter->workq) {
T6_STATMODE_V(0)));
for_each_port(adapter, i) {
- struct net_device *netdev;
-
netdev = alloc_etherdev_mq(sizeof(struct port_info),
MAX_ETH_QSETS);
if (!netdev) {
netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_RXCSUM | NETIF_F_RXHASH |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_TC;
if (highdma)
netdev->hw_features |= NETIF_F_HIGHDMA;
netdev->features |= netdev->hw_features;
}
}
#endif
- if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
+
+ for_each_port(adapter, i) {
+ pi = adap2pinfo(adapter, i);
+ pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls);
+ if (!pi->sched_tbl)
+ dev_warn(&pdev->dev,
+ "could not activate scheduling on port %d\n",
+ i);
+ }
+
+ if (tid_init(&adapter->tids) < 0) {
dev_warn(&pdev->dev, "could not allocate TID table, "
"continuing\n");
adapter->params.offload = 0;
+ } else {
+ adapter->tc_u32 = cxgb4_init_tc_u32(adapter,
+ CXGB4_MAX_LINK_HANDLE);
+ if (!adapter->tc_u32)
+ dev_warn(&pdev->dev,
+ "could not offload tc u32, continuing\n");
}
if (is_offload(adapter)) {
/* See what interrupts we'll be using */
if (msi > 1 && enable_msix(adapter) == 0)
adapter->flags |= USING_MSIX;
- else if (msi > 0 && pci_enable_msi(pdev) == 0)
+ else if (msi > 0 && pci_enable_msi(pdev) == 0) {
adapter->flags |= USING_MSI;
+ if (msi > 1)
+ free_msix_info(adapter);
+ }
	/* check for PCI Express bandwidth capabilities */
cxgb4_check_pcie_caps(adapter);
/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
pdev->needs_freset = 1;
- if (is_offload(adapter))
- attach_ulds(adapter);
+ if (is_uld(adapter)) {
+ mutex_lock(&uld_mutex);
+ list_add_tail(&adapter->list_node, &adapter_list);
+ mutex_unlock(&uld_mutex);
+ }
print_adapter_info(adapter);
+ setup_fw_sge_queues(adapter);
+ return 0;
sriov:
#ifdef CONFIG_PCI_IOV
"instantiated %u virtual functions\n",
num_vf[func]);
}
-#endif
+
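+	/* PCI functions other than the master PF only get a minimal adapter
+	 * structure, enough to handle SR-IOV configuration and the VF
+	 * management netdev later on.
+	 */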
+ adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ if (!adapter) {
+ err = -ENOMEM;
+ goto free_pci_region;
+ }
+
+ adapter->pdev = pdev;
+ adapter->pdev_dev = &pdev->dev;
+ adapter->name = pci_name(pdev);
+ adapter->mbox = func;
+ adapter->pf = func;
+ adapter->regs = regs;
+ adapter->adap_idx = adap_idx;
+ adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
+ (sizeof(struct mbox_cmd) *
+ T4_OS_LOG_MBOX_CMDS),
+ GFP_KERNEL);
+ if (!adapter->mbox_log) {
+ err = -ENOMEM;
+ goto free_adapter;
+ }
+ pci_set_drvdata(pdev, adapter);
+ return 0;
+
+ free_adapter:
+ kfree(adapter);
+ free_pci_region:
+ iounmap(regs);
+ pci_disable_sriov(pdev);
+ pci_release_regions(pdev);
+ return err;
+#else
return 0;
+#endif
out_free_dev:
free_some_resources(adapter);
+ if (adapter->flags & USING_MSIX)
+ free_msix_info(adapter);
+ if (adapter->num_uld || adapter->num_ofld_uld)
+ t4_uld_mem_free(adapter);
out_unmap_bar:
if (!is_t4(adapter->params.chip))
iounmap(adapter->bar2);
{
struct adapter *adapter = pci_get_drvdata(pdev);
-#ifdef CONFIG_PCI_IOV
- pci_disable_sriov(pdev);
-
-#endif
+ if (!adapter) {
+ pci_release_regions(pdev);
+ return;
+ }
- if (adapter) {
+ if (adapter->pf == 4) {
int i;
/* Tear down per-adapter Work Queue first since it can contain
*/
destroy_workqueue(adapter->workq);
- if (is_offload(adapter))
+ if (is_uld(adapter))
detach_ulds(adapter);
disable_interrupts(adapter);
/* If we allocated filters, free up state associated with any
* valid filters ...
*/
- if (adapter->tids.ftid_tab) {
- struct filter_entry *f = &adapter->tids.ftid_tab[0];
- for (i = 0; i < (adapter->tids.nftids +
- adapter->tids.nsftids); i++, f++)
- if (f->valid)
- clear_filter(adapter, f);
- }
+ clear_all_filters(adapter);
if (adapter->flags & FULL_INIT_DONE)
cxgb_down(adapter);
+ if (adapter->flags & USING_MSIX)
+ free_msix_info(adapter);
+ if (adapter->num_uld || adapter->num_ofld_uld)
+ t4_uld_mem_free(adapter);
free_some_resources(adapter);
#if IS_ENABLED(CONFIG_IPV6)
t4_cleanup_clip_tbl(adapter);
kfree(adapter->mbox_log);
synchronize_rcu();
kfree(adapter);
- } else
+ }
+#ifdef CONFIG_PCI_IOV
+ else {
+ if (adapter->port[0])
+ unregister_netdev(adapter->port[0]);
+ iounmap(adapter->regs);
+ kfree(adapter->vfinfo);
+ kfree(adapter);
+ pci_disable_sriov(pdev);
+ pci_release_regions(pdev);
+ }
+#endif
+}
+
+/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
+ * delivery. This is essentially a stripped-down version of the PCI remove()
+ * function where we do the minimal amount of work necessary to shut down any
+ * further activity.
+ */
+static void shutdown_one(struct pci_dev *pdev)
+{
+ struct adapter *adapter = pci_get_drvdata(pdev);
+
+	/* As with remove_one() above (see extended comment), we only want to
+	 * do cleanup on PCI Devices which went all the way through init_one()
+ * ...
+ */
+ if (!adapter) {
pci_release_regions(pdev);
+ return;
+ }
+
+ if (adapter->pf == 4) {
+ int i;
+
+ for_each_port(adapter, i)
+ if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+ cxgb_close(adapter->port[i]);
+
+ t4_uld_clean_up(adapter);
+ disable_interrupts(adapter);
+ disable_msi(adapter);
+
+ t4_sge_stop(adapter);
+ if (adapter->flags & FW_OK)
+ t4_fw_bye(adapter, adapter->mbox);
+ }
+#ifdef CONFIG_PCI_IOV
+ else {
+ if (adapter->port[0])
+ unregister_netdev(adapter->port[0]);
+ iounmap(adapter->regs);
+ kfree(adapter->vfinfo);
+ kfree(adapter);
+ pci_disable_sriov(pdev);
+ pci_release_regions(pdev);
+ }
+#endif
}
static struct pci_driver cxgb4_driver = {
.id_table = cxgb4_pci_tbl,
.probe = init_one,
.remove = remove_one,
- .shutdown = remove_one,
+ .shutdown = shutdown_one,
#ifdef CONFIG_PCI_IOV
.sriov_configure = cxgb4_iov_configure,
#endif
/*
* This file is part of the Chelsio T4 Ethernet driver for Linux.
*
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
out:
vfree(vpd);
- return ret;
+ return ret < 0 ? ret : 0;
}
/**
}
#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
- FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
+ FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+ FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
FW_PORT_CAP_ANEG)
/**
speed = 1000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
speed = 10000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+ speed = 25000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
speed = 40000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+ speed = 100000;
lc = &pi->link_cfg;
t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
}
}
+
+/**
+ * t4_set_vf_mac - Set MAC address for the specified VF
+ * @adapter: The adapter
+ * @vf: one of the VFs instantiated by the specified PF
+ * @naddr: the number of MAC addresses
+ * @addr: the MAC address(es) to be set to the specified VF
+ */
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+ unsigned int naddr, u8 *addr)
+{
+ struct fw_acl_mac_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_WRITE_F |
+ FW_ACL_MAC_CMD_PFN_V(adapter->pf) |
+ FW_ACL_MAC_CMD_VFN_V(vf));
+
+ /* Note: Do not enable the ACL */
+ cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+ cmd.nmac = naddr;
+
+ switch (adapter->pf) {
+ case 3:
+ memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3));
+ break;
+ case 2:
+ memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2));
+ break;
+ case 1:
+ memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1));
+ break;
+ case 0:
+ memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0));
+ break;
+ }
+
+ return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
+}
+
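+/* Program one Tx scheduling class via a FW_SCHED_CMD mailbox command using
+ * the supplied rate-control parameters.
+ */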
+int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+ int rateunit, int ratemode, int channel, int class,
+ int minrate, int maxrate, int weight, int pktsize)
+{
+ struct fw_sched_cmd cmd;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_WRITE_F);
+ cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
+
+ cmd.u.params.sc = FW_SCHED_SC_PARAMS;
+ cmd.u.params.type = type;
+ cmd.u.params.level = level;
+ cmd.u.params.mode = mode;
+ cmd.u.params.ch = channel;
+ cmd.u.params.cl = class;
+ cmd.u.params.unit = rateunit;
+ cmd.u.params.rate = ratemode;
+ cmd.u.params.min = cpu_to_be32(minrate);
+ cmd.u.params.max = cpu_to_be32(maxrate);
+ cmd.u.params.weight = cpu_to_be16(weight);
+ cmd.u.params.pktsize = cpu_to_be16(pktsize);
+
+ return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
+ NULL, 1);
+}
/*
* This file is part of the Chelsio T4 Ethernet driver for Linux.
*
- * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
FW_RI_FR_NSMR_WR = 0x19,
FW_RI_INV_LSTAG_WR = 0x1a,
FW_ISCSI_TX_DATA_WR = 0x45,
+ FW_CRYPTO_LOOKASIDE_WR = 0X6d,
FW_LASTC2E_WR = 0x70
};
FW_RSS_IND_TBL_CMD = 0x20,
FW_RSS_GLB_CONFIG_CMD = 0x22,
FW_RSS_VI_CONFIG_CMD = 0x23,
+ FW_SCHED_CMD = 0x24,
FW_DEVLOG_CMD = 0x25,
FW_CLIP_CMD = 0x28,
FW_LASTC2E_CMD = 0x40,
__be16 niccaps;
__be16 ofldcaps;
__be16 rdmacaps;
- __be16 r4;
+ __be16 cryptocaps;
__be16 iscsicaps;
__be16 fcoecaps;
__be32 cfcsum;
FW_PORT_CAP_802_3_ASM_DIR = 0x8000,
};
+ #define FW_PORT_CAP_SPEED_S 0
+ #define FW_PORT_CAP_SPEED_M 0x3f
+ #define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S)
+ #define FW_PORT_CAP_SPEED_G(x) \
+ (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M)
+
enum fw_port_mdi {
FW_PORT_CAP_MDI_UNCHANGED,
FW_PORT_CAP_MDI_AUTO,
#define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x) ((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S)
#define FW_RSS_VI_CONFIG_CMD_UDPEN_F FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U)
+enum fw_sched_sc {
+ FW_SCHED_SC_PARAMS = 1,
+};
+
+struct fw_sched_cmd {
+ __be32 op_to_write;
+ __be32 retval_len16;
+ union fw_sched {
+ struct fw_sched_config {
+ __u8 sc;
+ __u8 type;
+ __u8 minmaxen;
+ __u8 r3[5];
+ __u8 nclasses[4];
+ __be32 r4;
+ } config;
+ struct fw_sched_params {
+ __u8 sc;
+ __u8 type;
+ __u8 level;
+ __u8 mode;
+ __u8 unit;
+ __u8 rate;
+ __u8 ch;
+ __u8 cl;
+ __be32 min;
+ __be32 max;
+ __be16 weight;
+ __be16 pktsize;
+ __be16 burstsize;
+ __be16 r4;
+ } params;
+ } u;
+};
+
struct fw_clip_cmd {
__be32 op_to_write;
__be32 alloc_to_len16;
#define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \
(((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M)
+#define MAX_IMM_OFLD_TX_DATA_WR_LEN (0xff + sizeof(struct fw_ofld_tx_data_wr))
+
+struct fw_crypto_lookaside_wr {
+ __be32 op_to_cctx_size;
+ __be32 len16_pkd;
+ __be32 session_id;
+ __be32 rx_chid_to_rx_q_id;
+ __be32 key_addr;
+ __be32 pld_size_hash_size;
+ __be64 cookie;
+};
+
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_S 24
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_OPCODE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_OPCODE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_S 23
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_M 0x1
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_COMPL_S)
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_COMPL_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_COMPL_M)
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_F FW_CRYPTO_LOOKASIDE_WR_COMPL_V(1U)
+
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S 15
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S)
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S 5
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S)
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M 0x1f
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_LEN16_S)
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_LEN16_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_LEN16_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S 29
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S)
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_S 27
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_LCB_S)
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_LCB_S) & FW_CRYPTO_LOOKASIDE_WR_LCB_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_S 25
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_PHASH_S)
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_PHASH_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_PHASH_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_IV_S 23
+#define FW_CRYPTO_LOOKASIDE_WR_IV_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_IV_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_IV_S)
+#define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_TX_CH_S)
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_TX_CH_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M 0x3ff
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S)
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S 24
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S 17
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M 0x7f
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(x) \
+ ((x) << FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_G(x) \
+ (((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \
+ FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M)
+
#endif /* _T4FW_INTERFACE_H_ */
unsigned int supported; /* link capabilities */
unsigned int advertising; /* advertised capabilities */
unsigned short lp_advertising; /* peer advertised capabilities */
- unsigned short requested_speed; /* speed user has requested */
- unsigned short speed; /* actual link speed */
+ unsigned int requested_speed; /* speed user has requested */
+ unsigned int speed; /* actual link speed */
unsigned char requested_fc; /* flow control user has requested */
unsigned char fc; /* actual link flow control */
unsigned char autoneg; /* autonegotiating? */
return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
}
+ /* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
static inline bool is_x_10g_port(const struct link_config *lc)
{
- return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
- (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+ unsigned int speeds, high_speeds;
+
+ speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+ high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+ return high_speeds != 0;
}
static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
u64 *pbar2_qoffset,
unsigned int *pbar2_qid);
+unsigned int t4vf_get_pf_from_vf(struct adapter *);
int t4vf_get_sge_params(struct adapter *);
int t4vf_get_vpd_params(struct adapter *);
int t4vf_get_dev_params(struct adapter *);
int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
int t4vf_prep_adapter(struct adapter *);
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+ unsigned int *naddr, u8 *addr);
#endif /* __T4VF_COMMON_H__ */
}
#define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
- FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
- FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
+ FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+ FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
+ FW_PORT_CAP_ANEG)
/**
* init_link_config - initialize a link's SW state
return 0;
}
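+/* Work out which PF owns this VF by reading the VF's WHOAMI register. */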
+unsigned int t4vf_get_pf_from_vf(struct adapter *adapter)
+{
+ u32 whoami;
+
+ whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
+ return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+ SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami));
+}
+
/**
 * t4vf_get_sge_params - retrieve adapter Scatter Gather Engine parameters
* @adapter: the adapter
* read.
*/
if (!is_t4(adapter->params.chip)) {
- u32 whoami;
unsigned int pf, s_hps, s_qpp;
params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) |
* register we just read. Do it once here so other code in
* the driver can just use it.
*/
- whoami = t4_read_reg(adapter,
- T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
- pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
- SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
-
+ pf = t4vf_get_pf_from_vf(adapter);
s_hps = (HOSTPAGESIZEPF0_S +
(HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf);
sge_params->sge_vf_hps =
speed = 1000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
speed = 10000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+ speed = 25000;
else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
speed = 40000;
+ else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+ speed = 100000;
/*
* Scan all of our "ports" (Virtual Interfaces) looking for
return 0;
}
+
+/**
+ * t4vf_get_vf_mac_acl - Get the MAC address to be set to
+ * the VI of this VF.
+ * @adapter: The adapter
+ * @pf: The pf associated with vf
+ * @naddr: the number of ACL MAC addresses returned in addr
+ * @addr: Placeholder for MAC addresses
+ *
+ * Find the MAC address to be set to the VF's VI. The requested MAC address
+ * is from the host OS via callback in the PF driver.
+ */
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+ unsigned int *naddr, u8 *addr)
+{
+ struct fw_acl_mac_cmd cmd;
+ int ret;
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_READ_F);
+ cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+ ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+ if (ret)
+ return ret;
+
+ if (cmd.nmac < *naddr)
+ *naddr = cmd.nmac;
+
+ switch (pf) {
+ case 3:
+ memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3));
+ break;
+ case 2:
+ memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2));
+ break;
+ case 1:
+ memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1));
+ break;
+ case 0:
+ memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0));
+ break;
+ }
+
+ return ret;
+}
dev->mcast_pending = 1;
return;
}
+
+ mutex_lock(&dev->link_lock);
__emac_set_multicast_list(dev);
+ mutex_unlock(&dev->link_lock);
+ }
+
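+ /* Write a new station address into the EMAC IAHR/IALR registers.  RX and
+  * TX are disabled around the update, and link_lock serialises against
+  * concurrent link work.
+  */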
+ static int emac_set_mac_address(struct net_device *ndev, void *sa)
+ {
+ struct emac_instance *dev = netdev_priv(ndev);
+ struct sockaddr *addr = sa;
+ struct emac_regs __iomem *p = dev->emacp;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ mutex_lock(&dev->link_lock);
+
+ memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
+
+ emac_rx_disable(dev);
+ emac_tx_disable(dev);
+ out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]);
+ out_be32(&p->ialr, (ndev->dev_addr[2] << 24) |
+ (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) |
+ ndev->dev_addr[5]);
+ emac_tx_enable(dev);
+ emac_rx_enable(dev);
+
+ mutex_unlock(&dev->link_lock);
+
+ return 0;
}
static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
.ndo_do_ioctl = emac_ioctl,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_mac_address = emac_set_mac_address,
.ndo_start_xmit = emac_start_xmit,
.ndo_change_mtu = eth_change_mtu,
};
.ndo_do_ioctl = emac_ioctl,
.ndo_tx_timeout = emac_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
- .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_mac_address = emac_set_mac_address,
.ndo_start_xmit = emac_start_xmit_sg,
.ndo_change_mtu = emac_change_mtu,
};
/* Get interrupts. EMAC irq is mandatory, WOL irq is optional */
dev->emac_irq = irq_of_parse_and_map(np, 0);
dev->wol_irq = irq_of_parse_and_map(np, 1);
- if (dev->emac_irq == NO_IRQ) {
+ if (!dev->emac_irq) {
printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name);
goto err_free;
}
err_reg_unmap:
iounmap(dev->emacp);
err_irq_unmap:
- if (dev->wol_irq != NO_IRQ)
+ if (dev->wol_irq)
irq_dispose_mapping(dev->wol_irq);
- if (dev->emac_irq != NO_IRQ)
+ if (dev->emac_irq)
irq_dispose_mapping(dev->emac_irq);
err_free:
free_netdev(ndev);
emac_dbg_unregister(dev);
iounmap(dev->emacp);
- if (dev->wol_irq != NO_IRQ)
+ if (dev->wol_irq)
irq_dispose_mapping(dev->wol_irq);
- if (dev->emac_irq != NO_IRQ)
+ if (dev->emac_irq)
irq_dispose_mapping(dev->emac_irq);
free_netdev(dev->ndev);
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
#include <linux/clk.h>
+#include <linux/pm_runtime.h>
#include <linux/if_vlan.h>
#include <linux/reset.h>
#include <linux/tcp.h>
};
static const char * const mtk_clks_source_name[] = {
- "ethif", "esw", "gp1", "gp2"
+ "ethif", "esw", "gp1", "gp2", "trgpll"
};
void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
return _mtk_mdio_read(eth, phy_addr, phy_reg);
}
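+/* Retune the (T)RGMII interface mode and clocks on GMAC0 whenever the PHY
+ * reports a new link speed.
+ */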
+static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
+{
+ u32 val;
+ int ret;
+
+ val = (speed == SPEED_1000) ?
+ INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100;
+ mtk_w32(eth, val, INTF_MODE);
+
+ regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
+ ETHSYS_TRGMII_CLK_SEL362_5,
+ ETHSYS_TRGMII_CLK_SEL362_5);
+
+ val = (speed == SPEED_1000) ? 250000000 : 500000000;
+ ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val);
+ if (ret)
+ dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
+
+ val = (speed == SPEED_1000) ?
+ RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100;
+ mtk_w32(eth, val, TRGMII_RCK_CTRL);
+
+ val = (speed == SPEED_1000) ?
+ TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100;
+ mtk_w32(eth, val, TRGMII_TCK_CTRL);
+}
+
static void mtk_phy_link_adjust(struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
MAC_MCR_BACKPR_EN;
- switch (mac->phy_dev->speed) {
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return;
+
+ switch (dev->phydev->speed) {
case SPEED_1000:
mcr |= MAC_MCR_SPEED_1000;
break;
break;
};
- if (mac->phy_dev->link)
+ if (mac->id == 0 && !mac->trgmii)
+ mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
+
+ if (dev->phydev->link)
mcr |= MAC_MCR_FORCE_LINK;
- if (mac->phy_dev->duplex) {
+ if (dev->phydev->duplex) {
mcr |= MAC_MCR_FORCE_DPX;
- if (mac->phy_dev->pause)
+ if (dev->phydev->pause)
rmt_adv = LPA_PAUSE_CAP;
- if (mac->phy_dev->asym_pause)
+ if (dev->phydev->asym_pause)
rmt_adv |= LPA_PAUSE_ASYM;
- if (mac->phy_dev->advertising & ADVERTISED_Pause)
+ if (dev->phydev->advertising & ADVERTISED_Pause)
lcl_adv |= ADVERTISE_PAUSE_CAP;
- if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause)
+ if (dev->phydev->advertising & ADVERTISED_Asym_Pause)
lcl_adv |= ADVERTISE_PAUSE_ASYM;
flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
- if (mac->phy_dev->link)
+ if (dev->phydev->link)
netif_carrier_on(dev);
else
netif_carrier_off(dev);
static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
struct device_node *phy_node)
{
- const __be32 *_addr = NULL;
struct phy_device *phydev;
- int phy_mode, addr;
-
- _addr = of_get_property(phy_node, "reg", NULL);
+ int phy_mode;
- if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) {
- pr_err("%s: invalid phy address\n", phy_node->name);
- return -EINVAL;
- }
- addr = be32_to_cpu(*_addr);
phy_mode = of_get_phy_mode(phy_node);
if (phy_mode < 0) {
dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
mac->id, phydev_name(phydev), phydev->phy_id,
phydev->drv->name);
- mac->phy_dev = phydev;
-
return 0;
}
-static int mtk_phy_connect(struct mtk_mac *mac)
+static int mtk_phy_connect(struct net_device *dev)
{
- struct mtk_eth *eth = mac->hw;
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth;
struct device_node *np;
- u32 val, ge_mode;
+ u32 val;
+ eth = mac->hw;
np = of_parse_phandle(mac->of_node, "phy-handle", 0);
if (!np && of_phy_is_fixed_link(mac->of_node))
if (!of_phy_register_fixed_link(mac->of_node))
return -ENODEV;
switch (of_get_phy_mode(np)) {
+ case PHY_INTERFACE_MODE_TRGMII:
+ mac->trgmii = true;
case PHY_INTERFACE_MODE_RGMII_TXID:
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII:
- ge_mode = 0;
+ mac->ge_mode = 0;
break;
case PHY_INTERFACE_MODE_MII:
- ge_mode = 1;
+ mac->ge_mode = 1;
break;
case PHY_INTERFACE_MODE_REVMII:
- ge_mode = 2;
+ mac->ge_mode = 2;
break;
case PHY_INTERFACE_MODE_RMII:
if (!mac->id)
goto err_phy;
- ge_mode = 3;
+ mac->ge_mode = 3;
break;
default:
goto err_phy;
/* put the gmac into the right mode */
regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
- val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
+ val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
- mtk_phy_connect_node(eth, mac, np);
- mac->phy_dev->autoneg = AUTONEG_ENABLE;
- mac->phy_dev->speed = 0;
- mac->phy_dev->duplex = 0;
+ /* couple phydev to net_device */
+ if (mtk_phy_connect_node(eth, mac, np))
+ goto err_phy;
+
+ dev->phydev->autoneg = AUTONEG_ENABLE;
+ dev->phydev->speed = 0;
+ dev->phydev->duplex = 0;
if (of_phy_is_fixed_link(mac->of_node))
- mac->phy_dev->supported |=
+ dev->phydev->supported |=
SUPPORTED_Pause | SUPPORTED_Asym_Pause;
- mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
+ dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
SUPPORTED_Asym_Pause;
- mac->phy_dev->advertising = mac->phy_dev->supported |
+ dev->phydev->advertising = dev->phydev->supported |
ADVERTISED_Autoneg;
- phy_start_aneg(mac->phy_dev);
+ phy_start_aneg(dev->phydev);
of_node_put(np);
err_phy:
of_node_put(np);
- dev_err(eth->dev, "invalid phy_mode\n");
+ dev_err(eth->dev, "%s: invalid phy\n", __func__);
return -EINVAL;
}
mdiobus_unregister(eth->mii_bus);
}
-static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
+static inline void mtk_irq_disable(struct mtk_eth *eth,
+ unsigned reg, u32 mask)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(ð->irq_lock, flags);
- val = mtk_r32(eth, MTK_QDMA_INT_MASK);
- mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+ val = mtk_r32(eth, reg);
+ mtk_w32(eth, val & ~mask, reg);
spin_unlock_irqrestore(ð->irq_lock, flags);
}
-static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
+static inline void mtk_irq_enable(struct mtk_eth *eth,
+ unsigned reg, u32 mask)
{
unsigned long flags;
u32 val;
spin_lock_irqsave(ð->irq_lock, flags);
- val = mtk_r32(eth, MTK_QDMA_INT_MASK);
- mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+ val = mtk_r32(eth, reg);
+ mtk_w32(eth, val | mask, reg);
spin_unlock_irqrestore(ð->irq_lock, flags);
}
int ret = eth_mac_addr(dev, p);
struct mtk_mac *mac = netdev_priv(dev);
const char *macaddr = dev->dev_addr;
- unsigned long flags;
if (ret)
return ret;
- spin_lock_irqsave(&mac->hw->page_lock, flags);
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return -EBUSY;
+
+ spin_lock_bh(&mac->hw->page_lock);
mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
MTK_GDMA_MAC_ADRH(mac->id));
mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
(macaddr[4] << 8) | macaddr[5],
MTK_GDMA_MAC_ADRL(mac->id));
- spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+ spin_unlock_bh(&mac->hw->page_lock);
return 0;
}
struct mtk_eth *eth = mac->hw;
struct mtk_tx_ring *ring = ð->tx_ring;
struct net_device_stats *stats = &dev->stats;
- unsigned long flags;
bool gso = false;
int tx_num;
* however we have 2 queues running on the same ring so we need to lock
* the ring access
*/
- spin_lock_irqsave(ð->page_lock, flags);
+ spin_lock(ð->page_lock);
+
+ if (unlikely(test_bit(MTK_RESETTING, ð->state)))
+ goto drop;
tx_num = mtk_cal_txd_req(skb);
if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
mtk_stop_queue(eth);
netif_err(eth, tx_queued, dev,
"Tx Ring full when queue awake!\n");
- spin_unlock_irqrestore(ð->page_lock, flags);
+ spin_unlock(ð->page_lock);
return NETDEV_TX_BUSY;
}
if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
mtk_stop_queue(eth);
- spin_unlock_irqrestore(ð->page_lock, flags);
+ spin_unlock(ð->page_lock);
return NETDEV_TX_OK;
drop:
- spin_unlock_irqrestore(ð->page_lock, flags);
+ spin_unlock(ð->page_lock);
stats->tx_dropped++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
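+/* With hardware LRO enabled several RX rings are active; return the first
+ * ring that has a completed descriptor pending, or NULL if none is ready.
+ * Without LRO only ring 0 is used.
+ */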
+static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth)
+{
+ int i;
+ struct mtk_rx_ring *ring;
+ int idx;
+
+ if (!eth->hwlro)
+ return ð->rx_ring[0];
+
+ for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+ ring = ð->rx_ring[i];
+ idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+ if (ring->dma[idx].rxd2 & RX_DMA_DONE) {
+ ring->calc_idx_update = true;
+ return ring;
+ }
+ }
+
+ return NULL;
+}
+
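+/* Hand processed descriptors back to the DMA engine by updating the CPU
+ * index register of every ring that consumed packets.
+ */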
+static void mtk_update_rx_cpu_idx(struct mtk_eth *eth)
+{
+ struct mtk_rx_ring *ring;
+ int i;
+
+ if (!eth->hwlro) {
+ ring = ð->rx_ring[0];
+ mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+ } else {
+ for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+ ring = ð->rx_ring[i];
+ if (ring->calc_idx_update) {
+ ring->calc_idx_update = false;
+ mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+ }
+ }
+ }
+}
+
static int mtk_poll_rx(struct napi_struct *napi, int budget,
struct mtk_eth *eth)
{
- struct mtk_rx_ring *ring = ð->rx_ring;
- int idx = ring->calc_idx;
+ struct mtk_rx_ring *ring;
+ int idx;
struct sk_buff *skb;
u8 *data, *new_data;
struct mtk_rx_dma *rxd, trxd;
dma_addr_t dma_addr;
int mac = 0;
- idx = NEXT_RX_DESP_IDX(idx);
+ ring = mtk_get_rx_ring(eth);
+ if (unlikely(!ring))
+ goto rx_done;
+
+ idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
rxd = &ring->dma[idx];
data = ring->data[idx];
netdev = eth->netdev[mac];
+ if (unlikely(test_bit(MTK_RESETTING, ð->state)))
+ goto release_desc;
+
/* alloc new buffer */
new_data = napi_alloc_frag(ring->frag_size);
if (unlikely(!new_data)) {
rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
ring->calc_idx = idx;
+
+ done++;
+ }
+
+rx_done:
+ if (done) {
/* make sure that all changes to the dma ring are flushed before
* we continue
*/
wmb();
- mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
- done++;
+ mtk_update_rx_cpu_idx(eth);
}
- if (done < budget)
- mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
-
return done;
}
return budget;
napi_complete(napi);
- mtk_irq_enable(eth, MTK_TX_DONE_INT);
+ mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
return tx_done;
}
struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
u32 status, mask;
int rx_done = 0;
+ int remain_budget = budget;
mtk_handle_status_irq(eth);
- mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
- rx_done = mtk_poll_rx(napi, budget, eth);
+
+poll_again:
+ mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
+ rx_done = mtk_poll_rx(napi, remain_budget, eth);
if (unlikely(netif_msg_intr(eth))) {
- status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
- mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+ status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+ mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
dev_info(eth->dev,
"done rx %d, intr 0x%08x/0x%x\n",
rx_done, status, mask);
}
-
- if (rx_done == budget)
- return budget;
-
- status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
- if (status & MTK_RX_DONE_INT)
+ if (rx_done == remain_budget)
return budget;
+ status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+ if (status & MTK_RX_DONE_INT) {
+ remain_budget -= rx_done;
+ goto poll_again;
+ }
napi_complete(napi);
- mtk_irq_enable(eth, MTK_RX_DONE_INT);
+ mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
- return rx_done;
+ return rx_done + budget - remain_budget;
}
static int mtk_tx_alloc(struct mtk_eth *eth)
mtk_w32(eth,
ring->phys + ((MTK_DMA_SIZE - 1) * sz),
MTK_QTX_DRX_PTR);
+ mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
return 0;
}
}
-static int mtk_rx_alloc(struct mtk_eth *eth)
+static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
{
- struct mtk_rx_ring *ring = ð->rx_ring;
+ struct mtk_rx_ring *ring = ð->rx_ring[ring_no];
+ int rx_data_len, rx_dma_size;
int i;
- ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
+ if (rx_flag == MTK_RX_FLAGS_HWLRO) {
+ rx_data_len = MTK_MAX_LRO_RX_LENGTH;
+ rx_dma_size = MTK_HW_LRO_DMA_SIZE;
+ } else {
+ rx_data_len = ETH_DATA_LEN;
+ rx_dma_size = MTK_DMA_SIZE;
+ }
+
+ ring->frag_size = mtk_max_frag_size(rx_data_len);
ring->buf_size = mtk_max_buf_size(ring->frag_size);
- ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data),
+ ring->data = kcalloc(rx_dma_size, sizeof(*ring->data),
GFP_KERNEL);
if (!ring->data)
return -ENOMEM;
- for (i = 0; i < MTK_DMA_SIZE; i++) {
+ for (i = 0; i < rx_dma_size; i++) {
ring->data[i] = netdev_alloc_frag(ring->frag_size);
if (!ring->data[i])
return -ENOMEM;
}
ring->dma = dma_alloc_coherent(eth->dev,
- MTK_DMA_SIZE * sizeof(*ring->dma),
+ rx_dma_size * sizeof(*ring->dma),
&ring->phys,
GFP_ATOMIC | __GFP_ZERO);
if (!ring->dma)
return -ENOMEM;
- for (i = 0; i < MTK_DMA_SIZE; i++) {
+ for (i = 0; i < rx_dma_size; i++) {
dma_addr_t dma_addr = dma_map_single(eth->dev,
ring->data[i] + NET_SKB_PAD,
ring->buf_size,
ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
}
- ring->calc_idx = MTK_DMA_SIZE - 1;
+ ring->dma_size = rx_dma_size;
+ ring->calc_idx_update = false;
+ ring->calc_idx = rx_dma_size - 1;
+ ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no);
/* make sure that all changes to the dma ring are flushed before we
* continue
*/
wmb();
- mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
- mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
- mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
- mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
- mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+ mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no));
+ mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no));
+ mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+ mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX);
return 0;
}
-static void mtk_rx_clean(struct mtk_eth *eth)
+static void mtk_rx_clean(struct mtk_eth *eth, int ring_no)
{
- struct mtk_rx_ring *ring = &eth->rx_ring;
+ struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
int i;
if (ring->data && ring->dma) {
- for (i = 0; i < MTK_DMA_SIZE; i++) {
+ for (i = 0; i < ring->dma_size; i++) {
if (!ring->data[i])
continue;
if (!ring->dma[i].rxd1)
if (ring->dma) {
dma_free_coherent(eth->dev,
- MTK_DMA_SIZE * sizeof(*ring->dma),
+ ring->dma_size * sizeof(*ring->dma),
ring->dma,
ring->phys);
ring->dma = NULL;
}
}
+static int mtk_hwlro_rx_init(struct mtk_eth *eth)
+{
+ int i;
+ u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0;
+ u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0;
+
+ /* set LRO rings to auto-learn modes */
+ ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE;
+
+ /* validate LRO ring */
+ ring_ctrl_dw2 |= MTK_RING_VLD;
+
+ /* set AGE timer (unit: 20us) */
+ ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H;
+ ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L;
+
+ /* set max AGG timer (unit: 20us) */
+ ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME;
+
+ /* set max LRO AGG count */
+ ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L;
+ ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H;
+
+ for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+ mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i));
+ mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i));
+ mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i));
+ }
+
+ /* IPv4 checksum update enable */
+ lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN;
+
+ /* switch priority comparison to packet count mode */
+ lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE;
+
+ /* bandwidth threshold setting */
+ mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2);
+
+ /* auto-learn score delta setting */
+ mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA);
+
+ /* set refresh timer for altering flows to 1 sec. (unit: 20us) */
+ mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME,
+ MTK_PDMA_LRO_ALT_REFRESH_TIMER);
+
+ /* set HW LRO mode & the max aggregation count for rx packets */
+ lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff);
+
+ /* the minimal remaining room of SDL0 in RXD for lro aggregation */
+ lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL;
+
+ /* enable HW LRO */
+ lro_ctrl_dw0 |= MTK_LRO_EN;
+
+ mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3);
+ mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0);
+
+ return 0;
+}
+
+static void mtk_hwlro_rx_uninit(struct mtk_eth *eth)
+{
+ int i;
+ u32 val;
+
+ /* relinquish lro rings, flush aggregated packets */
+ mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0);
+
+ /* wait for relinquishments done */
+ for (i = 0; i < 10; i++) {
+ val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0);
+ if (val & MTK_LRO_RING_RELINQUISH_DONE) {
+ msleep(20);
+ continue;
+ }
+ }
+
+ /* invalidate lro rings */
+ for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+ mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i));
+
+ /* disable HW LRO */
+ mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0);
+}
+
+static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip)
+{
+ u32 reg_val;
+
+ reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx));
+
+ /* invalidate the IP setting */
+ mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx));
+
+ mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx));
+
+ /* validate the IP setting */
+ mtk_w32(eth, (reg_val | MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx));
+}
+
+static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx)
+{
+ u32 reg_val;
+
+ reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx));
+
+ /* invalidate the IP setting */
+ mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx));
+
+ mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx));
+}
+
+static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac)
+{
+ int cnt = 0;
+ int i;
+
+ for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+ if (mac->hwlro_ip[i])
+ cnt++;
+ }
+
+ return cnt;
+}
+
+static int mtk_hwlro_add_ipaddr(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ int hwlro_idx;
+
+ if ((fsp->flow_type != TCP_V4_FLOW) ||
+ (!fsp->h_u.tcp_ip4_spec.ip4dst) ||
+ (fsp->location > 1))
+ return -EINVAL;
+
+ mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst);
+ hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location;
+
+ mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+
+ mtk_hwlro_val_ipaddr(eth, hwlro_idx, mac->hwlro_ip[fsp->location]);
+
+ return 0;
+}
+
+static int mtk_hwlro_del_ipaddr(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ int hwlro_idx;
+
+ if (fsp->location > 1)
+ return -EINVAL;
+
+ mac->hwlro_ip[fsp->location] = 0;
+ hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location;
+
+ mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+
+ mtk_hwlro_inval_ipaddr(eth, hwlro_idx);
+
+ return 0;
+}
+
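These two handlers are reached through the ethtool flow-classification ioctls. A hedged user-space sketch of how a destination IP might be programmed into one of the two per-port LRO slots (the interface name and address are placeholders; error handling is trimmed):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_rxnfc nfc;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&nfc, 0, sizeof(nfc));
	nfc.cmd = ETHTOOL_SRXCLSRLINS;	/* handled by mtk_set_rxnfc() */
	nfc.fs.flow_type = TCP_V4_FLOW;	/* only flow type the driver accepts */
	nfc.fs.location = 0;		/* LRO slot 0 or 1 */
	nfc.fs.h_u.tcp_ip4_spec.ip4dst = inet_addr("192.168.1.10");

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* placeholder netdev */
	ifr.ifr_data = (void *)&nfc;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("SIOCETHTOOL");
		return 1;
	}
	return 0;
}
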
+static void mtk_hwlro_netdev_disable(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ int i, hwlro_idx;
+
+ for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+ mac->hwlro_ip[i] = 0;
+ hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + i;
+
+ mtk_hwlro_inval_ipaddr(eth, hwlro_idx);
+ }
+
+ mac->hwlro_ip_cnt = 0;
+}
+
+static int mtk_hwlro_get_fdir_entry(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+ /* only the TCP destination IPv4 address is meaningful; other fields are ignored */
+ fsp->flow_type = TCP_V4_FLOW;
+ fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]);
+ fsp->m_u.tcp_ip4_spec.ip4dst = 0;
+
+ fsp->h_u.tcp_ip4_spec.ip4src = 0;
+ fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff;
+ fsp->h_u.tcp_ip4_spec.psrc = 0;
+ fsp->m_u.tcp_ip4_spec.psrc = 0xffff;
+ fsp->h_u.tcp_ip4_spec.pdst = 0;
+ fsp->m_u.tcp_ip4_spec.pdst = 0xffff;
+ fsp->h_u.tcp_ip4_spec.tos = 0;
+ fsp->m_u.tcp_ip4_spec.tos = 0xff;
+
+ return 0;
+}
+
+static int mtk_hwlro_get_fdir_all(struct net_device *dev,
+ struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ int cnt = 0;
+ int i;
+
+ for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+ if (mac->hwlro_ip[i]) {
+ rule_locs[cnt] = i;
+ cnt++;
+ }
+ }
+
+ cmd->rule_cnt = cnt;
+
+ return 0;
+}
+
+static netdev_features_t mtk_fix_features(struct net_device *dev,
+ netdev_features_t features)
+{
+ if (!(features & NETIF_F_LRO)) {
+ struct mtk_mac *mac = netdev_priv(dev);
+ int ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+
+ if (ip_cnt) {
+ netdev_info(dev, "RX flow is programmed, LRO should keep on\n");
+
+ features |= NETIF_F_LRO;
+ }
+ }
+
+ return features;
+}
+
+static int mtk_set_features(struct net_device *dev, netdev_features_t features)
+{
+ int err = 0;
+
+ if (!((dev->features ^ features) & NETIF_F_LRO))
+ return 0;
+
+ if (!(features & NETIF_F_LRO))
+ mtk_hwlro_netdev_disable(dev);
+
+ return err;
+}
+
/* wait for DMA to finish whatever it is doing before we start using it again */
static int mtk_dma_busy_wait(struct mtk_eth *eth)
{
static int mtk_dma_init(struct mtk_eth *eth)
{
int err;
+ u32 i;
if (mtk_dma_busy_wait(eth))
return -EBUSY;
if (err)
return err;
- err = mtk_rx_alloc(eth);
+ err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
if (err)
return err;
+ if (eth->hwlro) {
+ for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+ err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO);
+ if (err)
+ return err;
+ }
+ err = mtk_hwlro_rx_init(eth);
+ if (err)
+ return err;
+ }
+
/* Enable random early drop and set drop threshold automatically */
mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
MTK_QDMA_FC_THRES);
eth->phy_scratch_ring = 0;
}
mtk_tx_clean(eth);
- mtk_rx_clean(eth);
+ mtk_rx_clean(eth, 0);
+
+ if (eth->hwlro) {
+ mtk_hwlro_rx_uninit(eth);
+ for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+ mtk_rx_clean(eth, i);
+ }
+
kfree(eth->scratch_head);
}
if (likely(napi_schedule_prep(&eth->rx_napi))) {
__napi_schedule(&eth->rx_napi);
- mtk_irq_disable(eth, MTK_RX_DONE_INT);
+ mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
}
return IRQ_HANDLED;
if (likely(napi_schedule_prep(&eth->tx_napi))) {
__napi_schedule(&eth->tx_napi);
- mtk_irq_disable(eth, MTK_TX_DONE_INT);
+ mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
}
return IRQ_HANDLED;
{
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
- u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
- mtk_irq_disable(eth, int_mask);
+ mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
mtk_handle_irq_rx(eth->irq[2], dev);
- mtk_irq_enable(eth, int_mask);
+ mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
}
#endif
}
mtk_w32(eth,
- MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
- MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
- MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
+ MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+ MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO,
MTK_QDMA_GLO_CFG);
+ mtk_w32(eth,
+ MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+ MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
+ MTK_PDMA_GLO_CFG);
+
return 0;
}
napi_enable(ð->tx_napi);
napi_enable(ð->rx_napi);
- mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
}
atomic_inc(&eth->dma_refcnt);
- phy_start(mac->phy_dev);
+ phy_start(dev->phydev);
netif_start_queue(dev);
return 0;
static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
{
- unsigned long flags;
u32 val;
int i;
/* stop the dma engine */
- spin_lock_irqsave(&eth->page_lock, flags);
+ spin_lock_bh(&eth->page_lock);
val = mtk_r32(eth, glo_cfg);
mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
glo_cfg);
- spin_unlock_irqrestore(&eth->page_lock, flags);
+ spin_unlock_bh(&eth->page_lock);
/* wait for dma stop */
for (i = 0; i < 10; i++) {
struct mtk_eth *eth = mac->hw;
netif_tx_disable(dev);
- phy_stop(mac->phy_dev);
+ phy_stop(dev->phydev);
/* only shutdown DMA if this is the last user */
if (!atomic_dec_and_test(&eth->dma_refcnt))
return 0;
- mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
napi_disable(&eth->tx_napi);
napi_disable(&eth->rx_napi);
return 0;
}
-static int __init mtk_hw_init(struct mtk_eth *eth)
+static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits)
{
- int err, i;
+ regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+ reset_bits,
+ reset_bits);
+
+ usleep_range(1000, 1100);
+ regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+ reset_bits,
+ ~reset_bits);
+ mdelay(10);
+}
+
+static int mtk_hw_init(struct mtk_eth *eth)
+{
+ int i, val;
+
+ if (test_and_set_bit(MTK_HW_INIT, &eth->state))
+ return 0;
+
+ pm_runtime_enable(eth->dev);
+ pm_runtime_get_sync(eth->dev);
+
+ clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
+ clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
+ clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
+ clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
+ ethsys_reset(eth, RSTCTRL_FE);
+ ethsys_reset(eth, RSTCTRL_PPE);
- /* reset the frame engine */
- reset_control_assert(eth->rstc);
- usleep_range(10, 20);
- reset_control_deassert(eth->rstc);
- usleep_range(10, 20);
+ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->mac[i])
+ continue;
+ val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id);
+ val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id);
+ }
+ regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
/* Set GE2 driving and slew rate */
regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
/* Enable RX VLan Offloading */
mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
- err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
- dev_name(eth->dev), eth);
- if (err)
- return err;
- err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
- dev_name(eth->dev), eth);
- if (err)
- return err;
-
- err = mtk_mdio_init(eth);
- if (err)
- return err;
-
/* disable delay and normal interrupt */
mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
- mtk_irq_disable(eth, ~0);
+ mtk_w32(eth, 0, MTK_PDMA_DELAY_INT);
+ mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+ mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
mtk_w32(eth, 0, MTK_RST_GL);
for (i = 0; i < 2; i++) {
u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
- /* setup the forward port to send frame to QDMA */
+ /* setup the forward port to send frame to PDMA */
val &= ~0xffff;
- val |= 0x5555;
/* Enable RX checksum */
val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
return 0;
}
+static int mtk_hw_deinit(struct mtk_eth *eth)
+{
+ if (!test_and_clear_bit(MTK_HW_INIT, &eth->state))
+ return 0;
+
+ clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
+ clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
+ clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
+ clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+
+ pm_runtime_put_sync(eth->dev);
+ pm_runtime_disable(eth->dev);
+
+ return 0;
+}
+
static int __init mtk_init(struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
dev->addr_assign_type = NET_ADDR_RANDOM;
}
- return mtk_phy_connect(mac);
+ return mtk_phy_connect(dev);
}
static void mtk_uninit(struct net_device *dev)
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
- phy_disconnect(mac->phy_dev);
- mtk_irq_disable(eth, ~0);
+ phy_disconnect(dev->phydev);
+ mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+ mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
}
static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
- struct mtk_mac *mac = netdev_priv(dev);
-
switch (cmd) {
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- return phy_mii_ioctl(mac->phy_dev, ifr, cmd);
+ return phy_mii_ioctl(dev->phydev, ifr, cmd);
default:
break;
}
rtnl_lock();
+ dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__);
+
+ while (test_and_set_bit_lock(MTK_RESETTING, &eth->state))
+ cpu_relax();
+
+ dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__);
/* stop all devices to make sure that dma is properly shut down */
for (i = 0; i < MTK_MAC_COUNT; i++) {
if (!eth->netdev[i])
mtk_stop(eth->netdev[i]);
__set_bit(i, &restart);
}
+ dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__);
+
+ /* restart underlying hardware such as power, clock, pin mux
+ * and the connected phy
+ */
+ mtk_hw_deinit(eth);
+
+ if (eth->dev->pins)
+ pinctrl_select_state(eth->dev->pins->p,
+ eth->dev->pins->default_state);
+ mtk_hw_init(eth);
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->mac[i] ||
+ of_phy_is_fixed_link(eth->mac[i]->of_node))
+ continue;
+ err = phy_init_hw(eth->netdev[i]->phydev);
+ if (err)
+ dev_err(eth->dev, "%s: PHY init failed.\n",
+ eth->netdev[i]->name);
+ }
/* restart DMA and enable IRQs */
for (i = 0; i < MTK_MAC_COUNT; i++) {
dev_close(eth->netdev[i]);
}
}
+
+ dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__);
+
+ clear_bit_unlock(MTK_RESETTING, &eth->state);
+
rtnl_unlock();
}
-static int mtk_cleanup(struct mtk_eth *eth)
+static int mtk_free_dev(struct mtk_eth *eth)
{
int i;
for (i = 0; i < MTK_MAC_COUNT; i++) {
if (!eth->netdev[i])
continue;
+ free_netdev(eth->netdev[i]);
+ }
+
+ return 0;
+}
+
+static int mtk_unreg_dev(struct mtk_eth *eth)
+{
+ int i;
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->netdev[i])
+ continue;
unregister_netdev(eth->netdev[i]);
- free_netdev(eth->netdev[i]);
}
+
+ return 0;
+}
+
+static int mtk_cleanup(struct mtk_eth *eth)
+{
+ mtk_unreg_dev(eth);
+ mtk_free_dev(eth);
cancel_work_sync(&eth->pending_work);
return 0;
}
-static int mtk_get_settings(struct net_device *dev,
- struct ethtool_cmd *cmd)
+int mtk_get_link_ksettings(struct net_device *ndev,
+ struct ethtool_link_ksettings *cmd)
{
- struct mtk_mac *mac = netdev_priv(dev);
- int err;
+ struct mtk_mac *mac = netdev_priv(ndev);
- err = phy_read_status(mac->phy_dev);
- if (err)
- return -ENODEV;
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return -EBUSY;
- return phy_ethtool_gset(mac->phy_dev, cmd);
+ return phy_ethtool_ksettings_get(ndev->phydev, cmd);
}
-static int mtk_set_settings(struct net_device *dev,
- struct ethtool_cmd *cmd)
+int mtk_set_link_ksettings(struct net_device *ndev,
+ const struct ethtool_link_ksettings *cmd)
{
- struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_mac *mac = netdev_priv(ndev);
- if (cmd->phy_address != mac->phy_dev->mdio.addr) {
- mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus,
- cmd->phy_address);
- if (!mac->phy_dev)
- return -ENODEV;
- }
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return -EBUSY;
- return phy_ethtool_sset(mac->phy_dev, cmd);
+ return phy_ethtool_ksettings_set(ndev->phydev, cmd);
}
static void mtk_get_drvinfo(struct net_device *dev,
{
struct mtk_mac *mac = netdev_priv(dev);
- return genphy_restart_aneg(mac->phy_dev);
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return -EBUSY;
+
+ return genphy_restart_aneg(dev->phydev);
}
static u32 mtk_get_link(struct net_device *dev)
struct mtk_mac *mac = netdev_priv(dev);
int err;
- err = genphy_update_link(mac->phy_dev);
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return -EBUSY;
+
+ err = genphy_update_link(dev->phydev);
if (err)
return ethtool_op_get_link(dev);
- return mac->phy_dev->link;
+ return dev->phydev->link;
}
static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data)
unsigned int start;
int i;
+ if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ return;
+
if (netif_running(dev) && netif_device_present(dev)) {
if (spin_trylock(&hwstats->stats_lock)) {
mtk_stats_update_mac(mac);
}
}
+ data_src = (u64 *)hwstats;
+
do {
- data_src = (u64*)hwstats;
data_dst = data;
start = u64_stats_fetch_begin_irq(&hwstats->syncp);
} while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
}
-static struct ethtool_ops mtk_ethtool_ops = {
- .get_settings = mtk_get_settings,
- .set_settings = mtk_set_settings,
+static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_GRXRINGS:
+ if (dev->features & NETIF_F_LRO) {
+ cmd->data = MTK_MAX_RX_RING_NUM;
+ ret = 0;
+ }
+ break;
+ case ETHTOOL_GRXCLSRLCNT:
+ if (dev->features & NETIF_F_LRO) {
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ cmd->rule_cnt = mac->hwlro_ip_cnt;
+ ret = 0;
+ }
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ if (dev->features & NETIF_F_LRO)
+ ret = mtk_hwlro_get_fdir_entry(dev, cmd);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ if (dev->features & NETIF_F_LRO)
+ ret = mtk_hwlro_get_fdir_all(dev, cmd,
+ rule_locs);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ int ret = -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ if (dev->features & NETIF_F_LRO)
+ ret = mtk_hwlro_add_ipaddr(dev, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ if (dev->features & NETIF_F_LRO)
+ ret = mtk_hwlro_del_ipaddr(dev, cmd);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static const struct ethtool_ops mtk_ethtool_ops = {
+ .get_link_ksettings = mtk_get_link_ksettings,
+ .set_link_ksettings = mtk_set_link_ksettings,
.get_drvinfo = mtk_get_drvinfo,
.get_msglevel = mtk_get_msglevel,
.set_msglevel = mtk_set_msglevel,
.get_strings = mtk_get_strings,
.get_sset_count = mtk_get_sset_count,
.get_ethtool_stats = mtk_get_ethtool_stats,
+ .get_rxnfc = mtk_get_rxnfc,
+ .set_rxnfc = mtk_set_rxnfc,
};
static const struct net_device_ops mtk_netdev_ops = {
.ndo_change_mtu = eth_change_mtu,
.ndo_tx_timeout = mtk_tx_timeout,
.ndo_get_stats64 = mtk_get_stats64,
+ .ndo_fix_features = mtk_fix_features,
+ .ndo_set_features = mtk_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = mtk_poll_controller,
#endif
mac->hw = eth;
mac->of_node = np;
+ memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+ mac->hwlro_ip_cnt = 0;
+
mac->hw_stats = devm_kzalloc(eth->dev,
sizeof(*mac->hw_stats),
GFP_KERNEL);
eth->netdev[id]->watchdog_timeo = 5 * HZ;
eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
eth->netdev[id]->base_addr = (unsigned long)eth->base;
+
+ eth->netdev[id]->hw_features = MTK_HW_FEATURES;
+ if (eth->hwlro)
+ eth->netdev[id]->hw_features |= NETIF_F_LRO;
+
eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
eth->netdev[id]->features |= MTK_HW_FEATURES;
eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
- err = register_netdev(eth->netdev[id]);
- if (err) {
- dev_err(eth->dev, "error bringing up device\n");
- goto free_netdev;
- }
eth->netdev[id]->irq = eth->irq[0];
- netif_info(eth, probe, eth->netdev[id],
- "mediatek frame engine at 0x%08lx, irq %d\n",
- eth->netdev[id]->base_addr, eth->irq[0]);
-
return 0;
free_netdev:
return PTR_ERR(eth->pctl);
}
- eth->rstc = devm_reset_control_get(&pdev->dev, "eth");
- if (IS_ERR(eth->rstc)) {
- dev_err(&pdev->dev, "no eth reset found\n");
- return PTR_ERR(eth->rstc);
- }
+ eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro");
for (i = 0; i < 3; i++) {
eth->irq[i] = platform_get_irq(pdev, i);
}
}
- clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
- clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
- clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
- clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
-
eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
INIT_WORK(&eth->pending_work, mtk_pending_work);
err = mtk_add_mac(eth, mac_np);
if (err)
- goto err_free_dev;
+ goto err_deinit_hw;
+ }
+
+ err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
+ dev_name(eth->dev), eth);
+ if (err)
+ goto err_free_dev;
+
+ err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
+ dev_name(eth->dev), eth);
+ if (err)
+ goto err_free_dev;
+
+ err = mtk_mdio_init(eth);
+ if (err)
+ goto err_free_dev;
+
+ for (i = 0; i < MTK_MAX_DEVS; i++) {
+ if (!eth->netdev[i])
+ continue;
+
+ err = register_netdev(eth->netdev[i]);
+ if (err) {
+ dev_err(eth->dev, "error bringing up device\n");
+ goto err_deinit_mdio;
+ }
+ netif_info(eth, probe, eth->netdev[i],
+ "mediatek frame engine at 0x%08lx, irq %d\n",
+ eth->netdev[i]->base_addr, eth->irq[0]);
}
/* we run 2 devices on the same DMA ring so we need a dummy device
return 0;
+err_deinit_mdio:
+ mtk_mdio_cleanup(eth);
err_free_dev:
- mtk_cleanup(eth);
+ mtk_free_dev(eth);
+err_deinit_hw:
+ mtk_hw_deinit(eth);
+
return err;
}
mtk_stop(eth->netdev[i]);
}
- clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
- clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
- clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
- clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
+ mtk_hw_deinit(eth);
netif_napi_del(&eth->tx_napi);
netif_napi_del(&eth->rx_napi);
mtk_cleanup(eth);
mtk_mdio_cleanup(eth);
- platform_set_drvdata(pdev, NULL);
return 0;
}
{ .compatible = "mediatek,mt7623-eth" },
{},
};
+ MODULE_DEVICE_TABLE(of, of_mtk_match);
static struct platform_driver mtk_driver = {
.probe = mtk_probe,
MC_ADDR_CHANGE | \
PROMISC_CHANGE)
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
-
static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
u32 events_mask)
{
- int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)];
- int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+ int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
void *nic_vport_ctx;
- int err;
-
- memset(out, 0, sizeof(out));
- memset(in, 0, sizeof(in));
MLX5_SET(modify_nic_vport_context_in, in,
opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
MLX5_SET(nic_vport_context, nic_vport_ctx,
event_on_promisc_change, 1);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (err)
- goto ex;
- err = mlx5_cmd_status_to_err_v2(out);
- if (err)
- goto ex;
- return 0;
-ex:
- return err;
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
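
The conversion above is representative of the rest of this series: the command mailboxes are zero-initialized at declaration and mlx5_cmd_exec() now reports both transport and command-status errors, so the separate mlx5_cmd_status_to_err_v2()/_check_status step disappears. A minimal sketch of the resulting shape, using QUERY_VPORT_STATE purely as an illustrative command:

static int example_query_vport_state(struct mlx5_core_dev *dev, u16 vport)
{
	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};

	MLX5_SET(query_vport_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VPORT_STATE);
	MLX5_SET(query_vport_state_in, in, vport_number, vport);

	/* mlx5_cmd_exec() already checks the returned command status,
	 * so the caller only inspects this single error code
	 */
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
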
/* E-Switch vport context HW commands */
-static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
- u32 *out, int outlen)
-{
- u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)];
-
- memset(in, 0, sizeof(in));
-
- MLX5_SET(query_nic_vport_context_in, in, opcode,
- MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
-
- MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
- if (vport)
- MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
-
- return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
-}
-
-static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
- u16 *vlan, u8 *qos)
-{
- u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)];
- int err;
- bool cvlan_strip;
- bool cvlan_insert;
-
- memset(out, 0, sizeof(out));
-
- *vlan = 0;
- *qos = 0;
-
- if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
- !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
- return -ENOTSUPP;
-
- err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
- if (err)
- goto out;
-
- cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
- esw_vport_context.vport_cvlan_strip);
-
- cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
- esw_vport_context.vport_cvlan_insert);
-
- if (cvlan_strip || cvlan_insert) {
- *vlan = MLX5_GET(query_esw_vport_context_out, out,
- esw_vport_context.cvlan_id);
- *qos = MLX5_GET(query_esw_vport_context_out, out,
- esw_vport_context.cvlan_pcp);
- }
-
- esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
- vport, *vlan, *qos);
-out:
- return err;
-}
-
static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
void *in, int inlen)
{
- u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)];
-
- memset(out, 0, sizeof(out));
+ u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
+ MLX5_SET(modify_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
if (vport)
MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
-
- MLX5_SET(modify_esw_vport_context_in, in, opcode,
- MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
-
- return mlx5_cmd_exec_check_status(dev, in, inlen,
- out, sizeof(out));
+ return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
u16 vlan, u8 qos, bool set)
{
- u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)];
-
- memset(in, 0, sizeof(in));
+ u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
!MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
vport, vlan, qos, set);
-
if (set) {
MLX5_SET(modify_esw_vport_context_in, in,
esw_vport_context.vport_cvlan_strip, 1);
static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
u8 *mac, u8 vlan_valid, u16 vlan)
{
- u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)];
- u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)];
+ u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
u8 *in_mac_addr;
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
-
MLX5_SET(set_l2_table_entry_in, in, opcode,
MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
MLX5_SET(set_l2_table_entry_in, in, table_index, index);
in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
ether_addr_copy(&in_mac_addr[2], mac);
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
- out, sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
{
- u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)];
- u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)];
-
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
+ u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
MLX5_SET(delete_l2_table_entry_in, in, opcode,
MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
- out, sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
- pr_warn("FDB: Failed to alloc match parameters\n");
+ esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
return NULL;
}
dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
0, &dest);
if (IS_ERR(flow_rule)) {
- pr_warn(
- "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+ esw_warn(esw->dev,
+ "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
flow_rule = NULL;
}
esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
vport_num, promisc_all, promisc_mc);
- if (!vport->trusted || !vport->enabled) {
+ if (!vport->info.trusted || !vport->enabled) {
promisc_uc = 0;
promisc_mc = 0;
promisc_all = 0;
struct mlx5_vport *vport)
{
struct mlx5_flow_spec *spec;
- u8 smac[ETH_ALEN];
int err = 0;
u8 *smac_v;
- if (vport->spoofchk) {
- err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac);
- if (err) {
- esw_warn(esw->dev,
- "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n",
- vport->vport, err);
- return err;
- }
+ if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+ mlx5_core_warn(esw->dev,
+ "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
+ vport->vport);
+ return -EPERM;
- if (!is_valid_ether_addr(smac)) {
- mlx5_core_warn(esw->dev,
- "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
- vport->vport);
- return -EPERM;
- }
}
esw_vport_cleanup_ingress_rules(esw, vport);
- if (!vport->vlan && !vport->qos && !vport->spoofchk) {
+ if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
esw_vport_disable_ingress_acl(esw, vport);
return 0;
}
esw_debug(esw->dev,
"vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
- vport->vport, vport->vlan, vport->qos);
+ vport->vport, vport->info.vlan, vport->info.qos);
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
goto out;
}
- if (vport->vlan || vport->qos)
+ if (vport->info.vlan || vport->info.qos)
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
- if (vport->spoofchk) {
+ if (vport->info.spoofchk) {
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
smac_v = MLX5_ADDR_OF(fte_match_param,
spec->match_value,
outer_headers.smac_47_16);
- ether_addr_copy(smac_v, smac);
+ ether_addr_copy(smac_v, vport->info.mac);
}
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
0, NULL);
if (IS_ERR(vport->ingress.allow_rule)) {
err = PTR_ERR(vport->ingress.allow_rule);
- pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
- vport->vport, err);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress allow rule, err(%d)\n",
+ vport->vport, err);
vport->ingress.allow_rule = NULL;
goto out;
}
0, NULL);
if (IS_ERR(vport->ingress.drop_rule)) {
err = PTR_ERR(vport->ingress.drop_rule);
- pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
- vport->vport, err);
+ esw_warn(esw->dev,
+ "vport[%d] configure ingress drop rule, err(%d)\n",
+ vport->vport, err);
vport->ingress.drop_rule = NULL;
goto out;
}
esw_vport_cleanup_egress_rules(esw, vport);
- if (!vport->vlan && !vport->qos) {
+ if (!vport->info.vlan && !vport->info.qos) {
esw_vport_disable_egress_acl(esw, vport);
return 0;
}
esw_debug(esw->dev,
"vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
- vport->vport, vport->vlan, vport->qos);
+ vport->vport, vport->info.vlan, vport->info.qos);
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec) {
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag);
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
vport->egress.allowed_vlan =
0, NULL);
if (IS_ERR(vport->egress.allowed_vlan)) {
err = PTR_ERR(vport->egress.allowed_vlan);
- pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
- vport->vport, err);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
+ vport->vport, err);
vport->egress.allowed_vlan = NULL;
goto out;
}
0, NULL);
if (IS_ERR(vport->egress.drop_rule)) {
err = PTR_ERR(vport->egress.drop_rule);
- pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n",
- vport->vport, err);
+ esw_warn(esw->dev,
+ "vport[%d] configure egress drop rule failed, err(%d)\n",
+ vport->vport, err);
vport->egress.drop_rule = NULL;
}
out:
return err;
}
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{
+ ((u8 *)node_guid)[7] = mac[0];
+ ((u8 *)node_guid)[6] = mac[1];
+ ((u8 *)node_guid)[5] = mac[2];
+ ((u8 *)node_guid)[4] = 0xff;
+ ((u8 *)node_guid)[3] = 0xfe;
+ ((u8 *)node_guid)[2] = mac[3];
+ ((u8 *)node_guid)[1] = mac[4];
+ ((u8 *)node_guid)[0] = mac[5];
+}
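+
The helper builds an EUI-64-style node GUID by splitting the MAC around the ff:fe marker bytes. A small stand-alone sketch of the same byte placement, with the host-endianness caveat noted in the comment:

#include <stdio.h>
#include <stdint.h>

static void node_guid_gen_from_mac(uint64_t *node_guid, const uint8_t mac[6])
{
	uint8_t *p = (uint8_t *)node_guid;

	/* same byte placement as the kernel helper above */
	p[7] = mac[0];
	p[6] = mac[1];
	p[5] = mac[2];
	p[4] = 0xff;
	p[3] = 0xfe;
	p[2] = mac[3];
	p[1] = mac[4];
	p[0] = mac[5];
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint64_t guid = 0;

	node_guid_gen_from_mac(&guid, mac);
	/* on a little-endian host this prints 001122fffe334455 */
	printf("%016llx\n", (unsigned long long)guid);
	return 0;
}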
+
+static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport)
+{
+ int vport_num = vport->vport;
+
+ if (!vport_num)
+ return;
+
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport_num,
+ vport->info.link_state);
+ mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
+ mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+ modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
+ (vport->info.vlan || vport->info.qos));
+
+ /* Only legacy mode needs ACLs */
+ if (esw->mode == SRIOV_LEGACY) {
+ esw_vport_ingress_config(esw, vport);
+ esw_vport_egress_config(esw, vport);
+ }
+}
static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
int enable_events)
{
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
- /* Only VFs need ACLs for VST and spoofchk filtering */
- if (vport_num && esw->mode == SRIOV_LEGACY) {
- esw_vport_ingress_config(esw, vport);
- esw_vport_egress_config(esw, vport);
- }
-
- mlx5_modify_vport_admin_state(esw->dev,
- MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- vport_num,
- MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
+ /* Restore old vport configuration */
+ esw_apply_vport_conf(esw, vport);
/* Sync with current vport context */
vport->enabled_events = enable_events;
vport->enabled = true;
/* only PF is trusted by default */
- vport->trusted = (vport_num) ? false : true;
+ if (!vport_num)
+ vport->info.trusted = true;
+
esw_vport_change_handle_locked(vport);
esw->enabled_vports++;
vport->enabled = false;
synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC));
-
- mlx5_modify_vport_admin_state(esw->dev,
- MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- vport_num,
- MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
/* Wait for current already scheduled events to complete */
flush_workqueue(esw->work_queue);
/* Disable events from this vport */
*/
esw_vport_change_handle_locked(vport);
vport->enabled_events = 0;
+
if (vport_num && esw->mode == SRIOV_LEGACY) {
+ mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport_num,
+ MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
esw_vport_disable_egress_acl(esw, vport);
esw_vport_disable_ingress_acl(esw, vport);
}
abort:
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+ esw->mode = SRIOV_NONE;
return err;
}
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
+{
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return;
+
+ esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+ /* VF Vports will be enabled when SRIOV is enabled */
+}
+
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
+{
+ if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ return;
+
+ esw_disable_vport(esw, 0);
+}
+
int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
struct mlx5_vport *vport = &esw->vports[vport_num];
vport->vport = vport_num;
+ vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
vport->dev = dev;
INIT_WORK(&vport->vport_change_handler,
esw_vport_change_handler);
esw->mode = SRIOV_NONE;
dev->priv.eswitch = esw;
- esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
- /* VF Vports will be enabled when SRIOV is enabled */
return 0;
abort:
if (esw->work_queue)
return;
esw_info(esw->dev, "cleanup\n");
- esw_disable_vport(esw, 0);
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
-static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
-{
- ((u8 *)node_guid)[7] = mac[0];
- ((u8 *)node_guid)[6] = mac[1];
- ((u8 *)node_guid)[5] = mac[2];
- ((u8 *)node_guid)[4] = 0xff;
- ((u8 *)node_guid)[3] = 0xfe;
- ((u8 *)node_guid)[2] = mac[3];
- ((u8 *)node_guid)[1] = mac[4];
- ((u8 *)node_guid)[0] = mac[5];
-}
-
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
int vport, u8 mac[ETH_ALEN])
{
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
+ mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
- if (evport->spoofchk && !is_valid_ether_addr(mac)) {
+ if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
mlx5_core_warn(esw->dev,
"MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
vport);
- return -EPERM;
+ err = -EPERM;
+ goto unlock;
}
err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
mlx5_core_warn(esw->dev,
"Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
vport, err);
- return err;
+ goto unlock;
}
node_guid_gen_from_mac(&node_guid, mac);
"Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
vport, err);
- mutex_lock(&esw->state_lock);
+ ether_addr_copy(evport->info.mac, mac);
+ evport->info.node_guid = node_guid;
if (evport->enabled && esw->mode == SRIOV_LEGACY)
err = esw_vport_ingress_config(esw, evport);
+
+unlock:
mutex_unlock(&esw->state_lock);
return err;
}
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
int vport, int link_state)
{
+ struct mlx5_vport *evport;
+ int err = 0;
+
if (!ESW_ALLOWED(esw))
return -EPERM;
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
- return mlx5_modify_vport_admin_state(esw->dev,
- MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- vport, link_state);
+ mutex_lock(&esw->state_lock);
+ evport = &esw->vports[vport];
+
+ err = mlx5_modify_vport_admin_state(esw->dev,
+ MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ vport, link_state);
+ if (err) {
+ mlx5_core_warn(esw->dev,
+ "Failed to set vport %d link state, err = %d",
+ vport, err);
+ goto unlock;
+ }
+
+ evport->info.link_state = link_state;
+
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
}
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
int vport, struct ifla_vf_info *ivi)
{
struct mlx5_vport *evport;
- u16 vlan;
- u8 qos;
if (!ESW_ALLOWED(esw))
return -EPERM;
memset(ivi, 0, sizeof(*ivi));
ivi->vf = vport - 1;
- mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
- ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
- MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- vport);
- query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
- ivi->vlan = vlan;
- ivi->qos = qos;
- ivi->spoofchk = evport->spoofchk;
+ mutex_lock(&esw->state_lock);
+ ether_addr_copy(ivi->mac, evport->info.mac);
+ ivi->linkstate = evport->info.link_state;
+ ivi->vlan = evport->info.vlan;
+ ivi->qos = evport->info.qos;
+ ivi->spoofchk = evport->info.spoofchk;
+ ivi->trusted = evport->info.trusted;
+ mutex_unlock(&esw->state_lock);
return 0;
}
if (vlan || qos)
set = 1;
+ mutex_lock(&esw->state_lock);
evport = &esw->vports[vport];
err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
if (err)
- return err;
+ goto unlock;
- mutex_lock(&esw->state_lock);
- evport->vlan = vlan;
- evport->qos = qos;
+ evport->info.vlan = vlan;
+ evport->info.qos = qos;
if (evport->enabled && esw->mode == SRIOV_LEGACY) {
err = esw_vport_ingress_config(esw, evport);
if (err)
- goto out;
+ goto unlock;
err = esw_vport_egress_config(esw, evport);
}
-out:
+unlock:
mutex_unlock(&esw->state_lock);
return err;
}
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
- evport = &esw->vports[vport];
-
mutex_lock(&esw->state_lock);
- pschk = evport->spoofchk;
- evport->spoofchk = spoofchk;
- if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ evport = &esw->vports[vport];
+ pschk = evport->info.spoofchk;
+ evport->info.spoofchk = spoofchk;
+ if (evport->enabled && esw->mode == SRIOV_LEGACY)
err = esw_vport_ingress_config(esw, evport);
- if (err)
- evport->spoofchk = pschk;
- }
+ if (err)
+ evport->info.spoofchk = pschk;
mutex_unlock(&esw->state_lock);
return err;
if (!LEGAL_VPORT(esw, vport))
return -EINVAL;
- evport = &esw->vports[vport];
-
mutex_lock(&esw->state_lock);
- evport->trusted = setting;
+ evport = &esw->vports[vport];
+ evport->info.trusted = setting;
if (evport->enabled)
esw_vport_change_handle_locked(evport);
mutex_unlock(&esw->state_lock);
struct ifla_vf_stats *vf_stats)
{
int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
- u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+ u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
int err = 0;
u32 *out;
if (!out)
return -ENOMEM;
- memset(in, 0, sizeof(in));
-
MLX5_SET(query_vport_counter_in, in, opcode,
MLX5_CMD_OP_QUERY_VPORT_COUNTER);
MLX5_SET(query_vport_counter_in, in, op_mod, 0);
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft)
{
- u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
- u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
-
- memset(in, 0, sizeof(in));
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
}
- memset(out, 0, sizeof(out));
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
u16 vport,
+ enum fs_flow_table_op_mod op_mod,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id)
{
- u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
- u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
+ u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
int err;
- memset(in, 0, sizeof(in));
-
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
- if (next_ft) {
- MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
- MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
- }
MLX5_SET(create_flow_table_in, in, table_type, type);
MLX5_SET(create_flow_table_in, in, level, level);
MLX5_SET(create_flow_table_in, in, log_size, log_size);
MLX5_SET(create_flow_table_in, in, other_vport, 1);
}
- memset(out, 0, sizeof(out));
- err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
+ switch (op_mod) {
+ case FS_FT_OP_MOD_NORMAL:
+ if (next_ft) {
+ MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
+ MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
+ }
+ break;
+
+ case FS_FT_OP_MOD_LAG_DEMUX:
+ MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+ if (next_ft)
+ MLX5_SET(create_flow_table_in, in, lag_master_next_table_id,
+ next_ft->id);
+ break;
+ }
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (!err)
*table_id = MLX5_GET(create_flow_table_out, out,
table_id);
int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft)
{
- u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)];
- u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)];
-
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
MLX5_SET(destroy_flow_table_in, in, opcode,
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
}
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft)
{
- u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)];
- u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)];
-
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
+ u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
MLX5_SET(modify_flow_table_in, in, opcode,
MLX5_CMD_OP_MODIFY_FLOW_TABLE);
MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
- if (ft->vport) {
- MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
- MLX5_SET(modify_flow_table_in, in, other_vport, 1);
- }
- MLX5_SET(modify_flow_table_in, in, modify_field_select,
- MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
- if (next_ft) {
- MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
- MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
+
+ if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in,
+ lag_master_next_table_id, next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in,
+ lag_master_next_table_id, 0);
+ }
} else {
- MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+ if (ft->vport) {
+ MLX5_SET(modify_flow_table_in, in, vport_number,
+ ft->vport);
+ MLX5_SET(modify_flow_table_in, in, other_vport, 1);
+ }
+ MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+ if (next_ft) {
+ MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
+ MLX5_SET(modify_flow_table_in, in, table_miss_id,
+ next_ft->id);
+ } else {
+ MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+ }
}
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
u32 *in,
unsigned int *group_id)
{
+ u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- u32 out[MLX5_ST_SZ_DW(create_flow_group_out)];
int err;
- memset(out, 0, sizeof(out));
-
MLX5_SET(create_flow_group_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_GROUP);
MLX5_SET(create_flow_group_in, in, table_type, ft->type);
MLX5_SET(create_flow_group_in, in, other_vport, 1);
}
- err = mlx5_cmd_exec_check_status(dev, in,
- inlen, out,
- sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
if (!err)
*group_id = MLX5_GET(create_flow_group_out, out,
group_id);
-
return err;
}
struct mlx5_flow_table *ft,
unsigned int group_id)
{
- u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)];
- u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)];
-
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
+ u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0};
MLX5_SET(destroy_flow_group_in, in, opcode,
MLX5_CMD_OP_DESTROY_FLOW_GROUP);
MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
}
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
{
unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
- u32 out[MLX5_ST_SZ_DW(set_fte_out)];
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
struct mlx5_flow_rule *dst;
void *in_flow_context;
void *in_match_value;
list_size);
}
- memset(out, 0, sizeof(out));
- err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
- sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
kvfree(in);
-
return err;
}
unsigned group_id,
struct fs_fte *fte)
{
- return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+ return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
}
int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
unsigned int index)
{
- u32 out[MLX5_ST_SZ_DW(delete_fte_out)];
- u32 in[MLX5_ST_SZ_DW(delete_fte_in)];
- int err;
-
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
+ u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0};
MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
MLX5_SET(delete_fte_in, in, table_type, ft->type);
MLX5_SET(delete_fte_in, in, other_vport, 1);
}
- err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
-
- return err;
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
{
- u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)];
- u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)];
+ u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
int err;
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
-
MLX5_SET(alloc_flow_counter_in, in, opcode,
MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
- err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
- if (err)
- return err;
-
- *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
-
- return 0;
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ return err;
}
int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)];
- u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)];
-
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
+ u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
MLX5_SET(dealloc_flow_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
-
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- sizeof(out));
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
u64 *packets, u64 *bytes)
{
u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
- MLX5_ST_SZ_BYTES(traffic_counter)];
- u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+ MLX5_ST_SZ_BYTES(traffic_counter)] = {0};
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
void *stats;
int err = 0;
- memset(in, 0, sizeof(in));
- memset(out, 0, sizeof(out));
-
MLX5_SET(query_flow_counter_in, in, opcode,
MLX5_CMD_OP_QUERY_FLOW_COUNTER);
MLX5_SET(query_flow_counter_in, in, op_mod, 0);
MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
-
- err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
*packets = MLX5_GET64(traffic_counter, stats, packets);
*bytes = MLX5_GET64(traffic_counter, stats, octets);
-
return 0;
}
mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
{
struct mlx5_cmd_fc_bulk *b;
- int outlen = sizeof(*b) +
+ int outlen =
MLX5_ST_SZ_BYTES(query_flow_counter_out) +
MLX5_ST_SZ_BYTES(traffic_counter) * num;
- b = kzalloc(outlen, GFP_KERNEL);
+ b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
if (!b)
return NULL;
int
mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
{
- u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
-
- memset(in, 0, sizeof(in));
+ u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
MLX5_SET(query_flow_counter_in, in, opcode,
MLX5_CMD_OP_QUERY_FLOW_COUNTER);
MLX5_SET(query_flow_counter_in, in, op_mod, 0);
MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
-
- return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
- b->out, b->outlen);
+ return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
}
void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
*packets = MLX5_GET64(traffic_counter, stats, packets);
*bytes = MLX5_GET64(traffic_counter, stats, octets);
}
+
+#define MAX_ENCAP_SIZE (128)
+
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+ int header_type,
+ size_t size,
+ void *encap_header,
+ u32 *encap_id)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+ u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
+ (MAX_ENCAP_SIZE / sizeof(u32))];
+ void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
+ encap_header);
+ void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
+ encap_header);
+ int inlen = header - (void *)in + size;
+ int err;
+
+ if (size > MAX_ENCAP_SIZE)
+ return -EINVAL;
+
+ memset(in, 0, inlen);
+ MLX5_SET(alloc_encap_header_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+ MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+ MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+ memcpy(header, encap_header, size);
+
+ memset(out, 0, sizeof(out));
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+ *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+ return err;
+}
+
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+ memset(in, 0, sizeof(in));
+ MLX5_SET(dealloc_encap_header_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+ MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
#include <linux/ktime.h>
+#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include "nfp_net_ctrl.h"
}
}
-/**
- * nfp_net_set_hash() - Set SKB hash data
- * @netdev: adapter's net_device structure
- * @skb: SKB to set the hash data on
- * @rxd: RX descriptor
- *
- * The RSS hash and hash-type are pre-pended to the packet data.
- * Extract and decode it and set the skb fields.
- */
static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
- struct nfp_net_rx_desc *rxd)
+ unsigned int type, __be32 *hash)
{
- struct nfp_net_rx_hash *rx_hash;
-
- if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
- !(netdev->features & NETIF_F_RXHASH))
+ if (!(netdev->features & NETIF_F_RXHASH))
return;
- rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
-
- switch (be32_to_cpu(rx_hash->hash_type)) {
+ switch (type) {
case NFP_NET_RSS_IPV4:
case NFP_NET_RSS_IPV6:
case NFP_NET_RSS_IPV6_EX:
- skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
+ skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
break;
default:
- skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
+ skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
break;
}
}
+static void
+nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+ struct nfp_net_rx_desc *rxd)
+{
+ struct nfp_net_rx_hash *rx_hash;
+
+ if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+ return;
+
+ rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
+
+ nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+ &rx_hash->hash);
+}
+
+static void *
+nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+ int meta_len)
+{
+ u8 *data = skb->data - meta_len;
+ u32 meta_info;
+
+ meta_info = get_unaligned_be32(data);
+ data += 4;
+
+ while (meta_info) {
+ switch (meta_info & NFP_NET_META_FIELD_MASK) {
+ case NFP_NET_META_HASH:
+ meta_info >>= NFP_NET_META_FIELD_SIZE;
+ nfp_net_set_hash(netdev, skb,
+ meta_info & NFP_NET_META_FIELD_MASK,
+ (__be32 *)data);
+ data += 4;
+ break;
+ case NFP_NET_META_MARK:
+ skb->mark = get_unaligned_be32(data);
+ data += 4;
+ break;
+ default:
+ return NULL;
+ }
+
+ meta_info >>= NFP_NET_META_FIELD_SIZE;
+ }
+
+ return data;
+}
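(Aside: the new parser above walks firmware-prepended metadata laid out as one big-endian 32-bit type word, where each 4-bit field announces one 4-byte value that follows it. The stand-alone C sketch below mirrors that walk in userspace; the field codes, the 4-bit field width and the sample buffer are illustrative placeholders, not the device's actual values.)

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>		/* ntohl() */

#define META_FIELD_SIZE	4	/* bits per field code (placeholder) */
#define META_FIELD_MASK	0xf
#define META_HASH	1	/* placeholder codes, not the HW values */
#define META_MARK	2

/* Parse a prepended metadata block: a be32 type word, then one be32 value
 * per field.  Returns bytes consumed, or -1 on an unknown field (the
 * driver drops such packets).
 */
static int parse_meta(const uint8_t *data, uint32_t *hash,
		      uint32_t *hash_type, uint32_t *mark)
{
	const uint8_t *p = data;
	uint32_t meta_info;

	memcpy(&meta_info, p, 4);
	meta_info = ntohl(meta_info);
	p += 4;

	while (meta_info) {
		switch (meta_info & META_FIELD_MASK) {
		case META_HASH:
			/* next 4-bit field is the hash type, then the value */
			meta_info >>= META_FIELD_SIZE;
			*hash_type = meta_info & META_FIELD_MASK;
			memcpy(hash, p, 4);
			*hash = ntohl(*hash);
			p += 4;
			break;
		case META_MARK:
			memcpy(mark, p, 4);
			*mark = ntohl(*mark);
			p += 4;
			break;
		default:
			return -1;
		}
		meta_info >>= META_FIELD_SIZE;
	}
	return p - data;
}

int main(void)
{
	/* type word 0x00000002 = one MARK field, followed by its value */
	const uint8_t buf[] = { 0x00, 0x00, 0x00, 0x02,
				0xde, 0xad, 0xbe, 0xef };
	uint32_t hash = 0, hash_type = 0, mark = 0;
	int used = parse_meta(buf, &hash, &hash_type, &mark);

	printf("consumed %d bytes, mark 0x%x\n", used, (unsigned)mark);
	return 0;
}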
+
/**
* nfp_net_rx() - receive up to @budget packets on @rx_ring
* @rx_ring: RX ring to receive from
skb_reserve(skb, nn->rx_offset);
skb_put(skb, data_len - meta_len);
- nfp_net_set_hash(nn->netdev, skb, rxd);
-
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_pkts++;
r_vec->rx_bytes += skb->len;
u64_stats_update_end(&r_vec->rx_sync);
+ if (nn->fw_ver.major <= 3) {
+ nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+ } else if (meta_len) {
+ void *end;
+
+ end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
+ if (unlikely(end != skb->data)) {
+ u64_stats_update_begin(&r_vec->rx_sync);
+ r_vec->rx_drops++;
+ u64_stats_update_end(&r_vec->rx_sync);
+
+ dev_kfree_skb_any(skb);
+ nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+ continue;
+ }
+ }
+
skb_record_rx_queue(skb, rx_ring->idx);
skb->protocol = eth_type_trans(skb, nn->netdev);
nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
GFP_KERNEL);
- if (!nn->rx_rings)
+ if (!nn->rx_rings) {
+ err = -ENOMEM;
goto err_free_lsc;
+ }
nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
GFP_KERNEL);
- if (!nn->tx_rings)
+ if (!nn->tx_rings) {
+ err = -ENOMEM;
goto err_free_rx_rings;
+ }
for (r = 0; r < nn->num_r_vecs; r++) {
err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
return stats;
}
+static bool nfp_net_ebpf_capable(struct nfp_net *nn)
+{
+ if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
+ nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
+ return true;
+ return false;
+}
+
+static int
+nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+ struct tc_to_netdev *tc)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+ return -ENOTSUPP;
+ if (proto != htons(ETH_P_ALL))
+ return -ENOTSUPP;
+
+ if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
+ return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+
+ return -EINVAL;
+}
+
static int nfp_net_set_features(struct net_device *netdev,
netdev_features_t features)
{
new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
}
+ if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+ nn_err(nn, "Cannot disable HW TC offload while in use\n");
+ return -EBUSY;
+ }
+
nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
netdev->features, features, changed);
.ndo_stop = nfp_net_netdev_close,
.ndo_start_xmit = nfp_net_tx,
.ndo_get_stats64 = nfp_net_stat64,
+ .ndo_setup_tc = nfp_net_setup_tc,
.ndo_tx_timeout = nfp_net_tx_timeout,
.ndo_set_rx_mode = nfp_net_set_rx_mode,
.ndo_change_mtu = nfp_net_change_mtu,
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
- nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
- nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "");
+ nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
+ nfp_net_ebpf_capable(nn) ? "BPF " : "");
}
/**
nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
spin_lock_init(&nn->reconfig_lock);
+ spin_lock_init(&nn->rx_filter_lock);
spin_lock_init(&nn->link_status_lock);
setup_timer(&nn->reconfig_timer,
nfp_net_reconfig_timer, (unsigned long)nn);
+ setup_timer(&nn->rx_filter_stats_timer,
+ nfp_net_filter_stats_timer, (unsigned long)nn);
return nn;
}
netdev->features = netdev->hw_features;
+ if (nfp_net_ebpf_capable(nn))
+ netdev->hw_features |= NETIF_F_HW_TC;
+
/* Advertise but disable TSO by default. */
netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
return true;
}
-void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
PUBLIC_PORT);
p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn));
}
-void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length);
u32 tmp, i;
return 0;
}
-static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mcp_info *p_info = p_hwfn->mcp_info;
u32 drv_mb_offsize, mfw_mb_offsize;
return 0;
}
-int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mcp_info *p_info;
u32 size;
size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
- p_info->mfw_mb_shadow =
- kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS(
- p_info->mfw_mb_length), GFP_KERNEL);
+ p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
goto err;
return 0;
err:
- DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n");
qed_mcp_free(p_hwfn);
return -ENOMEM;
}
* access is achieved by setting a blocking flag, which will fail other
* competing contexts to send their mailboxes.
*/
-static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
- u32 cmd)
+static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd)
{
spin_lock_bh(&p_hwfn->mcp_info->lock);
return 0;
}
-static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn,
- u32 cmd)
+static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd)
{
if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
spin_unlock_bh(&p_hwfn->mcp_info->lock);
}
-int qed_mcp_reset(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
u8 delay = CHIP_MCP_RESP_ITER_US;
*o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
} else {
/* FW BUG! */
- DP_ERR(p_hwfn, "MFW failed to respond!\n");
+ DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
+ cmd, param);
*o_mcp_resp = 0;
rc = -EAGAIN;
}
/* MCP not initialized */
if (!qed_mcp_is_init(p_hwfn)) {
- DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+ DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
return -EBUSY;
}
return 0;
}
+int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf)
+{
+ struct qed_mcp_mb_params mb_params;
+ union drv_union_data union_data;
+ int rc;
+
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = cmd;
+ mb_params.param = param;
+ mb_params.p_data_dst = &union_data;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
+ return rc;
+
+ *o_mcp_resp = mb_params.mcp_resp;
+ *o_mcp_param = mb_params.mcp_param;
+
+ *o_txn_size = *o_mcp_param;
+ memcpy(o_buf, &union_data.raw_data, *o_txn_size);
+
+ return 0;
+}
+
int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u32 *p_load_code)
+ struct qed_ptt *p_ptt, u32 *p_load_code)
{
struct qed_dev *cdev = p_hwfn->cdev;
struct qed_mcp_mb_params mb_params;
"Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n",
transceiver_state,
(u32)(p_hwfn->mcp_info->port_addr +
- offsetof(struct public_port,
- transceiver_data)));
+ offsetof(struct public_port, transceiver_data)));
transceiver_state = GET_FIELD(transceiver_state,
ETH_TRANSCEIVER_STATE);
}
static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- bool b_reset)
+ struct qed_ptt *p_ptt, bool b_reset)
{
struct qed_mcp_link_state *p_link;
u8 max_bw, min_bw;
"Received link update [0x%08x] from mfw [Addr 0x%x]\n",
status,
(u32)(p_hwfn->mcp_info->port_addr +
- offsetof(struct public_port,
- link_status)));
+ offsetof(struct public_port, link_status)));
} else {
DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
"Resetting link indications\n");
p_link->partner_adv_speed |=
(status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ?
QED_LINK_PARTNER_SPEED_20G : 0;
+ p_link->partner_adv_speed |=
+ (status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ?
+ QED_LINK_PARTNER_SPEED_25G : 0;
p_link->partner_adv_speed |=
(status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ?
QED_LINK_PARTNER_SPEED_40G : 0;
return 0;
}
+static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ enum MFW_DRV_MSG_TYPE type)
+{
+ enum qed_mcp_protocol_type stats_type;
+ union qed_mcp_protocol_stats stats;
+ struct qed_mcp_mb_params mb_params;
+ union drv_union_data union_data;
+ u32 hsi_param;
+
+ switch (type) {
+ case MFW_DRV_MSG_GET_LAN_STATS:
+ stats_type = QED_MCP_LAN_STATS;
+ hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
+ break;
+ case MFW_DRV_MSG_GET_FCOE_STATS:
+ stats_type = QED_MCP_FCOE_STATS;
+ hsi_param = DRV_MSG_CODE_STATS_TYPE_FCOE;
+ break;
+ case MFW_DRV_MSG_GET_ISCSI_STATS:
+ stats_type = QED_MCP_ISCSI_STATS;
+ hsi_param = DRV_MSG_CODE_STATS_TYPE_ISCSI;
+ break;
+ case MFW_DRV_MSG_GET_RDMA_STATS:
+ stats_type = QED_MCP_RDMA_STATS;
+ hsi_param = DRV_MSG_CODE_STATS_TYPE_RDMA;
+ break;
+ default:
+ DP_NOTICE(p_hwfn, "Invalid protocol type %d\n", type);
+ return;
+ }
+
+ qed_get_protocol_stats(p_hwfn->cdev, stats_type, &stats);
+
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = DRV_MSG_CODE_GET_STATS;
+ mb_params.param = hsi_param;
+ memcpy(&union_data, &stats, sizeof(stats));
+ mb_params.p_data_src = &union_data;
+ qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+}
+
static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
struct public_func *p_shmem_info)
{
static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
- struct public_func *p_data,
- int pfid)
+ struct public_func *p_data, int pfid)
{
u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
PUBLIC_FUNC);
memset(p_data, 0, sizeof(*p_data));
- size = min_t(u32, sizeof(*p_data),
- QED_SECTION_SIZE(mfw_path_offsize));
+ size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
for (i = 0; i < size / sizeof(u32); i++)
((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
func_addr + (i << 2));
return size;
}
-int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt, u8 *p_pf)
-{
- struct public_func shmem_info;
- int i;
-
- /* Find first Ethernet interface in port */
- for (i = 0; i < NUM_OF_ENG_PFS(p_hwfn->cdev);
- i += p_hwfn->cdev->num_ports_in_engines) {
- qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
- MCP_PF_ID_BY_REL(p_hwfn, i));
-
- if (shmem_info.config & FUNC_MF_CFG_FUNC_HIDE)
- continue;
-
- if ((shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK) ==
- FUNC_MF_CFG_PROTOCOL_ETHERNET) {
- *p_pf = (u8)i;
- return 0;
- }
- }
-
- DP_NOTICE(p_hwfn,
- "Failed to find on port an ethernet interface in MF_SI mode\n");
-
- return -EINVAL;
-}
-
-static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_mcp_function_info *p_info;
struct public_func shmem_info;
u32 resp = 0, param = 0;
- qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
- MCP_PF_ID(p_hwfn));
+ qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
qed_read_pf_bandwidth(p_hwfn, &shmem_info);
case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
break;
+ case MFW_DRV_MSG_GET_LAN_STATS:
+ case MFW_DRV_MSG_GET_FCOE_STATS:
+ case MFW_DRV_MSG_GET_ISCSI_STATS:
+ case MFW_DRV_MSG_GET_RDMA_STATS:
+ qed_mcp_send_protocol_stats(p_hwfn, p_ptt, i);
+ break;
case MFW_DRV_MSG_BW_UPDATE:
qed_mcp_update_bw(p_hwfn, p_ptt);
break;
return 0;
}
-int qed_mcp_get_media_type(struct qed_dev *cdev,
- u32 *p_media_type)
+int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
{
struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
struct qed_ptt *p_ptt;
return -EINVAL;
if (!qed_mcp_is_init(p_hwfn)) {
- DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+ DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
return -EBUSY;
}
struct qed_mcp_function_info *info;
struct public_func shmem_info;
- qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
- MCP_PF_ID(p_hwfn));
+ qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
info = &p_hwfn->mcp_info->func_info;
info->pause_on_host = (shmem_info.config &
FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
- if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info,
- &info->protocol)) {
+ if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
DP_ERR(p_hwfn, "Unknown personality %08x\n",
(u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
return -EINVAL;
return &p_hwfn->mcp_info->link_capabilities;
}
-int qed_mcp_drain(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 resp = 0, param = 0;
int rc;
rc = qed_mcp_cmd(p_hwfn, p_ptt,
- DRV_MSG_CODE_NIG_DRAIN, 1000,
- &resp, &param);
+ DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, &param);
/* Wait for the drain to complete before returning */
msleep(1020);
}
int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u32 *p_flash_size)
+ struct qed_ptt *p_ptt, u32 *p_flash_size)
{
u32 flash_size;
p_drv_version = &union_data.drv_version;
p_drv_version->version = p_ver->version;
- for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) {
- val = cpu_to_be32(p_ver->name[i]);
+ for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) {
+ val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)]));
*(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val;
}
return rc;
}
-int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- enum qed_led_mode mode)
+int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 resp = 0, param = 0;
+ int rc;
+
+ rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
+ &param);
+ if (rc)
+ DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+
+ return rc;
+}
+
+int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ u32 value, cpu_mode;
+
+ qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
+
+ value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+ qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
+ cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+
+ return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+}
+
+int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, enum qed_led_mode mode)
{
u32 resp = 0, param = 0, drv_mb_param;
int rc;
return rc;
}
+int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u32 mask_parities)
+{
+ u32 resp = 0, param = 0;
+ int rc;
+
+ rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
+ mask_parities, &resp, &param);
+
+ if (rc) {
+ DP_ERR(p_hwfn,
+ "MCP response failure for mask parities, aborting\n");
+ } else if (resp != FW_MSG_CODE_OK) {
+ DP_ERR(p_hwfn,
+ "MCP did not acknowledge mask parity request. Old MFW?\n");
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
u32 drv_mb_param = 0, rsp, param;
numhashregs = 8;
break;
default:
- pr_debug("STMMAC: err in setting mulitcast filter\n");
+ pr_debug("STMMAC: err in setting multicast filter\n");
return;
break;
}
}
if (mode & WAKE_UCAST) {
pr_debug("GMAC: WOL on global unicast\n");
- pmt |= global_unicast;
+ pmt |= power_down | global_unicast | wake_up_frame_en;
}
writel(pmt, ioaddr + GMAC_PMT);
#define NETNEXT_VERSION "08"
/* Information for net */
- #define NET_VERSION "5"
+ #define NET_VERSION "6"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
return -ENODEV;
if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
netif_warn(tp, probe, tp->netdev,
- "Invalid buffer when reading pass-thru MAC addr: "
- "(%d, %d)\n",
+ "Invalid buffer for pass-thru MAC addr: (%d, %d)\n",
obj->type, obj->string.length);
goto amacout;
}
ret = hex2bin(buf, obj->string.pointer + 9, 6);
if (!(ret == 0 && is_valid_ether_addr(buf))) {
netif_warn(tp, probe, tp->netdev,
- "Invalid MAC when reading pass-thru MAC addr: "
- "%d, %pM\n", ret, buf);
+ "Invalid MAC for pass-thru MAC addr: %d, %pM\n",
+ ret, buf);
ret = -EINVAL;
goto amacout;
}
struct sockaddr sa;
int ret;
- if (tp->version == RTL_VER_01)
+ if (tp->version == RTL_VER_01) {
ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
- else {
+ } else {
/* if this is not an RTL8153-AD, no eFuse mac pass thru set,
* or system doesn't provide valid _SB.AMAC this will
* be expected to be non-zero
}
}
+ static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
+ {
+ ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
+ ocp_reg_write(tp, OCP_EEE_DATA, reg);
+ ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
+ }
+
+ static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
+ {
+ u16 data;
+
+ r8152_mmd_indirect(tp, dev, reg);
+ data = ocp_reg_read(tp, OCP_EEE_DATA);
+ ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+
+ return data;
+ }
+
+ static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
+ {
+ r8152_mmd_indirect(tp, dev, reg);
+ ocp_reg_write(tp, OCP_EEE_DATA, data);
+ ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+ }
+
+ static void r8152_eee_en(struct r8152 *tp, bool enable)
+ {
+ u16 config1, config2, config3;
+ u32 ocp_data;
+
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+ config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
+ config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
+ config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
+
+ if (enable) {
+ ocp_data |= EEE_RX_EN | EEE_TX_EN;
+ config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
+ config1 |= sd_rise_time(1);
+ config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
+ config3 |= fast_snr(42);
+ } else {
+ ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+ config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
+ RX_QUIET_EN);
+ config1 |= sd_rise_time(7);
+ config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
+ config3 |= fast_snr(511);
+ }
+
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+ ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
+ ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
+ ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
+ }
+
+ static void r8152b_enable_eee(struct r8152 *tp)
+ {
+ r8152_eee_en(tp, true);
+ r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
+ }
+
+ static void r8152b_enable_fc(struct r8152 *tp)
+ {
+ u16 anar;
+
+ anar = r8152_mdio_read(tp, MII_ADVERTISE);
+ anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+ r8152_mdio_write(tp, MII_ADVERTISE, anar);
+ }
+
static void rtl8152_disable(struct r8152 *tp)
{
r8152_aldps_en(tp, false);
static void r8152b_hw_phy_cfg(struct r8152 *tp)
{
- u16 data;
-
- data = r8152_mdio_read(tp, MII_BMCR);
- if (data & BMCR_PDOWN) {
- data &= ~BMCR_PDOWN;
- r8152_mdio_write(tp, MII_BMCR, data);
- }
+ r8152b_enable_eee(tp);
+ r8152_aldps_en(tp, true);
+ r8152b_enable_fc(tp);
set_bit(PHY_RESET, &tp->flags);
}
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
+ static void r8153_aldps_en(struct r8152 *tp, bool enable)
+ {
+ u16 data;
+
+ data = ocp_reg_read(tp, OCP_POWER_CFG);
+ if (enable) {
+ data |= EN_ALDPS;
+ ocp_reg_write(tp, OCP_POWER_CFG, data);
+ } else {
+ data &= ~EN_ALDPS;
+ ocp_reg_write(tp, OCP_POWER_CFG, data);
+ msleep(20);
+ }
+ }
+
+ static void r8153_eee_en(struct r8152 *tp, bool enable)
+ {
+ u32 ocp_data;
+ u16 config;
+
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+ config = ocp_reg_read(tp, OCP_EEE_CFG);
+
+ if (enable) {
+ ocp_data |= EEE_RX_EN | EEE_TX_EN;
+ config |= EEE10_EN;
+ } else {
+ ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+ config &= ~EEE10_EN;
+ }
+
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+ ocp_reg_write(tp, OCP_EEE_CFG, config);
+ }
+
static void r8153_hw_phy_cfg(struct r8152 *tp)
{
u32 ocp_data;
u16 data;
- if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
- tp->version == RTL_VER_05)
- ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+ /* disable ALDPS before updating the PHY parameters */
+ r8153_aldps_en(tp, false);
- data = r8152_mdio_read(tp, MII_BMCR);
- if (data & BMCR_PDOWN) {
- data &= ~BMCR_PDOWN;
- r8152_mdio_write(tp, MII_BMCR, data);
- }
+ /* disable EEE before updating the PHY parameters */
+ r8153_eee_en(tp, false);
+ ocp_reg_write(tp, OCP_EEE_ADV, 0);
if (tp->version == RTL_VER_03) {
data = ocp_reg_read(tp, OCP_EEE_CFG);
sram_write(tp, SRAM_10M_AMP1, 0x00af);
sram_write(tp, SRAM_10M_AMP2, 0x0208);
+ r8153_eee_en(tp, true);
+ ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
+
+ r8153_aldps_en(tp, true);
+ r8152b_enable_fc(tp);
+
set_bit(PHY_RESET, &tp->flags);
}
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
- static void r8153_aldps_en(struct r8152 *tp, bool enable)
- {
- u16 data;
-
- data = ocp_reg_read(tp, OCP_POWER_CFG);
- if (enable) {
- data |= EN_ALDPS;
- ocp_reg_write(tp, OCP_POWER_CFG, data);
- } else {
- data &= ~EN_ALDPS;
- ocp_reg_write(tp, OCP_POWER_CFG, data);
- msleep(20);
- }
- }
-
static void rtl8153_disable(struct r8152 *tp)
{
r8153_aldps_en(tp, false);
return res;
}
- static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
- {
- ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
- ocp_reg_write(tp, OCP_EEE_DATA, reg);
- ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
- }
-
- static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
- {
- u16 data;
-
- r8152_mmd_indirect(tp, dev, reg);
- data = ocp_reg_read(tp, OCP_EEE_DATA);
- ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-
- return data;
- }
-
- static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
- {
- r8152_mmd_indirect(tp, dev, reg);
- ocp_reg_write(tp, OCP_EEE_DATA, data);
- ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
- }
-
- static void r8152_eee_en(struct r8152 *tp, bool enable)
- {
- u16 config1, config2, config3;
- u32 ocp_data;
-
- ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
- config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
- config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
- config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
-
- if (enable) {
- ocp_data |= EEE_RX_EN | EEE_TX_EN;
- config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
- config1 |= sd_rise_time(1);
- config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
- config3 |= fast_snr(42);
- } else {
- ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
- config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
- RX_QUIET_EN);
- config1 |= sd_rise_time(7);
- config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
- config3 |= fast_snr(511);
- }
-
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
- ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
- ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
- ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
- }
-
- static void r8152b_enable_eee(struct r8152 *tp)
- {
- r8152_eee_en(tp, true);
- r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
- }
-
- static void r8153_eee_en(struct r8152 *tp, bool enable)
- {
- u32 ocp_data;
- u16 config;
-
- ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
- config = ocp_reg_read(tp, OCP_EEE_CFG);
-
- if (enable) {
- ocp_data |= EEE_RX_EN | EEE_TX_EN;
- config |= EEE10_EN;
- } else {
- ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
- config &= ~EEE10_EN;
- }
-
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
- ocp_reg_write(tp, OCP_EEE_CFG, config);
- }
-
- static void r8153_enable_eee(struct r8152 *tp)
- {
- r8153_eee_en(tp, true);
- ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
- }
-
- static void r8152b_enable_fc(struct r8152 *tp)
- {
- u16 anar;
-
- anar = r8152_mdio_read(tp, MII_ADVERTISE);
- anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
- r8152_mdio_write(tp, MII_ADVERTISE, anar);
- }
-
static void rtl_tally_reset(struct r8152 *tp)
{
u32 ocp_data;
static void r8152b_init(struct r8152 *tp)
{
u32 ocp_data;
+ u16 data;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
+ data = r8152_mdio_read(tp, MII_BMCR);
+ if (data & BMCR_PDOWN) {
+ data &= ~BMCR_PDOWN;
+ r8152_mdio_write(tp, MII_BMCR, data);
+ }
+
r8152_aldps_en(tp, false);
if (tp->version == RTL_VER_01) {
SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
- r8152b_enable_eee(tp);
- r8152_aldps_en(tp, true);
- r8152b_enable_fc(tp);
rtl_tally_reset(tp);
/* enable rx aggregation */
static void r8153_init(struct r8152 *tp)
{
u32 ocp_data;
+ u16 data;
int i;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
- r8153_aldps_en(tp, false);
r8153_u1u2en(tp, false);
for (i = 0; i < 500; i++) {
msleep(20);
}
+ if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
+ tp->version == RTL_VER_05)
+ ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+
+ data = r8152_mdio_read(tp, MII_BMCR);
+ if (data & BMCR_PDOWN) {
+ data &= ~BMCR_PDOWN;
+ r8152_mdio_write(tp, MII_BMCR, data);
+ }
+
+ for (i = 0; i < 500; i++) {
+ ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK;
+ if (ocp_data == PHY_STAT_LAN_ON)
+ break;
+ msleep(20);
+ }
+
usb_disable_lpm(tp->udev);
r8153_u2p3en(tp, false);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
- r8153_enable_eee(tp);
- r8153_aldps_en(tp, true);
- r8152b_enable_fc(tp);
rtl_tally_reset(tp);
r8153_u2p3en(tp, true);
}
return ret;
}
-static struct ethtool_ops ops = {
+static const struct ethtool_ops ops = {
.get_drvinfo = rtl8152_get_drvinfo,
.get_settings = rtl8152_get_settings,
.set_settings = rtl8152_set_settings,
int hdrlen = ieee80211_hdrlen(hdr->frame_control);
int queue;
+ /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
+ * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
+ * queue. STATION (HS2.0) uses the auxiliary context of the FW,
+ * and hence needs to be sent on the aux queue
+ */
+ if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
+ skb_info->control.vif->type == NL80211_IFTYPE_STATION)
+ IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
+
memcpy(&info, skb->cb, sizeof(info));
if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
/* This holds the amsdu headers length */
skb_info->driver_data[0] = (void *)(uintptr_t)0;
- /*
- * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
- * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
- * queue. STATION (HS2.0) uses the auxiliary context of the FW,
- * and hence needs to be sent on the aux queue
- */
- if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
- info.control.vif->type == NL80211_IFTYPE_STATION)
- IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
-
queue = info.hw_queue;
/*
}
}
+/* Check if there are any timed-out TIDs on a given shared TXQ */
+static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id)
+{
+ unsigned long queue_tid_bitmap = mvm->queue_info[txq_id].tid_bitmap;
+ unsigned long now = jiffies;
+ int tid;
+
+ for_each_set_bit(tid, &queue_tid_bitmap, IWL_MAX_TID_COUNT + 1) {
+ if (time_before(mvm->queue_info[txq_id].last_frame_time[tid] +
+ IWL_MVM_DQA_QUEUE_TIMEOUT, now))
+ return true;
+ }
+
+ return false;
+}
+
/*
* Sets the fields in the Tx cmd that are crypto related
*/
iwl_trans_free_tx_cmd(mvm->trans, dev_cmd);
spin_unlock(&mvmsta->lock);
return 0;
-
}
/* If we are here - TXQ exists and needs to be re-activated */
txq_id);
}
- /* Keep track of the time of the last frame for this RA/TID */
- mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
+ if (iwl_mvm_is_dqa_supported(mvm)) {
+ /* Keep track of the time of the last frame for this RA/TID */
+ mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
+
+ /*
+ * If we have timed-out TIDs - schedule the worker that will
+ * reconfig the queues and update them
+ *
+ * Note that the mvm->queue_info_lock isn't being taken here in
+ * order to not serialize the TX flow. This isn't dangerous
+ * because scheduling mvm->add_stream_wk can't ruin the state,
+ * and if we DON'T schedule it due to some race condition then the
+ * next TX that gets here will.
+ */
+ if (unlikely(mvm->queue_info[txq_id].status ==
+ IWL_MVM_QUEUE_SHARED &&
+ iwl_mvm_txq_should_update(mvm, txq_id)))
+ schedule_work(&mvm->add_stream_wk);
+ }
IWL_DEBUG_TX(mvm, "TX to [%d|%d] Q:%d - seq: 0x%x\n", mvmsta->sta_id,
tid, txq_id, IEEE80211_SEQ_TO_SN(seq_number));
return count;
}
-static int xenvif_dump_open(struct inode *inode, struct file *filp)
+static int xenvif_io_ring_open(struct inode *inode, struct file *filp)
{
int ret;
void *queue = NULL;
static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
.owner = THIS_MODULE,
- .open = xenvif_dump_open,
+ .open = xenvif_io_ring_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = xenvif_write_io_ring,
};
+static int xenvif_read_ctrl(struct seq_file *m, void *v)
+{
+ struct xenvif *vif = m->private;
+
+ xenvif_dump_hash_info(vif, m);
+
+ return 0;
+}
+
+static int xenvif_ctrl_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, xenvif_read_ctrl, inode->i_private);
+}
+
+static const struct file_operations xenvif_dbg_ctrl_ops_fops = {
+ .owner = THIS_MODULE,
+ .open = xenvif_ctrl_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static void xenvif_debugfs_addif(struct xenvif *vif)
{
struct dentry *pfile;
pr_warn("Creation of io_ring file returned %ld!\n",
PTR_ERR(pfile));
}
+
+ if (vif->ctrl_task) {
+ pfile = debugfs_create_file("ctrl",
+ S_IRUSR,
+ vif->xenvif_dbg_root,
+ vif,
+ &xenvif_dbg_ctrl_ops_fops);
+ if (IS_ERR_OR_NULL(pfile))
+ pr_warn("Creation of ctrl file returned %ld!\n",
+ PTR_ERR(pfile));
+ }
} else
netdev_warn(vif->dev,
"Creation of vif debugfs dir returned %ld!\n",
be->dev = dev;
dev_set_drvdata(&dev->dev, be);
+ be->state = XenbusStateInitialising;
+ err = xenbus_switch_state(dev, XenbusStateInitialising);
+ if (err)
+ goto fail;
+
sg = 1;
do {
be->hotplug_script = script;
- err = xenbus_switch_state(dev, XenbusStateInitWait);
- if (err)
- goto fail;
-
- be->state = XenbusStateInitWait;
/* This kicks hotplug scripts, so do it immediately. */
err = backend_create_xenvif(be);
/* Handle backend state transitions:
*
- * The backend state starts in InitWait and the following transitions are
+ * The backend state starts in Initialising and the following transitions are
* allowed.
*
- * InitWait -> Connected
- *
- *    ^    \         |
- *    |     \        |
- *    |      \       |
- *    |       \      |
- *    |        \     |
- *    |         \    |
- *    |          V   V
+ * Initialising -> InitWait -> Connected
+ *          \
+ *           \        ^    \         |
+ *            \       |     \        |
+ *             \      |      \       |
+ *              \     |       \      |
+ *               \    |        \     |
+ *                \   |         \    |
+ *                 V  |          V   V
+ *
- *  Closed  <-> Closing
+ *                  Closed  <-> Closing
*
* The state argument specifies the eventual state of the backend and the
* function transitions to that state via the shortest path.
{
while (be->state != state) {
switch (be->state) {
+ case XenbusStateInitialising:
+ switch (state) {
+ case XenbusStateInitWait:
+ case XenbusStateConnected:
+ case XenbusStateClosing:
+ backend_switch_state(be, XenbusStateInitWait);
+ break;
+ case XenbusStateClosed:
+ backend_switch_state(be, XenbusStateClosed);
+ break;
+ default:
+ BUG();
+ }
+ break;
case XenbusStateClosed:
switch (state) {
case XenbusStateInitWait:
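(Aside: the reworked state handling above steps one hop at a time until the requested backend state is reached, per the diagram. A minimal userspace sketch of that stepping pattern, with state names and hop choices simplified from the diagram rather than taken from the driver's code:)

#include <stdio.h>

enum state { INITIALISING, INIT_WAIT, CONNECTED, CLOSING, CLOSED };

static const char *name[] = {
	"Initialising", "InitWait", "Connected", "Closing", "Closed"
};

/* One legal hop toward the target, mirroring the transition diagram. */
static enum state next_hop(enum state cur, enum state target)
{
	switch (cur) {
	case INITIALISING:
		return target == CLOSED ? CLOSED : INIT_WAIT;
	case CLOSED:
		return INIT_WAIT;
	case INIT_WAIT:
		return target == CONNECTED ? CONNECTED : CLOSING;
	case CONNECTED:
		return CLOSING;
	case CLOSING:
	default:
		return CLOSED;
	}
}

static void set_state(enum state *cur, enum state target)
{
	while (*cur != target) {
		*cur = next_hop(*cur, target);
		printf("-> %s\n", name[*cur]);
	}
}

int main(void)
{
	enum state s = INITIALISING;

	set_state(&s, CONNECTED);	/* Initialising -> InitWait -> Connected */
	set_state(&s, CLOSED);		/* Connected -> Closing -> Closed */
	return 0;
}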
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
- void (*clear_sk)(struct sock *sk, int size);
/* Keeping track of sockets in use */
#ifdef CONFIG_PROC_FS
sk_stream_memory_free(sk);
}
+static inline int sk_under_cgroup_hierarchy(struct sock *sk,
+ struct cgroup *ancestor)
+{
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
+ ancestor);
+#else
+ return -ENOTSUPP;
+#endif
+}
static inline bool sk_has_memory_pressure(const struct sock *sk)
{
return sk->sk_prot->hash(sk);
}
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
-
/* About 10 seconds */
#define SOCK_DESTROY_TIME (10*HZ)
if (!sk_has_account(sk))
return;
sk->sk_forward_alloc += size;
+
+ /* Avoid a possible overflow.
+ * TCP send queues can make this happen, if sk_mem_reclaim()
+ * is not called and more than 2 GBytes are released at once.
+ *
+ * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
+ * no need to hold that much forward allocation anyway.
+ */
+ if (unlikely(sk->sk_forward_alloc >= 1 << 21))
+ __sk_mem_reclaim(sk, 1 << 20);
}
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
struct xfrm_replay_state_esn *preplay_esn;
/* The functions for replay detection. */
- struct xfrm_replay *repl;
+ const struct xfrm_replay *repl;
/* internal flag that only holds state for delayed aevent at the
* moment
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
int xfrm6_extract_header(struct sk_buff *skb);
int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
- int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
+ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+ struct ip6_tnl *t);
int xfrm6_transport_finish(struct sk_buff *skb, int async);
+ int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
int xfrm6_rcv(struct sk_buff *skb);
int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
xfrm_address_t *saddr, u8 proto);
return 0;
}
- static int perf_event_restart(struct perf_event *event)
+ static int perf_event_stop(struct perf_event *event, int restart)
{
struct stop_event_data sd = {
.event = event,
- .restart = 1,
+ .restart = restart,
};
int ret = 0;
.group = group,
.ret = 0,
};
- ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
- /* The event must have been read from an online CPU: */
- WARN_ON_ONCE(ret);
- ret = ret ? : data.ret;
+ /*
+ * Purposely ignore the smp_call_function_single() return
+ * value.
+ *
+ * If event->oncpu isn't a valid CPU it means the event got
+ * scheduled out and that will have updated the event count.
+ *
+ * Therefore, either way, we'll have an up-to-date event count
+ * after this.
+ */
+ (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+ ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
spin_unlock_irqrestore(&rb->event_lock, flags);
}
+ /*
+ * Avoid racing with perf_mmap_close(AUX): stop the event
+ * before swizzling the event::rb pointer; if it's getting
+ * unmapped, its aux_mmap_count will be 0 and it won't
+ * restart. See the comment in __perf_pmu_output_stop().
+ *
+ * Data will inevitably be lost when set_output is done in
+ * mid-air, but then again, whoever does it like this is
+ * not in for the data anyway.
+ */
+ if (has_aux(event))
+ perf_event_stop(event, 0);
+
rcu_assign_pointer(event->rb, rb);
if (old_rb) {
raw_spin_unlock_irqrestore(&ifh->lock, flags);
if (restart)
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
void perf_event_exec(void)
/*
* In case of inheritance, it will be the parent that links to the
- * ring-buffer, but it will be the child that's actually using it:
+ * ring-buffer, but it will be the child that's actually using it.
+ *
+ * We are using event::rb to determine if the event should be stopped,
+ * however this may race with ring_buffer_attach() (through set_output),
+ * which will make us skip the event that actually needs to be stopped.
+ * So ring_buffer_attach() has to stop an aux event before re-assigning
+ * its rb pointer.
*/
if (rcu_dereference(parent->rb) == rb)
ro->err = __perf_event_stop(&sd);
raw_spin_unlock_irqrestore(&ifh->lock, flags);
if (restart)
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
/*
irq_work_queue(&event->pending);
}
- event->overflow_handler(event, data, regs);
+ READ_ONCE(event->overflow_handler)(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
ftrace_profile_free_filter(event);
}
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct bpf_perf_event_data_kern ctx = {
+ .data = data,
+ .regs = regs,
+ };
+ int ret = 0;
+
+ preempt_disable();
+ if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+ goto out;
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+ rcu_read_unlock();
+out:
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ if (!ret)
+ return;
+
+ event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ struct bpf_prog *prog;
+
+ if (event->overflow_handler_context)
+ /* hw breakpoint or kernel counter */
+ return -EINVAL;
+
+ if (event->prog)
+ return -EEXIST;
+
+ prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ event->prog = prog;
+ event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+ WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+ return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+ struct bpf_prog *prog = event->prog;
+
+ if (!prog)
+ return;
+
+ WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+ event->prog = NULL;
+ bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
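(Aside: with the hooks above, a BPF_PROG_TYPE_PERF_EVENT program attached to a hardware or software event runs from the overflow path, and returning 0 suppresses the original overflow handler. A rough userspace sketch of attaching such a program through the existing PERF_EVENT_IOC_SET_BPF ioctl; error handling is trimmed, and the two-instruction program simply returns 1 so the default handler still runs:)

#include <linux/bpf.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>

static int sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	/* r0 = 1; exit;  (non-zero return => run the original handler) */
	struct bpf_insn prog[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 1 },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	struct perf_event_attr pattr;
	union bpf_attr battr;
	int bpf_fd, perf_fd;

	memset(&battr, 0, sizeof(battr));
	battr.prog_type = BPF_PROG_TYPE_PERF_EVENT;
	battr.insns = (__u64)(unsigned long)prog;
	battr.insn_cnt = 2;
	battr.license = (__u64)(unsigned long)"GPL";
	bpf_fd = sys_bpf(BPF_PROG_LOAD, &battr);

	memset(&pattr, 0, sizeof(pattr));
	pattr.type = PERF_TYPE_SOFTWARE;
	pattr.size = sizeof(pattr);
	pattr.config = PERF_COUNT_SW_CPU_CLOCK;
	pattr.sample_period = 100000;
	perf_fd = syscall(__NR_perf_event_open, &pattr, 0, -1, -1, 0);

	if (bpf_fd < 0 || perf_fd < 0) {
		perror("prog load / event open");
		return 1;
	}

	if (ioctl(perf_fd, PERF_EVENT_IOC_SET_BPF, bpf_fd))
		perror("PERF_EVENT_IOC_SET_BPF");
	else
		printf("BPF overflow handler attached\n");

	close(perf_fd);
	close(bpf_fd);
	return 0;
}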
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
bool is_kprobe, is_tracepoint;
struct bpf_prog *prog;
+ if (event->attr.type == PERF_TYPE_HARDWARE ||
+ event->attr.type == PERF_TYPE_SOFTWARE)
+ return perf_event_set_bpf_handler(event, prog_fd);
+
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
{
struct bpf_prog *prog;
+ perf_event_free_bpf_handler(event);
+
if (!event->tp_event)
return;
mmput(mm);
restart:
- perf_event_restart(event);
+ perf_event_stop(event, 1);
}
/*
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+ if (overflow_handler == bpf_overflow_handler) {
+ struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto err_ns;
+ }
+ event->prog = prog;
+ event->orig_overflow_handler =
+ parent_event->orig_overflow_handler;
+ }
+#endif
}
if (overflow_handler) {
if (!orig_ifinfo)
return;
- rcu_read_lock();
- curr_router = rcu_dereference(orig_ifinfo->router);
- if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
- curr_router = NULL;
- rcu_read_unlock();
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replaced the current
+ * best neighbor, the reference counter needs to decrease. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
+ /* increase refcount of new best neighbor */
+ if (neigh_node)
+ kref_get(&neigh_node->refcount);
+
+ rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+ batadv_orig_ifinfo_put(orig_ifinfo);
/* route deleted */
if ((curr_router) && (!neigh_node)) {
curr_router->addr);
}
- if (curr_router)
- batadv_neigh_node_put(curr_router);
-
- spin_lock_bh(&orig_node->neigh_list_lock);
- /* curr_router used earlier may not be the current orig_ifinfo->router
- * anymore because it was dereferenced outside of the neigh_list_lock
- * protected region. After the new best neighbor has replace the current
- * best neighbor the reference counter needs to decrease. Consequently,
- * the code needs to ensure the curr_router variable contains a pointer
- * to the replaced best neighbor.
- */
- curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
-
- /* increase refcount of new best neighbor */
- if (neigh_node)
- kref_get(&neigh_node->refcount);
-
- rcu_assign_pointer(orig_ifinfo->router, neigh_node);
- spin_unlock_bh(&orig_node->neigh_list_lock);
- batadv_orig_ifinfo_put(orig_ifinfo);
-
/* decrease refcount of previous best neighbor */
if (curr_router)
batadv_neigh_node_put(curr_router);
return 0;
}
+ /**
+ * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
+ * @orig_node: originator node whose last bonding candidate should be retrieved
+ *
+ * Return: last bonding candidate of router or NULL if not found
+ *
+ * The object is returned with refcounter increased by 1.
+ */
+ static struct batadv_orig_ifinfo *
+ batadv_last_bonding_get(struct batadv_orig_node *orig_node)
+ {
+ struct batadv_orig_ifinfo *last_bonding_candidate;
+
+ spin_lock_bh(&orig_node->neigh_list_lock);
+ last_bonding_candidate = orig_node->last_bonding_candidate;
+
+ if (last_bonding_candidate)
+ kref_get(&last_bonding_candidate->refcount);
+ spin_unlock_bh(&orig_node->neigh_list_lock);
+
+ return last_bonding_candidate;
+ }
+
/**
* batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
* @orig_node: originator node whose bonding candidates should be replaced
* router - obviously there are no other candidates.
*/
rcu_read_lock();
- last_candidate = orig_node->last_bonding_candidate;
+ last_candidate = batadv_last_bonding_get(orig_node);
if (last_candidate)
last_cand_router = rcu_dereference(last_candidate->router);
batadv_orig_ifinfo_put(next_candidate);
}
+ if (last_candidate)
+ batadv_orig_ifinfo_put(last_candidate);
+
return router;
}
#endif
}
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
-{
- unsigned long nulls1, nulls2;
-
- nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
- nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
- if (nulls1 > nulls2)
- swap(nulls1, nulls2);
-
- if (nulls1 != 0)
- memset((char *)sk, 0, nulls1);
- memset((char *)sk + nulls1 + sizeof(void *), 0,
- nulls2 - nulls1 - sizeof(void *));
- memset((char *)sk + nulls2 + sizeof(void *), 0,
- size - nulls2 - sizeof(void *));
-}
-EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
-
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
int family)
{
sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
if (!sk)
return sk;
- if (priority & __GFP_ZERO) {
- if (prot->clear_sk)
- prot->clear_sk(sk, prot->obj_size);
- else
- sk_prot_clear_nulls(sk, prot->obj_size);
- }
+ if (priority & __GFP_ZERO)
+ sk_prot_clear_nulls(sk, prot->obj_size);
} else
sk = kmalloc(prot->obj_size, priority);
if (!try_module_get(prot->owner))
goto out_free_sec;
sk_tx_queue_clear(sk);
- cgroup_sk_alloc(&sk->sk_cgrp_data);
}
return sk;
sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
+ cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
}
newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
+
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
u32 old = ACCESS_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
- u32 delta = 0;
+ u32 new, delta = 0;
if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
- return atomic_add_return(segs + delta, p_id) - segs;
+ /* Do not use atomic_add_return() as it makes UBSAN unhappy */
+ do {
+ old = (u32)atomic_read(p_id);
+ new = old + delta + segs;
+ } while (atomic_cmpxchg(p_id, old, new) != old);
+
+ return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
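(Aside: the loop above avoids atomic_add_return() because the latter does the addition on a signed atomic_t, which UBSAN flags on overflow; computing the sum on a plain u32 and publishing it with compare-exchange keeps the wraparound well defined. The same pattern in portable C11, as a sketch:)

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* "Add and return the new value", with the arithmetic done in unsigned
 * space so wraparound is defined, published via compare-exchange.
 */
static uint32_t add_return_u32(_Atomic uint32_t *p, uint32_t delta)
{
	uint32_t old = atomic_load(p);
	uint32_t val;

	do {
		val = old + delta;	/* unsigned: wraparound is defined */
	} while (!atomic_compare_exchange_weak(p, &old, val));

	return val;
}

int main(void)
{
	_Atomic uint32_t id = UINT32_MAX - 1;

	printf("%u\n", add_return_u32(&id, 3));	/* wraps around to 1 */
	return 0;
}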
mtu = 576;
}
- return min_t(unsigned int, mtu, IP_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
+ fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
return ERR_PTR(-EINVAL);
if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
- if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ if (ipv4_is_loopback(fl4->saddr) &&
+ !(dev_out->flags & IFF_LOOPBACK) &&
+ !netif_is_l3_master(dev_out))
return ERR_PTR(-EINVAL);
if (ipv4_is_lbcast(fl4->daddr))
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
- int master_idx;
int orig_oif;
int err = -ENETUNREACH;
orig_oif = fl4->flowi4_oif;
- master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
- if (master_idx)
- fl4->flowi4_oif = master_idx;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
-
- rth = l3mdev_get_rtable(dev_out, fl4);
- if (rth)
- goto out;
}
if (!fl4->daddr) {
if (err) {
res.fi = NULL;
res.table = NULL;
- if (fl4->flowi4_oif &&
- !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+ if (fl4->flowi4_oif) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
else
fl4->saddr = fl4->daddr;
}
- dev_out = net->loopback_dev;
+
+ /* L3 master device is the loopback for that domain */
+ dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
flags |= RTCF_LOCAL;
goto make_route;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
- if (netif_index_is_l3_master(net, fl4.flowi4_oif))
- fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
if (iif) {
struct net_device *dev;
static void tcp_sndbuf_expand(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
int sndmem, per_mss;
u32 nr_segs;
* Cubic needs 1.7 factor, rounded to 2 to include
* extra cushion (application might react slowly to POLLOUT)
*/
- sndmem = 2 * nr_segs * per_mss;
+ sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
+ sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
}
+/* Sum the number of packets on the wire we have marked as lost.
+ * There are two cases we care about here:
+ * a) Packet hasn't been marked lost (nor retransmitted),
+ * and this is the first loss.
+ * b) Packet has been marked both lost and retransmitted,
+ * and this means we think it was lost again.
+ */
+static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ __u8 sacked = TCP_SKB_CB(skb)->sacked;
+
+ if (!(sacked & TCPCB_LOST) ||
+ ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
+ tp->lost += tcp_skb_pcount(skb);
+}
+
static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
{
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tcp_verify_retransmit_hint(tp, skb);
tp->lost_out += tcp_skb_pcount(skb);
+ tcp_sum_lost(tp, skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
}
}
{
tcp_verify_retransmit_hint(tp, skb);
+ tcp_sum_lost(tp, skb);
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
*/
struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt;
+ struct rate_sample *rate;
int flag;
};
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
&skb->skb_mstamp);
+ tcp_rate_skb_delivered(sk, skb, state->rate);
if (skb == tp->lost_skb_hint)
tp->lost_cnt_hint += pcount;
tcp_advance_highest_sack(sk, skb);
tcp_skb_collapse_tstamp(prev, skb);
+ if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
+ TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
dup_sack,
tcp_skb_pcount(skb),
&skb->skb_mstamp);
+ tcp_rate_skb_delivered(sk, skb, state->rate);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
- if (found_dup_sack)
+ if (found_dup_sack) {
state->flag |= FLAG_DSACKING_ACK;
+ tp->delivered++; /* A spurious retransmission is delivered */
+ }
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
struct sk_buff *skb;
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
+ bool mark_lost;
/* Reduce ssthresh if it has not yet been made inside this window. */
if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
if (skb == tcp_send_head(sk))
break;
+ mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
+ is_reneg);
+ if (mark_lost)
+ tcp_sum_lost(tp, skb);
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
- if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
+ if (mark_lost) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (inet_csk(sk)->icsk_ca_ops->cong_control)
+ return;
+
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
*rexmit = REXMIT_LOST;
}
-/* Kathleen Nichols' algorithm for tracking the minimum value of
- * a data stream over some fixed time interval. (E.g., the minimum
- * RTT over the past five minutes.) It uses constant space and constant
- * time per update yet almost always delivers the same minimum as an
- * implementation that has to keep all the data in the window.
- *
- * The algorithm keeps track of the best, 2nd best & 3rd best min
- * values, maintaining an invariant that the measurement time of the
- * n'th best >= n-1'th best. It also makes sure that the three values
- * are widely separated in the time window since that bounds the worse
- * case error when that data is monotonically increasing over the window.
- *
- * Upon getting a new min, we can forget everything earlier because it
- * has no value - the new min is <= everything else in the window by
- * definition and it's the most recent. So we restart fresh on every new min
- * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
- * best.
- */
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
- const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
- struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- struct rtt_meas rttm = {
- .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
- .ts = now,
- };
- u32 elapsed;
-
- /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
- if (unlikely(rttm.rtt <= m[0].rtt))
- m[0] = m[1] = m[2] = rttm;
- else if (rttm.rtt <= m[1].rtt)
- m[1] = m[2] = rttm;
- else if (rttm.rtt <= m[2].rtt)
- m[2] = rttm;
-
- elapsed = now - m[0].ts;
- if (unlikely(elapsed > wlen)) {
- /* Passed entire window without a new min so make 2nd choice
- * the new min & 3rd choice the new 2nd. So forth and so on.
- */
- m[0] = m[1];
- m[1] = m[2];
- m[2] = rttm;
- if (now - m[0].ts > wlen) {
- m[0] = m[1];
- m[1] = rttm;
- if (now - m[0].ts > wlen)
- m[0] = rttm;
- }
- } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
- /* Passed a quarter of the window without a new min so
- * take 2nd choice from the 2nd quarter of the window.
- */
- m[2] = m[1] = rttm;
- } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
- /* Passed half the window without a new min so take the 3rd
- * choice from the last half of the window.
- */
- m[2] = rttm;
- }
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
+
+ minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+ rtt_us ? : jiffies_to_usecs(1));
}
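(Aside: the open-coded windowed min tracking deleted above now sits behind the minmax_running_min() helper called by the new body. A stand-alone C sketch of the same three-choice filter, following the deleted logic, with timestamps and the window length in arbitrary ticks:)

#include <stdint.h>
#include <stdio.h>

struct sample { uint32_t v; uint32_t ts; };

/* Track the best, 2nd best and 3rd best minima seen inside a sliding
 * window of wlen ticks, as described in the comment removed above.
 */
static void running_min(struct sample m[3], uint32_t wlen,
			uint32_t now, uint32_t meas)
{
	struct sample s = { .v = meas, .ts = now };
	uint32_t elapsed;

	/* Does the new measurement update the 1st, 2nd or 3rd choice? */
	if (s.v <= m[0].v)
		m[0] = m[1] = m[2] = s;
	else if (s.v <= m[1].v)
		m[1] = m[2] = s;
	else if (s.v <= m[2].v)
		m[2] = s;

	elapsed = now - m[0].ts;
	if (elapsed > wlen) {
		/* whole window passed without a new min: promote the others */
		m[0] = m[1];
		m[1] = m[2];
		m[2] = s;
		if (now - m[0].ts > wlen) {
			m[0] = m[1];
			m[1] = s;
			if (now - m[0].ts > wlen)
				m[0] = s;
		}
	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
		m[2] = m[1] = s;	/* refresh 2nd choice after a quarter window */
	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
		m[2] = s;		/* refresh 3rd choice after half a window */
	}
}

int main(void)
{
	struct sample m[3] = { { 100, 0 }, { 100, 0 }, { 100, 0 } };
	uint32_t t;

	for (t = 1; t <= 40; t++)
		running_min(m, 20, t, 100 + t);	/* measurement slowly rising */
	printf("windowed min: %u (ts %u)\n",
	       (unsigned)m[0].v, (unsigned)m[0].ts);
	return 0;
}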
static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_snd_una, int *acked,
- struct tcp_sacktag_state *sack)
+ struct tcp_sacktag_state *sack,
+ struct skb_mstamp *now)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct skb_mstamp first_ackt, last_ackt, now;
+ struct skb_mstamp first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
acked_pcount = tcp_tso_acked(sk, skb);
if (!acked_pcount)
break;
-
fully_acked = false;
} else {
/* Speedup tcp_unlink_write_queue() and next loop */
tp->packets_out -= acked_pcount;
pkts_acked += acked_pcount;
+ tcp_rate_skb_delivered(sk, skb, sack->rate);
/* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- skb_mstamp_get(&now);
if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
- ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
+ ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
}
if (sack->first_sackt.v64) {
- sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
- ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
+ sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
+ ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
}
-
+ sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
ca_rtt_us);
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
+ sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
* information. All transmission or retransmission are delayed afterwards.
*/
static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
- int flag)
+ int flag, const struct rate_sample *rs)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ca_ops->cong_control) {
+ icsk->icsk_ca_ops->cong_control(sk, rs);
+ return;
+ }
+
if (tcp_in_cwnd_reduction(sk)) {
/* Reduce cwnd if state mandates */
tcp_cwnd_reduction(sk, acked_sacked, flag);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_sacktag_state sack_state;
+ struct rate_sample rs = { .prior_delivered = 0 };
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
u32 prior_fackets;
int prior_packets = tp->packets_out;
- u32 prior_delivered = tp->delivered;
+ u32 delivered = tp->delivered;
+ u32 lost = tp->lost;
int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ struct skb_mstamp now;
sack_state.first_sackt.v64 = 0;
+ sack_state.rate = &rs;
/* We very likely will need to access write queue head. */
prefetchw(sk->sk_write_queue.next);
if (after(ack, tp->snd_nxt))
goto invalid_ack;
+ skb_mstamp_get(&now);
+
if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
}
prior_fackets = tp->fackets_out;
+ rs.prior_in_flight = tcp_packets_in_flight(tp);
/* ts_recent update must be made after we are sure that the packet
* is in window.
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- &sack_state);
+ &sack_state, &now);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
- tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+ delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
+ lost = tp->lost - lost; /* freshly marked lost */
+ tcp_rate_gen(sk, delivered, lost, &now, &rs);
+ tcp_cong_control(sk, ack, delivered, flag, &rs);
tcp_xmit_recovery(sk, rexmit);
return 1;
/* It _is_ possible, that we have something out-of-order _after_ FIN.
* Probably, we should reset in this case. For now drop them.
*/
- __skb_queue_purge(&tp->out_of_order_queue);
+ skb_rbtree_purge(&tp->out_of_order_queue);
if (tcp_is_sack(tp))
tcp_sack_reset(&tp->rx_opt);
sk_mem_reclaim(sk);
int this_sack;
/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
- if (skb_queue_empty(&tp->out_of_order_queue)) {
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
tp->rx_opt.num_sacks = 0;
return;
}
{
struct tcp_sock *tp = tcp_sk(sk);
__u32 dsack_high = tp->rcv_nxt;
+ bool fin, fragstolen, eaten;
struct sk_buff *skb, *tail;
- bool fragstolen, eaten;
+ struct rb_node *p;
- while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+ p = rb_first(&tp->out_of_order_queue);
+ while (p) {
+ skb = rb_entry(p, struct sk_buff, rbnode);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
dsack_high = TCP_SKB_CB(skb)->end_seq;
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- __skb_unlink(skb, &tp->out_of_order_queue);
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+ if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
tcp_drop(sk, skb);
continue;
tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+ fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
__skb_queue_tail(&sk->sk_receive_queue, skb);
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- tcp_fin(sk);
- if (eaten)
+ else
kfree_skb_partial(skb, fragstolen);
+
+ if (unlikely(fin)) {
+ tcp_fin(sk);
+ /* tcp_fin() purges tp->out_of_order_queue,
+ * so we must end this loop right now.
+ */
+ break;
+ }
}
}
if (tcp_prune_queue(sk) < 0)
return -1;
- if (!sk_rmem_schedule(sk, skb, size)) {
+ while (!sk_rmem_schedule(sk, skb, size)) {
if (!tcp_prune_ofo_queue(sk))
return -1;
-
- if (!sk_rmem_schedule(sk, skb, size))
- return -1;
}
}
return 0;
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct rb_node **p, *q, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
+ bool fragstolen;
tcp_ecn_check_ce(tp, skb);
inet_csk_schedule_ack(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+ seq = TCP_SKB_CB(skb)->seq;
+ end_seq = TCP_SKB_CB(skb)->end_seq;
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ tp->rcv_nxt, seq, end_seq);
- skb1 = skb_peek_tail(&tp->out_of_order_queue);
- if (!skb1) {
+ p = &tp->out_of_order_queue.rb_node;
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
/* Initial out of order segment, build 1 SACK. */
if (tcp_is_sack(tp)) {
tp->rx_opt.num_sacks = 1;
- tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
- tp->selective_acks[0].end_seq =
- TCP_SKB_CB(skb)->end_seq;
+ tp->selective_acks[0].start_seq = seq;
+ tp->selective_acks[0].end_seq = end_seq;
}
- __skb_queue_head(&tp->out_of_order_queue, skb);
+ rb_link_node(&skb->rbnode, NULL, p);
+ rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+ tp->ooo_last_skb = skb;
goto end;
}
- seq = TCP_SKB_CB(skb)->seq;
- end_seq = TCP_SKB_CB(skb)->end_seq;
-
- if (seq == TCP_SKB_CB(skb1)->end_seq) {
- bool fragstolen;
-
- if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
- } else {
- tcp_grow_window(sk, skb);
- kfree_skb_partial(skb, fragstolen);
- skb = NULL;
- }
-
- if (!tp->rx_opt.num_sacks ||
- tp->selective_acks[0].end_seq != seq)
- goto add_sack;
-
- /* Common case: data arrive in order after hole. */
- tp->selective_acks[0].end_seq = end_seq;
- goto end;
- }
-
- /* Find place to insert this segment. */
- while (1) {
- if (!after(TCP_SKB_CB(skb1)->seq, seq))
- break;
- if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
- skb1 = NULL;
- break;
- }
- skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
- }
-
- /* Do skb overlap to previous one? */
- if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
- if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- /* All the bits are present. Drop. */
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- tcp_drop(sk, skb);
- skb = NULL;
- tcp_dsack_set(sk, seq, end_seq);
- goto add_sack;
+ /* In the typical case, we are adding an skb to the end of the list.
+ * Use of ooo_last_skb avoids the O(log N) rbtree lookup.
+ */
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+coalesce_done:
+ tcp_grow_window(sk, skb);
+ kfree_skb_partial(skb, fragstolen);
+ skb = NULL;
+ goto add_sack;
+ }
+ /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+ if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
+ parent = &tp->ooo_last_skb->rbnode;
+ p = &parent->rb_right;
+ goto insert;
+ }
+
+ /* Find place to insert this segment. Handle overlaps on the way. */
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+ skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ if (before(seq, TCP_SKB_CB(skb1)->seq)) {
+ p = &parent->rb_left;
+ continue;
}
- if (after(seq, TCP_SKB_CB(skb1)->seq)) {
- /* Partial overlap. */
- tcp_dsack_set(sk, seq,
- TCP_SKB_CB(skb1)->end_seq);
- } else {
- if (skb_queue_is_first(&tp->out_of_order_queue,
- skb1))
- skb1 = NULL;
- else
- skb1 = skb_queue_prev(
- &tp->out_of_order_queue,
- skb1);
+ if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+ if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ /* All the bits are present. Drop. */
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPOFOMERGE);
+ __kfree_skb(skb);
+ skb = NULL;
+ tcp_dsack_set(sk, seq, end_seq);
+ goto add_sack;
+ }
+ if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+ /* Partial overlap. */
+ tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
+ } else {
+ /* skb's seq == skb1's seq and skb covers skb1.
+ * Replace skb1 with skb.
+ */
+ rb_replace_node(&skb1->rbnode, &skb->rbnode,
+ &tp->out_of_order_queue);
+ tcp_dsack_extend(sk,
+ TCP_SKB_CB(skb1)->seq,
+ TCP_SKB_CB(skb1)->end_seq);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPOFOMERGE);
+ __kfree_skb(skb1);
+ goto merge_right;
+ }
+ } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ goto coalesce_done;
}
+ p = &parent->rb_right;
}
- if (!skb1)
- __skb_queue_head(&tp->out_of_order_queue, skb);
- else
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+insert:
+ /* Insert segment into RB tree. */
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
- /* And clean segments covered by new one as whole. */
- while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
- skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+merge_right:
+ /* Remove other segments covered by skb. */
+ while ((q = rb_next(&skb->rbnode)) != NULL) {
+ skb1 = rb_entry(q, struct sk_buff, rbnode);
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
end_seq);
break;
}
- __skb_unlink(skb1, &tp->out_of_order_queue);
+ rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb1);
}
+ /* If there is no skb after us, we are the last_skb! */
+ if (!q)
+ tp->ooo_last_skb = skb;
add_sack:
if (tcp_is_sack(tp))
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
+ if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
tcp_ofo_queue(sk);
/* RFC2581. 4.2. SHOULD send immediate ACK, when
* gap in queue is filled.
*/
- if (skb_queue_empty(&tp->out_of_order_queue))
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
inet_csk(sk)->icsk_ack.pingpong = 0;
}
tcp_data_queue_ofo(sk, skb);
}
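+/* Return the skb following @skb, taken from the sk_buff_head @list when
+ * one is given, or from the out-of-order rbtree when @list is NULL.
+ */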
+static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ if (list)
+ return !skb_queue_is_last(list, skb) ? skb->next : NULL;
+
+ return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+}
+
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *list)
+ struct sk_buff_head *list,
+ struct rb_root *root)
{
- struct sk_buff *next = NULL;
+ struct sk_buff *next = tcp_skb_next(skb, list);
- if (!skb_queue_is_last(list, skb))
- next = skb_queue_next(list, skb);
+ if (list)
+ __skb_unlink(skb, list);
+ else
+ rb_erase(&skb->rbnode, root);
- __skb_unlink(skb, list);
__kfree_skb(skb);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
return next;
}
+/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
+static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sk_buff *skb1;
+
+ while (*p) {
+ parent = *p;
+ skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ rb_link_node(&skb->rbnode, parent, p);
+ rb_insert_color(&skb->rbnode, root);
+}
+
/* Collapse contiguous sequence of skbs head..tail with
* sequence numbers start..end.
*
- * If tail is NULL, this means until the end of the list.
+ * If tail is NULL, this means until the end of the queue.
*
* Segments with FIN/SYN are not collapsed (only because this
* simplifies code)
*/
static void
-tcp_collapse(struct sock *sk, struct sk_buff_head *list,
- struct sk_buff *head, struct sk_buff *tail,
- u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
+ struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
{
- struct sk_buff *skb, *n;
+ struct sk_buff *skb = head, *n;
+ struct sk_buff_head tmp;
bool end_of_skbs;
/* First, check that queue is collapsible and find
- * the point where collapsing can be useful. */
- skb = head;
+ * the point where collapsing can be useful.
+ */
restart:
- end_of_skbs = true;
- skb_queue_walk_from_safe(list, skb, n) {
- if (skb == tail)
- break;
+ for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
+ n = tcp_skb_next(skb, list);
+
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- skb = tcp_collapse_one(sk, skb, list);
+ skb = tcp_collapse_one(sk, skb, list, root);
if (!skb)
break;
goto restart;
break;
}
- if (!skb_queue_is_last(list, skb)) {
- struct sk_buff *next = skb_queue_next(list, skb);
- if (next != tail &&
- TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
- end_of_skbs = false;
- break;
- }
+ if (n && n != tail &&
+ TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
+ end_of_skbs = false;
+ break;
}
/* Decided to skip this, advance start seq. */
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
return;
+ __skb_queue_head_init(&tmp);
+
while (before(start, end)) {
int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
struct sk_buff *nskb;
nskb = alloc_skb(copy, GFP_ATOMIC);
if (!nskb)
- return;
+ break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
- __skb_queue_before(list, skb, nskb);
+ if (list)
+ __skb_queue_before(list, skb, nskb);
+ else
+ __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
skb_set_owner_r(nskb, sk);
/* Copy data, releasing collapsed skbs. */
start += size;
}
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- skb = tcp_collapse_one(sk, skb, list);
+ skb = tcp_collapse_one(sk, skb, list, root);
if (!skb ||
skb == tail ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
- return;
+ goto end;
}
}
}
+end:
+ skb_queue_walk_safe(&tmp, skb, n)
+ tcp_rbtree_insert(root, skb);
}
/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
- struct sk_buff *head;
+ struct sk_buff *skb, *head;
+ struct rb_node *p;
u32 start, end;
- if (!skb)
+ p = rb_first(&tp->out_of_order_queue);
+ skb = rb_entry_safe(p, struct sk_buff, rbnode);
+new_range:
+ if (!skb) {
+ p = rb_last(&tp->out_of_order_queue);
+ /* Note: it is possible that p is NULL here. We do not
+ * use rb_entry_safe(), as ooo_last_skb is valid only
+ * if rbtree is not empty.
+ */
+ tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
return;
-
+ }
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
- head = skb;
-
- for (;;) {
- struct sk_buff *next = NULL;
- if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
- next = skb_queue_next(&tp->out_of_order_queue, skb);
- skb = next;
+ for (head = skb;;) {
+ skb = tcp_skb_next(skb, NULL);
- /* Segment is terminated when we see gap or when
- * we are at the end of all the queue. */
+ /* Range is terminated when we see a gap or when
+ * we are at the queue end.
+ */
if (!skb ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
- tcp_collapse(sk, &tp->out_of_order_queue,
+ tcp_collapse(sk, NULL, &tp->out_of_order_queue,
head, skb, start, end);
- head = skb;
- if (!skb)
- break;
- /* Start new segment */
+ goto new_range;
+ }
+
+ if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
start = TCP_SKB_CB(skb)->seq;
+ if (after(TCP_SKB_CB(skb)->end_seq, end))
end = TCP_SKB_CB(skb)->end_seq;
- } else {
- if (before(TCP_SKB_CB(skb)->seq, start))
- start = TCP_SKB_CB(skb)->seq;
- if (after(TCP_SKB_CB(skb)->end_seq, end))
- end = TCP_SKB_CB(skb)->end_seq;
- }
}
}
/*
- * Purge the out-of-order queue.
- * Return true if queue was pruned.
+ * Clean the out-of-order queue to make room.
+ * We drop high sequences packets to :
+ * 1) Let a chance for holes to be filled.
+ * 2) not add too big latencies if thousands of packets sit there.
+ * (But if application shrinks SO_RCVBUF, we could still end up
+ * freeing whole queue here)
+ *
+ * Return true if queue has shrunk.
*/
static bool tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool res = false;
+ struct rb_node *node, *prev;
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
- __skb_queue_purge(&tp->out_of_order_queue);
+ if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
+ return false;
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if (tp->rx_opt.sack_ok)
- tcp_sack_reset(&tp->rx_opt);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ node = &tp->ooo_last_skb->rbnode;
+ do {
+ prev = rb_prev(node);
+ rb_erase(node, &tp->out_of_order_queue);
+ tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
sk_mem_reclaim(sk);
- res = true;
- }
- return res;
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ !tcp_under_memory_pressure(sk))
+ break;
+ node = prev;
+ } while (node);
+ tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if (tp->rx_opt.sack_ok)
+ tcp_sack_reset(&tp->rx_opt);
+ return true;
}
/* Reduce allocated memory if we can, trying to get
tcp_collapse_ofo_queue(sk);
if (!skb_queue_empty(&sk->sk_receive_queue))
- tcp_collapse(sk, &sk->sk_receive_queue,
+ tcp_collapse(sk, &sk->sk_receive_queue, NULL,
skb_peek(&sk->sk_receive_queue),
NULL,
tp->copied_seq, tp->rcv_nxt);
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* We have out of order data. */
- (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
+ (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
* so release it.
*/
if (req) {
- tp->total_retrans = req->num_retrans;
+ inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
} else {
/* Make sure socket is routed, for correct metrics. */
} else
tcp_init_metrics(sk);
- tcp_update_pacing_rate(sk);
+ if (!inet_csk(sk)->icsk_ca_ops->cong_control)
+ tcp_update_pacing_rate(sk);
/* Prevent spurious tcp_cwnd_restart() on first data packet */
tp->lsndtime = tcp_time_stamp;
{
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
- TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->lost_out > tp->retrans_out &&
+ tp->snd_cwnd > tcp_packets_in_flight(tp))
+ tcp_xmit_retransmit_queue(sk);
+
+ tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
+ }
}
/*
* One tasklet per cpu tries to send more skbs.
skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
+ tcp_rate_skb_sent(sk, skb);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
tcp_set_skb_tso_segs(skb, mss_now);
tcp_set_skb_tso_segs(buff, mss_now);
+ /* Update delivered info for the new segment */
+ TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
+
/* If this packet has been sent out already, we must
* adjust the various packet counters.
*/
}
return mtu;
}
+EXPORT_SYMBOL(tcp_mss_to_mtu);
/* MTU probing init per socket */
void tcp_mtup_init(struct sock *sk)
/* Return how many segs we'd like on a TSO packet,
* to send one TSO packet per ms
*/
-static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
+u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ int min_tso_segs)
{
u32 bytes, segs;
* This preserves ACK clocking and is consistent
* with tcp_tso_should_defer() heuristic.
*/
- segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+ segs = max_t(u32, bytes / mss_now, min_tso_segs);
return min_t(u32, segs, sk->sk_gso_max_segs);
}
+EXPORT_SYMBOL(tcp_tso_autosize);
+
+/* Return the number of segments we want in the skb we are transmitting.
+ * See if congestion control module wants to decide; otherwise, autosize.
+ */
+static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+ const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+ u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+
+ return tso_segs ? :
+ tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+}
/* Returns the portion of skb which can be sent right away */
static unsigned int tcp_mss_split_point(const struct sock *sk,
return -1;
}
+/* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * (These limits are doubled for retransmits)
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
+ */
+static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ unsigned int factor)
+{
+ unsigned int limit;
+
+ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+ limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit <<= factor;
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ */
+ smp_mb__after_atomic();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ return true;
+ }
+ return false;
+}
+
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
}
}
- max_segs = tcp_tso_autosize(sk, mss_now);
+ max_segs = tcp_tso_segs(sk, mss_now);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
break;
- /* TCP Small Queues :
- * Control number of packets in qdisc/devices to two packets / or ~1 ms.
- * This allows for :
- * - better RTT estimation and ACK scheduling
- * - faster recovery
- * - high rates
- * Alas, some drivers / subsystems require a fair amount
- * of queued bytes to ensure line rate.
- * One example is wifi aggregation (802.11 AMPDU)
- */
- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
-
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- /* It is possible TX completion already happened
- * before we set TSQ_THROTTLED, so we must
- * test again the condition.
- */
- smp_mb__after_atomic();
- if (atomic_read(&sk->sk_wmem_alloc) > limit)
- break;
- }
+ if (tcp_small_queue_check(sk, skb, 0))
+ break;
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if (atomic_read(&sk->sk_wmem_alloc) >
- min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
+ min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
+ sk->sk_sndbuf))
return -EAGAIN;
if (skb_still_in_host_queue(sk, skb))
last_lost = tp->snd_una;
}
- max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
+ max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
tcp_for_write_queue_from(skb, sk) {
- __u8 sacked = TCP_SKB_CB(skb)->sacked;
+ __u8 sacked;
int segs;
if (skb == tcp_send_head(sk))
segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
if (segs <= 0)
return;
+ sacked = TCP_SKB_CB(skb)->sacked;
/* In case tcp_shift_skb_data() have aggregated large skbs,
* we need to make sure not sending too bigs TSO packets
*/
if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
continue;
+ if (tcp_small_queue_check(sk, skb, 1))
+ return;
+
if (tcp_retransmit_skb(sk, skb, segs))
return;
- NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
if (!res) {
__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ if (unlikely(tcp_passive_fastopen(sk)))
+ tcp_sk(sk)->total_retrans++;
}
return res;
}
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_VTI_HASH_SIZE_SHIFT 5
+#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
}
static int vti6_dev_init(struct net_device *dev);
/* the vti6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
};
goto discard;
}
- XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
rcu_read_unlock();
- return xfrm6_rcv(skb);
+ return xfrm6_rcv_tnl(skb, t);
}
rcu_read_unlock();
return -EINVAL;
struct net_device *dev;
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
+ struct xfrm_mode *inner_mode;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
u32 orig_mark = skb->mark;
int ret;
}
x = xfrm_input_state(skb);
- family = x->inner_mode->afinfo->family;
+
+ inner_mode = x->inner_mode;
+
+ if (x->sel.family == AF_UNSPEC) {
+ inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+ if (inner_mode == NULL) {
+ XFRM_INC_STATS(dev_net(skb->dev),
+ LINUX_MIB_XFRMINSTATEMODEERROR);
+ return -EINVAL;
+ }
+ }
+
+ family = inner_mode->afinfo->family;
skb->mark = be32_to_cpu(t->parms.i_key);
ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
struct ip6_tnl *t;
LIST_HEAD(list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
unregister_netdevice_queue(t->dev, &list);
return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
-static struct dst_entry *ip6_route_input_lookup(struct net *net,
- struct net_device *dev,
- struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+ struct net_device *dev,
+ struct flowi6 *fl6, int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
+EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
void ip6_route_input(struct sk_buff *skb)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = l3mdev_fib_oif(skb->dev),
+ .flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags)
{
- struct dst_entry *dst;
bool any_src;
- dst = l3mdev_get_rt6_dst(net, fl6);
- if (dst)
- return dst;
+ if (rt6_need_strict(&fl6->daddr)) {
+ struct dst_entry *dst;
+
+ dst = l3mdev_link_scope_lookup(net, fl6);
+ if (dst)
+ return dst;
+ }
fl6->flowi6_iif = LOOPBACK_IFINDEX;
rcu_read_unlock();
out:
- return min_t(unsigned int, mtu, IP6_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct dst_entry *icmp6_dst_gc_list;
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
- if (cfg->fc_table)
+ if (cfg->fc_table) {
grt = ip6_nh_lookup_table(net, cfg, gw_addr);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
if (!grt)
grt = rt6_lookup(net, gw_addr, NULL,
cfg->fc_ifindex, 1);
{
u32 tb_id;
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- DST_NOCOUNT);
+ struct net_device *dev = net->loopback_dev;
+ struct rt6_info *rt;
+
+ /* Use the L3 master device as the loopback device for host routes
+ * if the device is enslaved and the address is not link-local or multicast
+ */
+ if (!rt6_need_strict(addr))
+ dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+ rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
if (!rt)
return ERR_PTR(-ENOMEM);
} else {
fl6.flowi6_oif = oif;
- if (netif_index_is_l3_master(net, oif)) {
- fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
- FLOWI_FLAG_SKIP_NH_OIF;
- }
-
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
struct sock *sk = sock->sk;
struct irda_sock *new, *self = irda_sk(sk);
struct sock *newsk;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
int err;
err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
if (sock->state != SS_UNCONNECTED)
goto out;
- if ((sk = sock->sk) == NULL)
- goto out;
-
err = -EOPNOTSUPP;
if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
(sk->sk_type != SOCK_DGRAM))
err = -EPERM; /* value does not seem to make sense. -arnd */
if (!new->tsap) {
pr_debug("%s(), dup failed!\n", __func__);
- kfree_skb(skb);
goto out;
}
/* Clean up the original one to keep it in listen state */
irttp_listen(self->tsap);
- kfree_skb(skb);
sk->sk_ack_backlog--;
newsock->state = SS_CONNECTED;
irda_connect_response(new);
err = 0;
out:
+ kfree_skb(skb);
release_sock(sk);
return err;
}
.timeout = timeout,
.ssn = start_seq_num,
};
-
int i, ret = -EOPNOTSUPP;
u16 status = WLAN_STATUS_REQUEST_DECLINED;
+ if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
+ ht_dbg(sta->sdata,
+ "STA %pM requests BA session on unsupported tid %d\n",
+ sta->sta.addr, tid);
+ goto end_no_lock;
+ }
+
if (!sta->sta.ht_cap.ht_supported) {
ht_dbg(sta->sdata,
"STA %pM erroneously requests BA session on tid %d w/o QoS\n",
buf_size = IEEE80211_MAX_AMPDU_BUF;
/* make sure the size doesn't exceed the maximum supported by the hw */
- if (buf_size > local->hw.max_rx_aggregation_subframes)
- buf_size = local->hw.max_rx_aggregation_subframes;
+ if (buf_size > sta->sta.max_rx_aggregation_subframes)
+ buf_size = sta->sta.max_rx_aggregation_subframes;
params.buf_size = buf_size;
+ ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n",
+ buf_size, sta->sta.addr);
+
/* examine state machine */
mutex_lock(&sta->ampdu_mlme.mtx);
}
end:
- if (status == WLAN_STATUS_SUCCESS)
+ if (status == WLAN_STATUS_SUCCESS) {
__set_bit(tid, sta->ampdu_mlme.agg_session_valid);
+ __clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
+ }
mutex_unlock(&sta->ampdu_mlme.mtx);
end_no_lock:
u32 tx_time, estimated_retx;
u64 result;
- if (sta->mesh->fail_avg >= 100)
- return MAX_METRIC;
+ /* Try to get the rate from the HW/SW rate control algorithm.
+ * The rate is returned in units of Kbps; correct this to comply
+ * with the airtime calculation units.
+ * Round up in case we get a rate < 100 Kbps.
+ */
+ rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100);
+
+ if (rate) {
+ err = 0;
+ } else {
+ if (sta->mesh->fail_avg >= 100)
+ return MAX_METRIC;
- sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
- rate = cfg80211_calculate_bitrate(&rinfo);
- if (WARN_ON(!rate))
- return MAX_METRIC;
+ sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+ rate = cfg80211_calculate_bitrate(&rinfo);
+ if (WARN_ON(!rate))
+ return MAX_METRIC;
- err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ }
/* bitrate is in units of 100 Kbps, while we need rate in units of
* 1Mbps. This will be corrected on tx_time computation.
*/
tx_time = (device_constant + 10 * test_frame_len / rate);
estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
- result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
+ result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
return (u32)result;
}
sta = next_hop_deref_protected(mpath);
if (mpath->flags & MESH_PATH_ACTIVE &&
ether_addr_equal(ta, sta->sta.addr) &&
+ !(mpath->flags & MESH_PATH_FIXED) &&
(!(mpath->flags & MESH_PATH_SN_VALID) ||
SN_GT(target_sn, mpath->sn) || target_sn == 0)) {
mpath->flags &= ~MESH_PATH_ACTIVE;
goto enddiscovery;
spin_lock_bh(&mpath->state_lock);
- if (mpath->flags & MESH_PATH_DELETED) {
+ if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) {
spin_unlock_bh(&mpath->state_lock);
goto enddiscovery;
}
static const struct rhashtable_params sta_rht_params = {
.nelem_hint = 3, /* start small */
- .insecure_elasticity = true, /* Disable chain-length checks. */
.automatic_shrinking = true,
.head_offset = offsetof(struct sta_info, hash_node),
.key_offset = offsetof(struct sta_info, addr),
.key_len = ETH_ALEN,
- .hashfn = sta_addr_hash,
.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
};
static int sta_info_hash_del(struct ieee80211_local *local,
struct sta_info *sta)
{
- return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node,
- sta_rht_params);
+ return rhltable_remove(&local->sta_hash, &sta->hash_node,
+ sta_rht_params);
}
static void __cleanup_single_sta(struct sta_info *sta)
sta_info_free(local, sta);
}
+struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local,
+ const u8 *addr)
+{
+ return rhltable_lookup(&local->sta_hash, addr, sta_rht_params);
+}
+
/* protected by RCU */
struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
const u8 *addr)
{
struct ieee80211_local *local = sdata->local;
+ struct rhlist_head *tmp;
struct sta_info *sta;
- struct rhash_head *tmp;
- const struct bucket_table *tbl;
rcu_read_lock();
- tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
-
- for_each_sta_info(local, tbl, addr, sta, tmp) {
+ for_each_sta_info(local, addr, sta, tmp) {
if (sta->sdata == sdata) {
rcu_read_unlock();
/* this is safe as the caller must already hold
const u8 *addr)
{
struct ieee80211_local *local = sdata->local;
+ struct rhlist_head *tmp;
struct sta_info *sta;
- struct rhash_head *tmp;
- const struct bucket_table *tbl;
rcu_read_lock();
- tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
-
- for_each_sta_info(local, tbl, addr, sta, tmp) {
+ for_each_sta_info(local, addr, sta, tmp) {
if (sta->sdata == sdata ||
(sta->sdata->bss && sta->sdata->bss == sdata->bss)) {
rcu_read_unlock();
static int sta_info_hash_add(struct ieee80211_local *local,
struct sta_info *sta)
{
- return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
- sta_rht_params);
+ return rhltable_insert(&local->sta_hash, &sta->hash_node,
+ sta_rht_params);
}
static void sta_deliver_ps_frames(struct work_struct *wk)
memcpy(sta->addr, addr, ETH_ALEN);
memcpy(sta->sta.addr, addr, ETH_ALEN);
+ sta->sta.max_rx_aggregation_subframes =
+ local->hw.max_rx_aggregation_subframes;
+
sta->local = local;
sta->sdata = sdata;
sta->rx_stats.last_rx = jiffies;
is_multicast_ether_addr(sta->sta.addr)))
return -EINVAL;
- /* Strictly speaking this isn't necessary as we hold the mutex, but
- * the rhashtable code can't really deal with that distinction. We
- * do require the mutex for correctness though.
+ /* The RCU read lock is required by rhashtable due to
+ * asynchronous resize/rehash. We also require the mutex
+ * for correctness.
*/
rcu_read_lock();
lockdep_assert_held(&sdata->local->sta_mtx);
}
/* No need to do anything if the driver does all */
- if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
+ if (!local->ops->set_tim)
return;
if (sta->dead)
round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL));
}
-u32 sta_addr_hash(const void *key, u32 length, u32 seed)
-{
- return jhash(key, ETH_ALEN, seed);
-}
-
int sta_info_init(struct ieee80211_local *local)
{
int err;
- err = rhashtable_init(&local->sta_hash, &sta_rht_params);
+ err = rhltable_init(&local->sta_hash, &sta_rht_params);
if (err)
return err;
void sta_info_stop(struct ieee80211_local *local)
{
del_timer_sync(&local->sta_cleanup);
- rhashtable_destroy(&local->sta_hash);
+ rhltable_destroy(&local->sta_hash);
}
const u8 *localaddr)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct rhlist_head *tmp;
struct sta_info *sta;
- struct rhash_head *tmp;
- const struct bucket_table *tbl;
-
- tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
/*
* Just return a random station if localaddr is NULL
* ... first in list.
*/
- for_each_sta_info(local, tbl, addr, sta, tmp) {
+ for_each_sta_info(local, addr, sta, tmp) {
if (localaddr &&
!ether_addr_equal(sta->sdata->vif.addr, localaddr))
continue;
sta_info_recalc_tim(sta);
} else {
- unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
int tid;
/*
for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
- if (!(tids & BIT(tid)) || txqi->tin.backlog_packets)
+ if (!(driver_release_tids & BIT(tid)) ||
+ txqi->tin.backlog_packets)
continue;
sta_info_recalc_tim(sta);
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
- /* check if the driver has a SW RC implementation */
- if (ref && ref->ops->get_expected_throughput)
- thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
- else
- thr = drv_get_expected_throughput(local, &sta->sta);
+ thr = sta_get_expected_throughput(sta);
if (thr != 0) {
sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
}
}
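+/* Expected throughput of a station, taken from the software rate control
+ * algorithm when one is in use, otherwise from the driver.
+ */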
+u32 sta_get_expected_throughput(struct sta_info *sta)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_local *local = sdata->local;
+ struct rate_control_ref *ref = NULL;
+ u32 thr = 0;
+
+ if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ ref = local->rate_ctrl;
+
+ /* check if the driver has a SW RC implementation */
+ if (ref && ref->ops->get_expected_throughput)
+ thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+ else
+ thr = drv_get_expected_throughput(local, sta);
+
+ return thr;
+}
+
unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
return ret;
}
+ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ struct sk_buff *skb)
+ {
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_txq *txq = NULL;
+
+ if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
+ (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
+ return NULL;
+
+ if (!ieee80211_is_data(hdr->frame_control))
+ return NULL;
+
+ if (pubsta) {
+ u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+
+ txq = pubsta->txq[tid];
+ } else if (vif) {
+ txq = vif->txq;
+ }
+
+ if (!txq)
+ return NULL;
+
+ return to_txq_info(txq);
+ }
+
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
{
tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
tx->sta->tx_stats.msdu[tid]++;
- if (!tx->sta->sta.txq[0])
+ if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta,
+ tx->skb))
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
return TX_CONTINUE;
return TX_CONTINUE;
}
- static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
- struct ieee80211_vif *vif,
- struct ieee80211_sta *pubsta,
- struct sk_buff *skb)
- {
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_txq *txq = NULL;
-
- if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
- (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
- return NULL;
-
- if (!ieee80211_is_data(hdr->frame_control))
- return NULL;
-
- if (pubsta) {
- u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-
- txq = pubsta->txq[tid];
- } else if (vif) {
- txq = vif->txq;
- }
-
- if (!txq)
- return NULL;
-
- return to_txq_info(txq);
- }
-
static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
{
IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
local = container_of(fq, struct ieee80211_local, fq);
txqi = container_of(tin, struct txq_info, tin);
cparams = &local->cparams;
- cstats = &local->cstats;
+ cstats = &txqi->cstats;
if (flow == &txqi->def_flow)
cvars = &txqi->def_cvars;
fq_tin_init(&txqi->tin);
fq_flow_init(&txqi->def_flow);
codel_vars_init(&txqi->def_cvars);
+ codel_stats_init(&txqi->cstats);
txqi->txq.vif = &sdata->vif;
return ret;
codel_params_init(&local->cparams);
- codel_stats_init(&local->cstats);
local->cparams.interval = MS2TIME(100);
local->cparams.target = MS2TIME(20);
local->cparams.ecn = true;
spin_unlock_bh(&fq->lock);
if (skb && skb_has_frag_list(skb) &&
- !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
- skb_linearize(skb);
+ !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
+ if (skb_linearize(skb)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return NULL;
+ }
+ }
return skb;
}
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
- if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
+ if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
vif = &sdata->vif;
break;
}
case NL80211_IFTYPE_STATION:
if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) {
sta = sta_info_get(sdata, skb->data);
- if (sta) {
- bool tdls_peer, tdls_auth;
-
- tdls_peer = test_sta_flag(sta,
- WLAN_STA_TDLS_PEER);
- tdls_auth = test_sta_flag(sta,
- WLAN_STA_TDLS_PEER_AUTH);
-
- if (tdls_peer && tdls_auth) {
+ if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ if (test_sta_flag(sta,
+ WLAN_STA_TDLS_PEER_AUTH)) {
*sta_out = sta;
return 0;
}
* after a TDLS sta is removed due to being
* unreachable.
*/
- if (tdls_peer && !tdls_auth &&
- !ieee80211_is_tdls_setup(skb))
+ if (!ieee80211_is_tdls_setup(skb))
return -EINVAL;
}
struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
const u8 *encaps_data;
int encaps_len, skip_header_bytes;
- int nh_pos, h_pos;
bool wme_sta = false, authorized = false;
bool tdls_peer;
bool multicast;
encaps_len = 0;
}
- nh_pos = skb_network_header(skb) - skb->data;
- h_pos = skb_transport_header(skb) - skb->data;
-
skb_pull(skb, skip_header_bytes);
- nh_pos -= skip_header_bytes;
- h_pos -= skip_header_bytes;
-
head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
/*
}
}
- if (encaps_data) {
+ if (encaps_data)
memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
- nh_pos += encaps_len;
- h_pos += encaps_len;
- }
#ifdef CONFIG_MAC80211_MESH
- if (meshhdrlen > 0) {
+ if (meshhdrlen > 0)
memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
- nh_pos += meshhdrlen;
- h_pos += meshhdrlen;
- }
#endif
if (ieee80211_is_data_qos(fc)) {
} else
memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
- nh_pos += hdrlen;
- h_pos += hdrlen;
-
- /* Update skb pointers to various headers since this modified frame
- * is going to go through Linux networking code that may potentially
- * need things like pointer to IP header. */
skb_reset_mac_header(skb);
- skb_set_network_header(skb, nh_pos);
- skb_set_transport_header(skb, h_pos);
info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
*ieee80211_get_qos_ctl(hdr) = tid;
- if (!sta->sta.txq[0])
+ if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb))
hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
} else {
info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
int ac = ieee802_1d_to_ac[tid & 7];
skb_reset_mac_header(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
-
skb_set_queue_mapping(skb, ac);
skb->priority = tid;
struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash);
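+/* State of the periodic garbage-collection work for conntrack entries. */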
+struct conntrack_gc_work {
+ struct delayed_work dwork;
+ u32 last_bucket;
+ bool exiting;
+};
+
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
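+/* Each gc run scans at most 1/64th of the table (capped at 8192 buckets)
+ * and evicts at most 256 expired entries; runs are normally 5s apart.
+ */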
+#define GC_MAX_BUCKETS_DIV 64u
+#define GC_MAX_BUCKETS 8192u
+#define GC_INTERVAL (5 * HZ)
+#define GC_MAX_EVICTS 256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
spin_lock(lock);
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
pr_debug("destroy_conntrack(%p)\n", ct);
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
- NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
{
struct nf_conn_tstamp *tstamp;
+ if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ return false;
+
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_get_real_ns();
- if (nf_ct_is_dying(ct))
- goto delete;
-
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
- /* destroy event was not delivered */
+ /* destroy event was not delivered; nf_ct_put() will be
+ * done by the event cache worker on redelivery.
+ */
nf_ct_delete_from_lists(ct);
nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
return false;
}
nf_conntrack_ecache_work(nf_ct_net(ct));
- set_bit(IPS_DYING_BIT, &ct->status);
- delete:
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
const struct nf_conntrack_tuple *tuple,
net_eq(net, nf_ct_net(ct));
}
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold the RCU read lock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
{
- struct hlist_nulls_head *hptr;
- unsigned int sequence, hsz;
+ if (!atomic_inc_not_zero(&ct->ct_general.use))
+ return;
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hsz = nf_conntrack_htable_size;
- hptr = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ if (nf_ct_should_gc(ct))
+ nf_ct_kill(ct);
- *hash = hptr;
- *hsize = hsz;
+ nf_ct_put(ct);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
/*
* Warning :
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int bucket, sequence;
+ unsigned int bucket, hsize;
begin:
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- bucket = scale_hash(hash);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ bucket = reciprocal_scale(hash, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ struct nf_conn *ct;
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_is_dying(ct))
+ continue;
+
if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
return h;
zone, net))
goto out;
- add_timer(&ct->timeout);
smp_wmb();
/* The caller holds a reference to this object */
atomic_set(&ct->ct_general.use, 2);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
+ ct->timeout += nfct_time_stamp;
atomic_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
- unsigned int hash, sequence;
+ unsigned int hash, hsize;
struct hlist_nulls_node *n;
struct nf_conn *ct;
zone = nf_ct_zone(ignored_conntrack);
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = hash_conntrack(net, tuple);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = __hash_conntrack(net, tuple, hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (ct != ignored_conntrack &&
- nf_ct_key_equal(h, tuple, zone, net)) {
+
+ if (ct == ignored_conntrack)
+ continue;
+
+ if (nf_ct_is_expired(ct)) {
+ nf_ct_gc_expired(ct);
+ continue;
+ }
+
+ if (nf_ct_key_equal(h, tuple, zone, net)) {
NF_CT_STAT_INC_ATOMIC(net, found);
rcu_read_unlock();
return 1;
}
NF_CT_STAT_INC_ATOMIC(net, searched);
}
+
+ if (get_nulls_value(n) != hash) {
+ NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ goto begin;
+ }
+
rcu_read_unlock();
return 0;
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ continue;
+ }
+
if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
!net_eq(nf_ct_net(tmp), net) ||
nf_ct_is_dying(tmp))
*/
if (net_eq(nf_ct_net(tmp), net) &&
nf_ct_is_confirmed(tmp) &&
- del_timer(&tmp->timeout) &&
nf_ct_delete(tmp, 0, 0))
drops++;
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned hash, sequence, drops;
+ unsigned int hash, hsize, drops;
rcu_read_lock();
- do {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- hash = scale_hash(_hash++);
- ct_hash = nf_conntrack_hash;
- } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ nf_conntrack_get_ht(&ct_hash, &hsize);
+ hash = reciprocal_scale(_hash++, hsize);
drops = early_drop_list(net, &ct_hash[hash]);
rcu_read_unlock();
return false;
}
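+/* Walk a bounded slice of the conntrack hash from a delayed work item and
+ * reap entries whose timeout has already expired.
+ */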
+static void gc_worker(struct work_struct *work)
+{
+ unsigned int i, goal, buckets = 0, expired_count = 0;
+ unsigned long next_run = GC_INTERVAL;
+ unsigned int ratio, scanned = 0;
+ struct conntrack_gc_work *gc_work;
+
+ gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+ goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ i = gc_work->last_bucket;
+
+ do {
+ struct nf_conntrack_tuple_hash *h;
+ struct hlist_nulls_head *ct_hash;
+ struct hlist_nulls_node *n;
+ unsigned int hashsz;
+ struct nf_conn *tmp;
+
+ i++;
+ rcu_read_lock();
+
+ nf_conntrack_get_ht(&ct_hash, &hashsz);
+ if (i >= hashsz)
+ i = 0;
+
+ hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
+
+ scanned++;
+ if (nf_ct_is_expired(tmp)) {
+ nf_ct_gc_expired(tmp);
+ expired_count++;
+ continue;
+ }
+ }
+
+ /* We could check get_nulls_value() here and restart if ct
+ * was moved to another chain, but since gc is best-effort
+ * we just continue with the next hash slot.
+ */
+ rcu_read_unlock();
+ cond_resched_rcu_qs();
+ } while (++buckets < goal &&
+ expired_count < GC_MAX_EVICTS);
+
+ if (gc_work->exiting)
+ return;
+
+ ratio = scanned ? expired_count * 100 / scanned : 0;
+ if (ratio >= 90)
+ next_run = 0;
+
+ gc_work->last_bucket = i;
+ schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+ INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ gc_work->exiting = false;
+}
+
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
- /* Don't set timer yet: wait for confirmation */
- setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
if (IS_ERR(ct))
return (struct nf_conntrack_tuple_hash *)ct;
- if (tmpl && nfct_synproxy(tmpl)) {
- nfct_seqadj_ext_add(ct);
- nfct_synproxy_ext_add(ct);
+ if (!nf_ct_add_synproxy(ct, tmpl)) {
+ nf_conntrack_free(ct);
+ return ERR_PTR(-ENOMEM);
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
/* Only update if this is not a fixed timeout */
goto acct;
/* If not in hash table, timer will not be active yet */
- if (!nf_ct_is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- } else {
- unsigned long newtime = jiffies + extra_jiffies;
-
- /* Only update the timeout if the new timeout is at least
- HZ jiffies from the old timeout. Need del_timer for race
- avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ)
- mod_timer_pending(&ct->timeout, newtime);
- }
+ if (nf_ct_is_confirmed(ct))
+ extra_jiffies += nfct_time_stamp;
+ ct->timeout = extra_jiffies;
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
-bool __nf_ct_kill_acct(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct sk_buff *skb)
{
- if (do_acct)
- nf_ct_acct_update(ct, ctinfo, skb->len);
+ nf_ct_acct_update(ct, ctinfo, skb->len);
- if (del_timer(&ct->timeout)) {
- ct->timeout.function((unsigned long)ct);
- return true;
- }
- return false;
+ return nf_ct_delete(ct, 0, 0);
}
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
- if (del_timer(&ct->timeout))
- nf_ct_delete(ct, portid, report);
-
- /* ... else the timer will get him soon. */
+ nf_ct_delete(ct, portid, report);
nf_ct_put(ct);
cond_resched();
}
void nf_conntrack_cleanup_start(void)
{
+ conntrack_gc_work.exiting = true;
RCU_INIT_POINTER(ip_ct_attach, NULL);
}
while (untrack_refs() > 0)
schedule();
+ cancel_delayed_work_sync(&conntrack_gc_work.dwork);
nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
nf_conntrack_proto_fini();
}
/* - and look it like as a confirmed connection */
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+ conntrack_gc_work_init(&conntrack_gc_work);
+ schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
return 0;
err_proto:
ct->status |= IPS_DST_NAT;
if (nfct_help(ct))
- nfct_seqadj_ext_add(ct);
+ if (!nfct_seqadj_ext_add(ct))
+ return NF_DROP;
}
if (maniptype == NF_NAT_MANIP_SRC) {
* Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
- if (!del_timer(&ct->timeout))
- return 1;
-
ct->status &= ~IPS_NAT_DONE_MASK;
-
rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
nf_nat_bysource_params);
- add_timer(&ct->timeout);
-
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
*/
if (err < 0)
return err;
- return nf_nat_setup_info(ct, &range, manip);
+ return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
}
#else
static int
/* PMTU discovery (RFC1191) */
if (ICMP_FRAG_NEEDED == code) {
sctp_icmp_frag_needed(sk, asoc, transport,
- WORD_TRUNC(info));
+ SCTP_TRUNC4(info));
goto out_unlock;
} else {
if (ICMP_PROT_UNREACH == code) {
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
- ch_end = offset + WORD_ROUND(ntohs(ch->length));
+ ch_end = offset + SCTP_PAD4(ntohs(ch->length));
if (ch_end > skb->len)
break;
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
+ struct sctp_transport *t = (struct sctp_transport *)ptr;
const struct sctp_hash_cmp_arg *x = arg->key;
- const struct sctp_transport *t = ptr;
- struct sctp_association *asoc = t->asoc;
- const struct net *net = x->net;
+ struct sctp_association *asoc;
+ int err = 1;
if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
- return 1;
- if (!net_eq(sock_net(asoc->base.sk), net))
- return 1;
+ return err;
+ if (!sctp_transport_hold(t))
+ return err;
+
+ asoc = t->asoc;
+ if (!net_eq(sock_net(asoc->base.sk), x->net))
+ goto out;
if (x->ep) {
if (x->ep != asoc->ep)
- return 1;
+ goto out;
} else {
if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
- return 1;
+ goto out;
if (!sctp_bind_addr_match(&asoc->base.bind_addr,
x->laddr, sctp_sk(asoc->base.sk)))
- return 1;
+ goto out;
}
- return 0;
+ err = 0;
+ out:
+ sctp_transport_put(t);
+ return err;
}
static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
break;
- ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+ ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
if (ch_end > skb_tail_pointer(skb))
break;
* that the chunk length doesn't cause overflow. Otherwise, we'll
* walk off the end.
*/
- if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+ if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
return NULL;
/* If this is INIT/INIT-ACK look inside the chunk too. */
struct nlattr *key;
struct cfg80211_cached_keys *result;
int rem, err, def = 0;
+ bool have_key = false;
+
+ nla_for_each_nested(key, keys, rem) {
+ have_key = true;
+ break;
+ }
+
+ if (!have_key)
+ return NULL;
result = kzalloc(sizeof(*result), GFP_KERNEL);
if (!result)
return ERR_PTR(-ENOMEM);
result->def = -1;
- result->defmgmt = -1;
nla_for_each_nested(key, keys, rem) {
memset(&parse, 0, sizeof(parse));
err = -EINVAL;
if (!parse.p.key)
goto error;
- if (parse.idx < 0 || parse.idx > 4)
+ if (parse.idx < 0 || parse.idx > 3)
goto error;
if (parse.def) {
if (def)
parse.idx, false, NULL);
if (err)
goto error;
+ if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 &&
+ parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) {
+ err = -EINVAL;
+ goto error;
+ }
result->params[parse.idx].cipher = parse.p.cipher;
result->params[parse.idx].key_len = parse.p.key_len;
result->params[parse.idx].key = result->data[parse.idx];
memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len);
- if (parse.p.cipher == WLAN_CIPHER_SUITE_WEP40 ||
- parse.p.cipher == WLAN_CIPHER_SUITE_WEP104) {
- if (no_ht)
- *no_ht = true;
- }
+ /* must be WEP key if we got here */
+ if (no_ht)
+ *no_ht = true;
+ }
+
+ if (result->def < 0) {
+ err = -EINVAL;
+ goto error;
}
return result;
int if_idx = 0;
int wp_start = cb->args[0];
int if_start = cb->args[1];
+ int filter_wiphy = -1;
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
rtnl_lock();
+ if (!cb->args[2]) {
+ struct nl80211_dump_wiphy_state state = {
+ .filter_wiphy = -1,
+ };
+ int ret;
+
+ ret = nl80211_dump_wiphy_parse(skb, cb, &state);
+ if (ret)
+ return ret;
+
+ filter_wiphy = state.filter_wiphy;
+
+ /*
+ * if filtering, store filter_wiphy + 1 in cb->args[2]; 0 is
+ * reserved to mean the attributes have not been parsed yet.
+ */
+ if (filter_wiphy >= 0)
+ cb->args[2] = filter_wiphy + 1;
+ else
+ cb->args[2] = -1;
+ } else if (cb->args[2] > 0) {
+ filter_wiphy = cb->args[2] - 1;
+ }
+
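The cb->args[2] cookie persists between dump invocations, so it has to distinguish "not parsed yet" from "parsed, no filter". A sketch of the encoding the code above relies on (the helper names are illustrative, not in the patch):

/* 0 = not parsed yet, -1 = parsed/no filter, n + 1 = filter on wiphy n */
static inline long filter_to_cookie(int filter_wiphy)
{
	return filter_wiphy >= 0 ? filter_wiphy + 1 : -1;
}

static inline int cookie_to_filter(long cookie)
{
	return cookie > 0 ? cookie - 1 : -1;
}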
list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
continue;
wp_idx++;
continue;
}
+
+ if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx)
+ continue;
+
if_idx = 0;
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct vif_params params;
struct wireless_dev *wdev;
- struct sk_buff *msg, *event;
+ struct sk_buff *msg;
int err;
enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
u32 flags;
return -ENOBUFS;
}
- event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (event) {
- if (nl80211_send_iface(event, 0, 0, 0,
- rdev, wdev, false) < 0) {
- nlmsg_free(event);
- goto out;
- }
-
- genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- event, 0, NL80211_MCGRP_CONFIG,
- GFP_KERNEL);
- }
+ /*
+ * For wdevs which have no associated netdev object (e.g. of type
+ * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
+ * For all other types, the event will be generated from the
+ * netdev notifier.
+ */
+ if (!wdev->netdev)
+ nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
-out:
return genlmsg_reply(msg, info);
}
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
- struct sk_buff *msg;
- int status;
if (!rdev->ops->del_virtual_intf)
return -EOPNOTSUPP;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) {
- nlmsg_free(msg);
- msg = NULL;
- }
-
/*
* If we remove a wireless device without a netdev then clear
* user_ptr[1] so that nl80211_post_doit won't dereference it
if (!wdev->netdev)
info->user_ptr[1] = NULL;
- status = rdev_del_virtual_intf(rdev, wdev);
- if (status >= 0 && msg)
- genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- msg, 0, NL80211_MCGRP_CONFIG,
- GFP_KERNEL);
- else
- nlmsg_free(msg);
-
- return status;
+ return rdev_del_virtual_intf(rdev, wdev);
}
static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
return 0;
}
+static int nl80211_check_power_mode(const struct nlattr *nla,
+ enum nl80211_mesh_power_mode min,
+ enum nl80211_mesh_power_mode max,
+ enum nl80211_mesh_power_mode *out)
+{
+ u32 val = nla_get_u32(nla);
+ if (val < min || val > max)
+ return -EINVAL;
+ *out = val;
+ return 0;
+}
+
static int nl80211_parse_mesh_config(struct genl_info *info,
struct mesh_config *cfg,
u32 *mask_out)
NL80211_MESH_POWER_ACTIVE,
NL80211_MESH_POWER_MAX,
mask, NL80211_MESHCONF_POWER_MODE,
- nl80211_check_u32);
+ nl80211_check_power_mode);
FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
0, 65535, mask,
NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
params.n_counter_offsets_presp = len / sizeof(u16);
if (rdev->wiphy.max_num_csa_counters &&
- (params.n_counter_offsets_beacon >
+ (params.n_counter_offsets_presp >
rdev->wiphy.max_num_csa_counters))
return -EINVAL;
(key.p.cipher != WLAN_CIPHER_SUITE_WEP104 ||
key.p.key_len != WLAN_KEY_LEN_WEP104))
return -EINVAL;
- if (key.idx > 4)
+ if (key.idx > 3)
return -EINVAL;
} else {
key.p.key_len = 0;
ibss.beacon_interval = 100;
- if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+ if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
ibss.beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
- return -EINVAL;
- }
+
+ err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+ if (err)
+ return err;
if (!rdev->ops->join_ibss)
return -EOPNOTSUPP;
}
data = nla_nest_start(skb, attr);
+ if (!data)
+ goto nla_put_failure;
((void **)skb->cb)[0] = rdev;
((void **)skb->cb)[1] = hdr;
if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
setup.beacon_interval =
nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- if (setup.beacon_interval < 10 ||
- setup.beacon_interval > 10000)
- return -EINVAL;
+
+ err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+ if (err)
+ return err;
}
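Both the IBSS and mesh paths above now defer to cfg80211_validate_beacon_int() instead of open-coding slightly different range checks. A plausible shape for the centralized validator (a sketch under that assumption, not the verbatim implementation):

int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
				 u32 beacon_int)
{
	struct wireless_dev *wdev;

	if (beacon_int < 10 || beacon_int > 10000)
		return -EINVAL;

	/* keep the interval consistent across the wiphy's beaconing wdevs */
	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
		if (wdev->beacon_interval &&
		    wdev->beacon_interval != beacon_int)
			return -EINVAL;
	}
	return 0;
}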
if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
if (!freqs)
return -ENOBUFS;
- for (i = 0; i < req->n_channels; i++)
- nla_put_u32(msg, i, req->channels[i]->center_freq);
+ for (i = 0; i < req->n_channels; i++) {
+ if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+ return -ENOBUFS;
+ }
nla_nest_end(msg, freqs);
if (req->n_match_sets) {
matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH);
+ if (!matches)
+ return -ENOBUFS;
+
for (i = 0; i < req->n_match_sets; i++) {
match = nla_nest_start(msg, i);
- nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
- req->match_sets[i].ssid.ssid_len,
- req->match_sets[i].ssid.ssid);
+ if (!match)
+ return -ENOBUFS;
+
+ if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
+ req->match_sets[i].ssid.ssid_len,
+ req->match_sets[i].ssid.ssid))
+ return -ENOBUFS;
nla_nest_end(msg, match);
}
nla_nest_end(msg, matches);
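Failing with -ENOBUFS throughout this helper is the netlink convention: nla_nest_start() returns NULL and nla_put*() returns non-zero once the skb runs out of tailroom, and reporting the failure lets the caller retry with a larger buffer instead of delivering a silently truncated message.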
for (i = 0; i < req->n_scan_plans; i++) {
scan_plan = nla_nest_start(msg, i + 1);
+ if (!scan_plan)
+ 	return -ENOBUFS;
+
- if (!scan_plan ||
-     nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
+ if (nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
req->scan_plans[i].interval) ||
NL80211_MCGRP_CONFIG, GFP_KERNEL);
}
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
+ enum nl80211_commands cmd)
+{
+ struct sk_buff *msg;
+
+ WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+ cmd != NL80211_CMD_DEL_INTERFACE);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
+ cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_CONFIG, GFP_KERNEL);
+}
+
static int nl80211_add_scan_req(struct sk_buff *msg,
struct cfg80211_registered_device *rdev)
{
#include "xfrm_hash.h"
+#define xfrm_state_deref_prot(table, net) \
+ rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+
+static void xfrm_state_gc_task(struct work_struct *work);
+
/* Each xfrm_state may be linked to two tables:
1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
*/
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+
+static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
+static HLIST_HEAD(xfrm_state_gc_list);
+
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+ return atomic_inc_not_zero(&x->refcnt);
+}
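xfrm_state_hold_rcu() exists because, under RCU, a lookup can race with the final put: an entry may still be visible on the hash chain while its refcount has already reached zero, and atomic_inc_not_zero() takes a reference only if one still exists. The lookup pattern it enables, as a self-contained sketch (find_byspi() and the match condition are illustrative):

static struct xfrm_state *find_byspi(struct net *net, unsigned int h,
				     __be32 spi)
{
	struct xfrm_state *x, *found = NULL;

	rcu_read_lock();
	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
		if (x->id.spi != spi)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;	/* refcount already zero: skip */
		found = x;
		break;
	}
	rcu_read_unlock();
	return found;	/* caller releases with xfrm_state_put() */
}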
static inline unsigned int xfrm_dst_hash(struct net *net,
const xfrm_address_t *daddr,
h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family,
nhashmask);
- hlist_add_head(&x->bydst, ndsttable+h);
+ hlist_add_head_rcu(&x->bydst, ndsttable + h);
h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
x->props.family,
nhashmask);
- hlist_add_head(&x->bysrc, nsrctable+h);
+ hlist_add_head_rcu(&x->bysrc, nsrctable + h);
if (x->id.spi) {
h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
x->id.proto, x->props.family,
nhashmask);
- hlist_add_head(&x->byspi, nspitable+h);
+ hlist_add_head_rcu(&x->byspi, nspitable + h);
}
}
}
}
spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ write_seqcount_begin(&xfrm_state_hash_generation);
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+ odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--)
- xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
- nhashmask);
+ xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
- odst = net->xfrm.state_bydst;
- osrc = net->xfrm.state_bysrc;
- ospi = net->xfrm.state_byspi;
+ osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+ ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
ohashmask = net->xfrm.state_hmask;
- net->xfrm.state_bydst = ndst;
- net->xfrm.state_bysrc = nsrc;
- net->xfrm.state_byspi = nspi;
+ rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+ rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+ rcu_assign_pointer(net->xfrm.state_byspi, nspi);
net->xfrm.state_hmask = nhashmask;
+ write_seqcount_end(&xfrm_state_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+ synchronize_rcu();
+
xfrm_hash_free(odst, osize);
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
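The seqcount pairs with the RCU conversion: the resizer bumps the generation inside the state lock, so lockless readers can detect that their hash walk overlapped a rehash. The lookup code later in this section does a single-shot check and maps a detected change to -EAGAIN; the equivalent retry-loop form, for comparison (lookup_one() is a stand-in for the real bucket walk):

unsigned int seq;
struct xfrm_state *x;

do {
	seq = read_seqcount_begin(&xfrm_state_hash_generation);
	x = lookup_one();	/* RCU hash walk; a miss during a rehash
				 * may be spurious, so retry on change */
} while (!x && read_seqcount_retry(&xfrm_state_hash_generation, seq));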
{
tasklet_hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
+ kfree(x->aead);
kfree(x->aalg);
kfree(x->ealg);
kfree(x->calg);
static void xfrm_state_gc_task(struct work_struct *work)
{
- struct net *net = container_of(work, struct net, xfrm.state_gc_work);
struct xfrm_state *x;
struct hlist_node *tmp;
struct hlist_head gc_list;
spin_lock_bh(&xfrm_state_gc_lock);
- hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+ hlist_move_list(&xfrm_state_gc_list, &gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
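+ /* lockless readers may still be traversing states queued for gc
+  * under rcu_read_lock(); let a grace period elapse before freeing.
+  */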
+ synchronize_rcu();
+
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
xfrm_state_gc_destroy(x);
}
void __xfrm_state_destroy(struct xfrm_state *x)
{
- struct net *net = xs_net(x);
-
WARN_ON(x->km.state != XFRM_STATE_DEAD);
spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&net->xfrm.state_gc_work);
+ schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
x->km.state = XFRM_STATE_DEAD;
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
- hlist_del(&x->bydst);
- hlist_del(&x->bysrc);
+ hlist_del_rcu(&x->bydst);
+ hlist_del_rcu(&x->bysrc);
if (x->id.spi)
- hlist_del(&x->byspi);
+ hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock);
unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
if ((mark & x->mark.m) != x->mark.v)
continue;
- xfrm_state_hold(x);
+ if (!xfrm_state_hold_rcu(x))
+ continue;
return x;
}
unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
if (x->props.family != family ||
x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) ||
if ((mark & x->mark.m) != x->mark.v)
continue;
- xfrm_state_hold(x);
+ if (!xfrm_state_hold_rcu(x))
+ continue;
return x;
}
struct xfrm_state *best = NULL;
u32 mark = pol->mark.v & pol->mark.m;
unsigned short encap_family = tmpl->encap_family;
+ unsigned int sequence;
struct km_event c;
to_put = NULL;
- spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
+ rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
goto found;
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
- hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+ hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v &&
}
if (km_query(x, tmpl, pol) == 0) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ;
list_add(&x->km.all, &net->xfrm.state_all);
- hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, encap_family);
- hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
- hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
} else {
x->km.state = XFRM_STATE_DEAD;
to_put = x;
}
}
out:
- if (x)
- xfrm_state_hold(x);
- else
+ if (x) {
+ if (!xfrm_state_hold_rcu(x)) {
+ *err = -EAGAIN;
+ x = NULL;
+ }
+ } else {
*err = acquire_in_progress ? -EAGAIN : error;
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+ rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
+
+ if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+ *err = -EAGAIN;
+ if (x) {
+ xfrm_state_put(x);
+ x = NULL;
+ }
+ }
+
return x;
}
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
- hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
- hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
x->props.family);
- hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
xfrm_state_hold(x);
tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
list_add(&x->km.all, &net->xfrm.state_all);
- hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, family);
- hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
net->xfrm.state_num++;
if (x->id.spi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0;
net->xfrm.state_num = 0;
INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
- INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
- INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
spin_lock_init(&net->xfrm.xfrm_state_lock);
return 0;
flush_work(&net->xfrm.state_hash_work);
xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
- flush_work(&net->xfrm.state_gc_work);
+ flush_work(&xfrm_state_gc_work);
WARN_ON(!list_empty(&net->xfrm.state_all));