1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /* drivers/net/ifb.c:
3*4882a593Smuzhiyun
4*4882a593Smuzhiyun The purpose of this driver is to provide a device that allows
5*4882a593Smuzhiyun for sharing of resources:
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun 1) qdiscs/policies that are per device as opposed to system wide.
8*4882a593Smuzhiyun ifb allows for a device which can be redirected to thus providing
9*4882a593Smuzhiyun an impression of sharing.
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun 2) Allows for queueing incoming traffic for shaping instead of
12*4882a593Smuzhiyun dropping.
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun The original concept is based on what is known as the IMQ
15*4882a593Smuzhiyun driver initially written by Martin Devera, later rewritten
16*4882a593Smuzhiyun by Patrick McHardy and then maintained by Andre Correa.
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun You need the tc action mirror or redirect to feed this device
19*4882a593Smuzhiyun packets.
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun Authors: Jamal Hadi Salim (2005)
23*4882a593Smuzhiyun
24*4882a593Smuzhiyun */
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun #include <linux/module.h>
28*4882a593Smuzhiyun #include <linux/kernel.h>
29*4882a593Smuzhiyun #include <linux/netdevice.h>
30*4882a593Smuzhiyun #include <linux/etherdevice.h>
31*4882a593Smuzhiyun #include <linux/init.h>
32*4882a593Smuzhiyun #include <linux/interrupt.h>
33*4882a593Smuzhiyun #include <linux/moduleparam.h>
34*4882a593Smuzhiyun #include <net/pkt_sched.h>
35*4882a593Smuzhiyun #include <net/net_namespace.h>
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun #define TX_Q_LIMIT 32
/* Per tx-queue private state.  One instance exists for every tx queue of
 * an ifb device; the whole struct is cacheline-aligned so that counters
 * of different queues do not false-share.
 */
struct ifb_q_private {
	struct net_device	*dev;		/* owning ifb device */
	struct tasklet_struct	ifb_tasklet;	/* bottom half draining rq/tq */
	int			tasklet_pending; /* nonzero while tasklet is scheduled */
	int			txqnum;		/* index of this tx queue on dev */
	struct sk_buff_head	rq;		/* receive backlog filled by ifb_xmit() */
	u64			rx_packets;	/* packets accepted by ifb_xmit() */
	u64			rx_bytes;	/* bytes accepted by ifb_xmit() */
	struct u64_stats_sync	rsync;		/* protects rx_* on 32-bit */

	struct u64_stats_sync	tsync;		/* protects tx_* on 32-bit */
	u64			tx_packets;	/* packets re-injected by the tasklet */
	u64			tx_bytes;	/* bytes re-injected by the tasklet */
	struct sk_buff_head	tq;		/* tasklet-private working queue */
} ____cacheline_aligned_in_smp;
53*4882a593Smuzhiyun
/* netdev_priv() area of an ifb device: array of per-queue state,
 * one ifb_q_private per tx queue, allocated in ifb_dev_init().
 */
struct ifb_dev_private {
	struct ifb_q_private *tx_private;
};
57*4882a593Smuzhiyun
58*4882a593Smuzhiyun static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
59*4882a593Smuzhiyun static int ifb_open(struct net_device *dev);
60*4882a593Smuzhiyun static int ifb_close(struct net_device *dev);
61*4882a593Smuzhiyun
/* ifb_ri_tasklet - per-queue bottom half that re-injects redirected skbs.
 *
 * ifb_xmit() appends packets to txp->rq (under the netdev tx queue lock
 * held by the stack) and schedules this tasklet.  Here the backlog is
 * spliced onto the tasklet-private tq list in one locked operation, then
 * each skb is sent back via the device it originally arrived on
 * (skb->skb_iif), either out the egress path or back into the ingress
 * path depending on skb->from_ingress.
 */
static void ifb_ri_tasklet(unsigned long _txp)
{
	struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
	struct netdev_queue *txq;
	struct sk_buff *skb;

	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
	skb = skb_peek(&txp->tq);
	if (!skb) {
		/* tq is empty: pull the whole rq backlog over.  rq is
		 * protected by the tx queue lock, so trylock and reschedule
		 * ourselves if it is contended.
		 */
		if (!__netif_tx_trylock(txq))
			goto resched;
		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
		__netif_tx_unlock(txq);
	}

	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
		/* Clear redirect state so tc does not bounce the packet
		 * straight back into ifb.
		 */
		skb->redirected = 0;
#ifdef CONFIG_NET_CLS_ACT
		skb->tc_skip_classify = 1;
#endif

		u64_stats_update_begin(&txp->tsync);
		txp->tx_packets++;
		txp->tx_bytes += skb->len;
		u64_stats_update_end(&txp->tsync);

		/* The originating device may have disappeared while the
		 * packet was queued; look it up under RCU.
		 */
		rcu_read_lock();
		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
		if (!skb->dev) {
			rcu_read_unlock();
			dev_kfree_skb(skb);
			txp->dev->stats.tx_dropped++;
			if (skb_queue_len(&txp->tq) != 0)
				goto resched;
			break;
		}
		rcu_read_unlock();
		skb->skb_iif = txp->dev->ifindex;

		if (!skb->from_ingress) {
			dev_queue_xmit(skb);
		} else {
			/* Packet was redirected from ingress: strip the MAC
			 * header (with checksum fixup) before handing it
			 * back to the receive path.
			 */
			skb_pull_rcsum(skb, skb->mac_len);
			netif_receive_skb(skb);
		}
	}

	if (__netif_tx_trylock(txq)) {
		skb = skb_peek(&txp->rq);
		if (!skb) {
			/* Both queues drained: clear the pending flag and
			 * restart the tx queue if ifb_xmit() stopped it.
			 */
			txp->tasklet_pending = 0;
			if (netif_tx_queue_stopped(txq))
				netif_tx_wake_queue(txq);
		} else {
			/* More work arrived meanwhile; drop the lock before
			 * jumping to the reschedule path below.
			 */
			__netif_tx_unlock(txq);
			goto resched;
		}
		__netif_tx_unlock(txq);
	} else {
resched:
		txp->tasklet_pending = 1;
		tasklet_schedule(&txp->ifb_tasklet);
	}

}
127*4882a593Smuzhiyun
ifb_stats64(struct net_device * dev,struct rtnl_link_stats64 * stats)128*4882a593Smuzhiyun static void ifb_stats64(struct net_device *dev,
129*4882a593Smuzhiyun struct rtnl_link_stats64 *stats)
130*4882a593Smuzhiyun {
131*4882a593Smuzhiyun struct ifb_dev_private *dp = netdev_priv(dev);
132*4882a593Smuzhiyun struct ifb_q_private *txp = dp->tx_private;
133*4882a593Smuzhiyun unsigned int start;
134*4882a593Smuzhiyun u64 packets, bytes;
135*4882a593Smuzhiyun int i;
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun for (i = 0; i < dev->num_tx_queues; i++,txp++) {
138*4882a593Smuzhiyun do {
139*4882a593Smuzhiyun start = u64_stats_fetch_begin_irq(&txp->rsync);
140*4882a593Smuzhiyun packets = txp->rx_packets;
141*4882a593Smuzhiyun bytes = txp->rx_bytes;
142*4882a593Smuzhiyun } while (u64_stats_fetch_retry_irq(&txp->rsync, start));
143*4882a593Smuzhiyun stats->rx_packets += packets;
144*4882a593Smuzhiyun stats->rx_bytes += bytes;
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun do {
147*4882a593Smuzhiyun start = u64_stats_fetch_begin_irq(&txp->tsync);
148*4882a593Smuzhiyun packets = txp->tx_packets;
149*4882a593Smuzhiyun bytes = txp->tx_bytes;
150*4882a593Smuzhiyun } while (u64_stats_fetch_retry_irq(&txp->tsync, start));
151*4882a593Smuzhiyun stats->tx_packets += packets;
152*4882a593Smuzhiyun stats->tx_bytes += bytes;
153*4882a593Smuzhiyun }
154*4882a593Smuzhiyun stats->rx_dropped = dev->stats.rx_dropped;
155*4882a593Smuzhiyun stats->tx_dropped = dev->stats.tx_dropped;
156*4882a593Smuzhiyun }
157*4882a593Smuzhiyun
ifb_dev_init(struct net_device * dev)158*4882a593Smuzhiyun static int ifb_dev_init(struct net_device *dev)
159*4882a593Smuzhiyun {
160*4882a593Smuzhiyun struct ifb_dev_private *dp = netdev_priv(dev);
161*4882a593Smuzhiyun struct ifb_q_private *txp;
162*4882a593Smuzhiyun int i;
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
165*4882a593Smuzhiyun if (!txp)
166*4882a593Smuzhiyun return -ENOMEM;
167*4882a593Smuzhiyun dp->tx_private = txp;
168*4882a593Smuzhiyun for (i = 0; i < dev->num_tx_queues; i++,txp++) {
169*4882a593Smuzhiyun txp->txqnum = i;
170*4882a593Smuzhiyun txp->dev = dev;
171*4882a593Smuzhiyun __skb_queue_head_init(&txp->rq);
172*4882a593Smuzhiyun __skb_queue_head_init(&txp->tq);
173*4882a593Smuzhiyun u64_stats_init(&txp->rsync);
174*4882a593Smuzhiyun u64_stats_init(&txp->tsync);
175*4882a593Smuzhiyun tasklet_init(&txp->ifb_tasklet, ifb_ri_tasklet,
176*4882a593Smuzhiyun (unsigned long)txp);
177*4882a593Smuzhiyun netif_tx_start_queue(netdev_get_tx_queue(dev, i));
178*4882a593Smuzhiyun }
179*4882a593Smuzhiyun return 0;
180*4882a593Smuzhiyun }
181*4882a593Smuzhiyun
/* netdev callbacks for ifb devices. */
static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open	= ifb_open,
	.ndo_stop	= ifb_close,
	.ndo_get_stats64 = ifb_stats64,
	.ndo_start_xmit	= ifb_xmit,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_init	= ifb_dev_init,
};
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \
192*4882a593Smuzhiyun NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6 | \
193*4882a593Smuzhiyun NETIF_F_GSO_ENCAP_ALL | \
194*4882a593Smuzhiyun NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \
195*4882a593Smuzhiyun NETIF_F_HW_VLAN_STAG_TX)
196*4882a593Smuzhiyun
ifb_dev_free(struct net_device * dev)197*4882a593Smuzhiyun static void ifb_dev_free(struct net_device *dev)
198*4882a593Smuzhiyun {
199*4882a593Smuzhiyun struct ifb_dev_private *dp = netdev_priv(dev);
200*4882a593Smuzhiyun struct ifb_q_private *txp = dp->tx_private;
201*4882a593Smuzhiyun int i;
202*4882a593Smuzhiyun
203*4882a593Smuzhiyun for (i = 0; i < dev->num_tx_queues; i++,txp++) {
204*4882a593Smuzhiyun tasklet_kill(&txp->ifb_tasklet);
205*4882a593Smuzhiyun __skb_queue_purge(&txp->rq);
206*4882a593Smuzhiyun __skb_queue_purge(&txp->tq);
207*4882a593Smuzhiyun }
208*4882a593Smuzhiyun kfree(dp->tx_private);
209*4882a593Smuzhiyun }
210*4882a593Smuzhiyun
ifb_setup(struct net_device * dev)211*4882a593Smuzhiyun static void ifb_setup(struct net_device *dev)
212*4882a593Smuzhiyun {
213*4882a593Smuzhiyun /* Initialize the device structure. */
214*4882a593Smuzhiyun dev->netdev_ops = &ifb_netdev_ops;
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun /* Fill in device structure with ethernet-generic values. */
217*4882a593Smuzhiyun ether_setup(dev);
218*4882a593Smuzhiyun dev->tx_queue_len = TX_Q_LIMIT;
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun dev->features |= IFB_FEATURES;
221*4882a593Smuzhiyun dev->hw_features |= dev->features;
222*4882a593Smuzhiyun dev->hw_enc_features |= dev->features;
223*4882a593Smuzhiyun dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
224*4882a593Smuzhiyun NETIF_F_HW_VLAN_STAG_TX);
225*4882a593Smuzhiyun
226*4882a593Smuzhiyun dev->flags |= IFF_NOARP;
227*4882a593Smuzhiyun dev->flags &= ~IFF_MULTICAST;
228*4882a593Smuzhiyun dev->priv_flags &= ~IFF_TX_SKB_SHARING;
229*4882a593Smuzhiyun netif_keep_dst(dev);
230*4882a593Smuzhiyun eth_hw_addr_random(dev);
231*4882a593Smuzhiyun dev->needs_free_netdev = true;
232*4882a593Smuzhiyun dev->priv_destructor = ifb_dev_free;
233*4882a593Smuzhiyun
234*4882a593Smuzhiyun dev->min_mtu = 0;
235*4882a593Smuzhiyun dev->max_mtu = 0;
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun
ifb_xmit(struct sk_buff * skb,struct net_device * dev)238*4882a593Smuzhiyun static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
239*4882a593Smuzhiyun {
240*4882a593Smuzhiyun struct ifb_dev_private *dp = netdev_priv(dev);
241*4882a593Smuzhiyun struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun u64_stats_update_begin(&txp->rsync);
244*4882a593Smuzhiyun txp->rx_packets++;
245*4882a593Smuzhiyun txp->rx_bytes += skb->len;
246*4882a593Smuzhiyun u64_stats_update_end(&txp->rsync);
247*4882a593Smuzhiyun
248*4882a593Smuzhiyun if (!skb->redirected || !skb->skb_iif) {
249*4882a593Smuzhiyun dev_kfree_skb(skb);
250*4882a593Smuzhiyun dev->stats.rx_dropped++;
251*4882a593Smuzhiyun return NETDEV_TX_OK;
252*4882a593Smuzhiyun }
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
255*4882a593Smuzhiyun netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
256*4882a593Smuzhiyun
257*4882a593Smuzhiyun __skb_queue_tail(&txp->rq, skb);
258*4882a593Smuzhiyun if (!txp->tasklet_pending) {
259*4882a593Smuzhiyun txp->tasklet_pending = 1;
260*4882a593Smuzhiyun tasklet_schedule(&txp->ifb_tasklet);
261*4882a593Smuzhiyun }
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun return NETDEV_TX_OK;
264*4882a593Smuzhiyun }
265*4882a593Smuzhiyun
/* ifb_close - ndo_stop: stop all tx queues. Always succeeds. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}
271*4882a593Smuzhiyun
/* ifb_open - ndo_open: start all tx queues. Always succeeds. */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}
277*4882a593Smuzhiyun
ifb_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)278*4882a593Smuzhiyun static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
279*4882a593Smuzhiyun struct netlink_ext_ack *extack)
280*4882a593Smuzhiyun {
281*4882a593Smuzhiyun if (tb[IFLA_ADDRESS]) {
282*4882a593Smuzhiyun if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
283*4882a593Smuzhiyun return -EINVAL;
284*4882a593Smuzhiyun if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
285*4882a593Smuzhiyun return -EADDRNOTAVAIL;
286*4882a593Smuzhiyun }
287*4882a593Smuzhiyun return 0;
288*4882a593Smuzhiyun }
289*4882a593Smuzhiyun
/* rtnl_link registration: lets "ip link add ... type ifb" create devices. */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_dev_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};
296*4882a593Smuzhiyun
/* Number of ifb devices to be set up by this module at load time.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 */
static int numifbs = 2;
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
304*4882a593Smuzhiyun
ifb_init_one(int index)305*4882a593Smuzhiyun static int __init ifb_init_one(int index)
306*4882a593Smuzhiyun {
307*4882a593Smuzhiyun struct net_device *dev_ifb;
308*4882a593Smuzhiyun int err;
309*4882a593Smuzhiyun
310*4882a593Smuzhiyun dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
311*4882a593Smuzhiyun NET_NAME_UNKNOWN, ifb_setup);
312*4882a593Smuzhiyun
313*4882a593Smuzhiyun if (!dev_ifb)
314*4882a593Smuzhiyun return -ENOMEM;
315*4882a593Smuzhiyun
316*4882a593Smuzhiyun dev_ifb->rtnl_link_ops = &ifb_link_ops;
317*4882a593Smuzhiyun err = register_netdevice(dev_ifb);
318*4882a593Smuzhiyun if (err < 0)
319*4882a593Smuzhiyun goto err;
320*4882a593Smuzhiyun
321*4882a593Smuzhiyun return 0;
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun err:
324*4882a593Smuzhiyun free_netdev(dev_ifb);
325*4882a593Smuzhiyun return err;
326*4882a593Smuzhiyun }
327*4882a593Smuzhiyun
ifb_init_module(void)328*4882a593Smuzhiyun static int __init ifb_init_module(void)
329*4882a593Smuzhiyun {
330*4882a593Smuzhiyun int i, err;
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun down_write(&pernet_ops_rwsem);
333*4882a593Smuzhiyun rtnl_lock();
334*4882a593Smuzhiyun err = __rtnl_link_register(&ifb_link_ops);
335*4882a593Smuzhiyun if (err < 0)
336*4882a593Smuzhiyun goto out;
337*4882a593Smuzhiyun
338*4882a593Smuzhiyun for (i = 0; i < numifbs && !err; i++) {
339*4882a593Smuzhiyun err = ifb_init_one(i);
340*4882a593Smuzhiyun cond_resched();
341*4882a593Smuzhiyun }
342*4882a593Smuzhiyun if (err)
343*4882a593Smuzhiyun __rtnl_link_unregister(&ifb_link_ops);
344*4882a593Smuzhiyun
345*4882a593Smuzhiyun out:
346*4882a593Smuzhiyun rtnl_unlock();
347*4882a593Smuzhiyun up_write(&pernet_ops_rwsem);
348*4882a593Smuzhiyun
349*4882a593Smuzhiyun return err;
350*4882a593Smuzhiyun }
351*4882a593Smuzhiyun
ifb_cleanup_module(void)352*4882a593Smuzhiyun static void __exit ifb_cleanup_module(void)
353*4882a593Smuzhiyun {
354*4882a593Smuzhiyun rtnl_link_unregister(&ifb_link_ops);
355*4882a593Smuzhiyun }
356*4882a593Smuzhiyun
/* Module entry/exit points and metadata. */
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");
362