summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/net/netfilter/nf_flow_table.h60
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h2
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c2
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c2
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/nf_flow_table_core.c173
-rw-r--r--net/netfilter/nf_flow_table_inet.c2
-rw-r--r--net/netfilter/nf_flow_table_offload.c758
-rw-r--r--net/netfilter/nf_tables_api.c21
-rw-r--r--net/netfilter/nft_flow_offload.c5
11 files changed, 955 insertions, 74 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f857f01234f7..9e6fb8524d91 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -848,6 +848,7 @@ enum tc_setup_type {
TC_SETUP_ROOT_QDISC,
TC_SETUP_QDISC_GRED,
TC_SETUP_QDISC_TAPRIO,
+ TC_SETUP_FT,
};
/* These structures hold the attributes of bpf state that are being passed
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 158514281a75..eea66de328d3 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -8,25 +8,43 @@
#include <linux/rcupdate.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/flow_offload.h>
#include <net/dst.h>
struct nf_flowtable;
+struct nf_flow_rule;
+struct flow_offload;
+enum flow_offload_tuple_dir;
struct nf_flowtable_type {
struct list_head list;
int family;
int (*init)(struct nf_flowtable *ft);
+ int (*setup)(struct nf_flowtable *ft,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+ int (*action)(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule);
void (*free)(struct nf_flowtable *ft);
nf_hookfn *hook;
struct module *owner;
};
+enum nf_flowtable_flags {
+ NF_FLOWTABLE_HW_OFFLOAD = 0x1,
+};
+
struct nf_flowtable {
struct list_head list;
struct rhashtable rhashtable;
int priority;
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
+ unsigned int flags;
+ struct flow_block flow_block;
+ possible_net_t net;
};
enum flow_offload_tuple_dir {
@@ -69,14 +87,22 @@ struct flow_offload_tuple_rhash {
#define FLOW_OFFLOAD_DNAT 0x2
#define FLOW_OFFLOAD_DYING 0x4
#define FLOW_OFFLOAD_TEARDOWN 0x8
+#define FLOW_OFFLOAD_HW 0x10
+#define FLOW_OFFLOAD_HW_DYING 0x20
+#define FLOW_OFFLOAD_HW_DEAD 0x40
+
+enum flow_offload_type {
+ NF_FLOW_OFFLOAD_UNSPEC = 0,
+ NF_FLOW_OFFLOAD_ROUTE,
+};
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
- u32 flags;
- union {
- /* Your private driver data here. */
- u32 timeout;
- };
+ struct nf_conn *ct;
+ u16 flags;
+ u16 type;
+ u32 timeout;
+ struct rcu_head rcu_head;
};
#define NF_FLOW_TIMEOUT (30 * HZ)
@@ -87,10 +113,12 @@ struct nf_flow_route {
} tuple[FLOW_OFFLOAD_DIR_MAX];
};
-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
- struct nf_flow_route *route);
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
void flow_offload_free(struct flow_offload *flow);
+int flow_offload_route_init(struct flow_offload *flow,
+ const struct nf_flow_route *route);
+
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
@@ -124,4 +152,22 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
#define MODULE_ALIAS_NF_FLOWTABLE(family) \
MODULE_ALIAS("nf-flowtable-" __stringify(family))
+void nf_flow_offload_add(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+void nf_flow_offload_del(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+ struct flow_offload *flow);
+
+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+ struct net_device *dev,
+ enum flow_block_command cmd);
+int nf_flow_rule_route(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule);
+
+int nf_flow_table_offload_init(void);
+void nf_flow_table_offload_exit(void);
+
#endif /* _NF_FLOW_TABLE_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 81fed16fe2b2..bb9b049310df 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1518,6 +1518,7 @@ enum nft_object_attributes {
* @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
* @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
* @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
+ * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32)
*/
enum nft_flowtable_attributes {
NFTA_FLOWTABLE_UNSPEC,
@@ -1527,6 +1528,7 @@ enum nft_flowtable_attributes {
NFTA_FLOWTABLE_USE,
NFTA_FLOWTABLE_HANDLE,
NFTA_FLOWTABLE_PAD,
+ NFTA_FLOWTABLE_FLAGS,
__NFTA_FLOWTABLE_MAX
};
#define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 012c4047c788..168b72e18be0 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -9,6 +9,8 @@
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index f6d9a48c7a2a..f069bc0dc056 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -10,6 +10,8 @@
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4fc075b612fe..5e9b2eb24349 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
+ nf_flow_table_offload.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 128245efe84a..8468d2d02284 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -14,24 +14,15 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-struct flow_offload_entry {
- struct flow_offload flow;
- struct nf_conn *ct;
- struct rcu_head rcu_head;
-};
-
static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
static void
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
- struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
- struct dst_entry *other_dst = route->tuple[!dir].dst;
- struct dst_entry *dst = route->tuple[dir].dst;
ft->dir = dir;
@@ -39,12 +30,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
case NFPROTO_IPV4:
ft->src_v4 = ctt->src.u3.in;
ft->dst_v4 = ctt->dst.u3.in;
- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
ft->src_v6 = ctt->src.u3.in6;
ft->dst_v6 = ctt->dst.u3.in6;
- ft->mtu = ip6_dst_mtu_forward(dst);
break;
}
@@ -52,37 +41,24 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
ft->l4proto = ctt->dst.protonum;
ft->src_port = ctt->src.u.tcp.port;
ft->dst_port = ctt->dst.u.tcp.port;
-
- ft->iifidx = other_dst->dev->ifindex;
- ft->dst_cache = dst;
}
-struct flow_offload *
-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
{
- struct flow_offload_entry *entry;
struct flow_offload *flow;
if (unlikely(nf_ct_is_dying(ct) ||
!atomic_inc_not_zero(&ct->ct_general.use)))
return NULL;
- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry)
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
goto err_ct_refcnt;
- flow = &entry->flow;
+ flow->ct = ct;
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
- goto err_dst_cache_original;
-
- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
- goto err_dst_cache_reply;
-
- entry->ct = ct;
-
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
+ flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_REPLY);
if (ct->status & IPS_SRC_NAT)
flow->flags |= FLOW_OFFLOAD_SNAT;
@@ -91,10 +67,6 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
return flow;
-err_dst_cache_reply:
- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
-err_dst_cache_original:
- kfree(entry);
err_ct_refcnt:
nf_ct_put(ct);
@@ -102,6 +74,56 @@ err_ct_refcnt:
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
+/*
+ * Attach the routing state for direction @dir to @flow.
+ *
+ * Takes a reference on the dst entry of @route for @dir, derives the MTU
+ * from it according to the tuple's layer 3 protocol, and records the
+ * ifindex of the opposite direction's dst device as the input interface.
+ *
+ * Returns 0 on success, or -1 if dst_hold_safe() refused the reference.
+ */
+static int flow_offload_fill_route(struct flow_offload *flow,
+				   const struct nf_flow_route *route,
+				   enum flow_offload_tuple_dir dir)
+{
+	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+	struct dst_entry *rev_dst = route->tuple[!dir].dst;
+	struct dst_entry *fwd_dst = route->tuple[dir].dst;
+
+	if (!dst_hold_safe(fwd_dst))
+		return -1;
+
+	if (ft->l3proto == NFPROTO_IPV4)
+		ft->mtu = ip_dst_mtu_maybe_forward(fwd_dst, true);
+	else if (ft->l3proto == NFPROTO_IPV6)
+		ft->mtu = ip6_dst_mtu_forward(fwd_dst);
+
+	ft->iifidx = rev_dst->dev->ifindex;
+	ft->dst_cache = fwd_dst;
+
+	return 0;
+}
+
+/*
+ * Fill in the route information of both directions of @flow from @route
+ * and mark the flow as NF_FLOW_OFFLOAD_ROUTE.
+ *
+ * Returns 0 on success or the negative value reported by
+ * flow_offload_fill_route(). If the reply direction fails, the reference
+ * already taken on the original direction's dst entry is dropped again.
+ */
+int flow_offload_route_init(struct flow_offload *flow,
+			    const struct nf_flow_route *route)
+{
+	int ret;
+
+	ret = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+	if (ret < 0)
+		return ret;
+
+	ret = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
+	if (ret < 0) {
+		dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+		return ret;
+	}
+
+	flow->type = NF_FLOW_OFFLOAD_ROUTE;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(flow_offload_route_init);
+
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
tcp->state = TCP_CONNTRACK_ESTABLISHED;
@@ -150,17 +172,25 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
flow_offload_fixup_ct_timeout(ct);
}
-void flow_offload_free(struct flow_offload *flow)
+static void flow_offload_route_release(struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
- e = container_of(flow, struct flow_offload_entry, flow);
+}
+
+void flow_offload_free(struct flow_offload *flow)
+{
+ switch (flow->type) {
+ case NF_FLOW_OFFLOAD_ROUTE:
+ flow_offload_route_release(flow);
+ break;
+ default:
+ break;
+ }
if (flow->flags & FLOW_OFFLOAD_DYING)
- nf_ct_delete(e->ct, 0, 0);
- nf_ct_put(e->ct);
- kfree_rcu(e, rcu_head);
+ nf_ct_delete(flow->ct, 0, 0);
+ nf_ct_put(flow->ct);
+ kfree_rcu(flow, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
@@ -220,6 +250,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
return err;
}
+ if (flow_table->flags & NF_FLOWTABLE_HW_OFFLOAD)
+ nf_flow_offload_add(flow_table, flow);
+
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
@@ -232,8 +265,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
nf_flow_offload_rhash_params);
@@ -241,25 +272,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
nf_flow_offload_rhash_params);
- e = container_of(flow, struct flow_offload_entry, flow);
- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
if (nf_flow_has_expired(flow))
- flow_offload_fixup_ct(e->ct);
+ flow_offload_fixup_ct(flow->ct);
else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
- flow_offload_fixup_ct_timeout(e->ct);
+ flow_offload_fixup_ct_timeout(flow->ct);
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
- struct flow_offload_entry *e;
-
flow->flags |= FLOW_OFFLOAD_TEARDOWN;
- e = container_of(flow, struct flow_offload_entry, flow);
- flow_offload_fixup_ct_state(e->ct);
+ flow_offload_fixup_ct_state(flow->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
@@ -269,7 +296,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
{
struct flow_offload_tuple_rhash *tuplehash;
struct flow_offload *flow;
- struct flow_offload_entry *e;
int dir;
tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
@@ -282,8 +308,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
return NULL;
- e = container_of(flow, struct flow_offload_entry, flow);
- if (unlikely(nf_ct_is_dying(e->ct)))
+ if (unlikely(nf_ct_is_dying(flow->ct)))
return NULL;
return tuplehash;
@@ -327,12 +352,21 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct nf_flowtable *flow_table = data;
- struct flow_offload_entry *e;
- e = container_of(flow, struct flow_offload_entry, flow);
- if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
- (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
- flow_offload_del(flow_table, flow);
+ if (flow->flags & FLOW_OFFLOAD_HW)
+ nf_flow_offload_stats(flow_table, flow);
+
+ if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) ||
+ (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) {
+ if (flow->flags & FLOW_OFFLOAD_HW) {
+ if (!(flow->flags & FLOW_OFFLOAD_HW_DYING))
+ nf_flow_offload_del(flow_table, flow);
+ else if (flow->flags & FLOW_OFFLOAD_HW_DEAD)
+ flow_offload_del(flow_table, flow);
+ } else {
+ flow_offload_del(flow_table, flow);
+ }
+ }
}
static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -465,6 +499,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
int err;
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+ flow_block_init(&flowtable->flow_block);
err = rhashtable_init(&flowtable->rhashtable,
&nf_flow_offload_rhash_params);
@@ -485,15 +520,13 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
- struct flow_offload_entry *e;
-
- e = container_of(flow, struct flow_offload_entry, flow);
if (!dev) {
flow_offload_teardown(flow);
return;
}
- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
+
+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
(flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
flow->tuplehash[1].tuple.iifidx == dev->ifindex))
flow_offload_dead(flow);
@@ -502,6 +535,7 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
struct net_device *dev)
{
+ nf_flow_table_offload_flush(flowtable);
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
}
@@ -529,5 +563,18 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
+/* Module load: bring up the offload side; its return value is the result. */
+static int __init nf_flow_table_module_init(void)
+{
+	int ret = nf_flow_table_offload_init();
+
+	return ret;
+}
+
+/* Module unload: tear the offload side down again. */
+static void __exit nf_flow_table_module_exit(void)
+{
+	nf_flow_table_offload_exit();
+}
+
+module_init(nf_flow_table_module_init);
+module_exit(nf_flow_table_module_exit);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 593357aedb36..bfb910b874ce 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -24,6 +24,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
.init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route,
.free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
new file mode 100644
index 000000000000..9be61f47303a
--- /dev/null
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -0,0 +1,758 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <linux/tc_act/tc_csum.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+
+static struct work_struct nf_flow_offload_work; /* work item kicked by flow_offload_queue_work() */
+static DEFINE_SPINLOCK(flow_offload_pending_list_lock); /* held (BH-disabled) around every access to the pending list */
+static LIST_HEAD(flow_offload_pending_list); /* struct flow_offload_work requests waiting for the work handler */
+
+struct flow_offload_work { /* one queued offload request, consumed by flow_offload_work_handler() */
+ struct list_head list; /* link in flow_offload_pending_list */
+ enum flow_cls_command cmd; /* FLOW_CLS_REPLACE, FLOW_CLS_DESTROY or FLOW_CLS_STATS */
+ int priority; /* forwarded as common.prio of the flow_cls_offload request */
+ struct nf_flowtable *flowtable; /* flowtable whose flow_block callbacks are invoked */
+ struct flow_offload *flow; /* flow entry the command applies to */
+};
+
+struct nf_flow_key { /* key/mask layout referenced by the dissector offsets set in nf_flow_rule_match() */
+ struct flow_dissector_key_control control; /* addr_type selector */
+ struct flow_dissector_key_basic basic; /* n_proto and ip_proto */
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4; /* IPv4 source/destination address */
+ };
+ struct flow_dissector_key_tcp tcp; /* TCP flags */
+ struct flow_dissector_key_ports tp; /* transport source/destination port */
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct nf_flow_match { /* backing storage for the match part of a flow_rule */
+ struct flow_dissector dissector; /* used_keys bitmap and per-key offsets into struct nf_flow_key */
+ struct nf_flow_key key; /* values to match */
+ struct nf_flow_key mask; /* which bits of key are significant */
+};
+
+struct nf_flow_rule { /* one direction of an offloaded flow, as handed to the block callbacks */
+ struct nf_flow_match match; /* storage that rule->match points into */
+ struct flow_rule *rule; /* obtained from flow_rule_alloc(); holds the action entries */
+};
+
+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
+ (__match)->dissector.offset[__type] = \
+ offsetof(struct nf_flow_key, __field) /* record where dissector key __type lives inside struct nf_flow_key */
+
+/*
+ * Build the dissector description, key and mask of @match from @tuple.
+ *
+ * Only IPv4 (AF_INET) carrying TCP or UDP is handled. For TCP the rule
+ * additionally requires the RST and FIN flags to be clear.
+ *
+ * Returns 0 on success, or -EOPNOTSUPP for any other layer 3 or layer 4
+ * protocol.
+ */
+static int nf_flow_rule_match(struct nf_flow_match *match,
+			      const struct flow_offload_tuple *tuple)
+{
+	struct nf_flow_key *mask = &match->mask;
+	struct nf_flow_key *key = &match->key;
+
+	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
+	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
+	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
+	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
+
+	switch (tuple->l3proto) {
+	case AF_INET:
+		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		key->basic.n_proto = htons(ETH_P_IP);
+		key->ipv4.src = tuple->src_v4.s_addr;
+		mask->ipv4.src = 0xffffffff;
+		key->ipv4.dst = tuple->dst_v4.s_addr;
+		mask->ipv4.dst = 0xffffffff;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	/* Every key field set above needs a mask, addr_type included. */
+	mask->control.addr_type = 0xffff;
+	mask->basic.n_proto = 0xffff;
+
+	switch (tuple->l4proto) {
+	case IPPROTO_TCP:
+		key->tcp.flags = 0;
+		/*
+		 * TCP_FLAG_* are 32-bit big-endian words with the flag bits in
+		 * the upper half, while tcp.flags is only 16 bits wide. Shift
+		 * in CPU byte order before narrowing, otherwise the mask
+		 * truncates to zero on big-endian hosts.
+		 */
+		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST |
+							  TCP_FLAG_FIN) >> 16);
+		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
+		break;
+	case IPPROTO_UDP:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	key->basic.ip_proto = tuple->l4proto;
+	mask->basic.ip_proto = 0xff;
+
+	key->tp.src = tuple->src_port;
+	mask->tp.src = 0xffff;
+	key->tp.dst = tuple->dst_port;
+	mask->tp.dst = 0xffff;
+
+	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+				      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+				      BIT(FLOW_DISSECTOR_KEY_PORTS);
+	return 0;
+}
+
+/*
+ * Turn @entry into a FLOW_ACTION_MANGLE action on header @htype at byte
+ * @offset. Exactly sizeof(u32) bytes are copied from each of @value and
+ * @mask, so both must point at four readable bytes.
+ */
+static void flow_offload_mangle(struct flow_action_entry *entry,
+				enum flow_action_mangle_base htype,
+				u32 offset, u8 *value, u8 *mask)
+{
+	const size_t word = sizeof(u32);
+
+	entry->id = FLOW_ACTION_MANGLE;
+
+	entry->mangle.htype = htype;
+	entry->mangle.offset = offset;
+
+	memcpy(&entry->mangle.mask, mask, word);
+	memcpy(&entry->mangle.val, value, word);
+}
+
+/*
+ * Emit the two mangle actions that rewrite the Ethernet source address.
+ *
+ * The address is taken from the device whose ifindex is stored as iifidx
+ * in the tuple of the opposite direction. @entry0 covers the 32-bit word
+ * at Ethernet header offset 4 (first two address bytes), @entry1 the
+ * word at offset 8 (remaining four bytes).
+ *
+ * Returns 0 on success, or -ENOENT if the device cannot be found in @net.
+ */
+static int flow_offload_eth_src(struct net *net,
+				const struct flow_offload *flow,
+				enum flow_offload_tuple_dir dir,
+				struct flow_action_entry *entry0,
+				struct flow_action_entry *entry1)
+{
+	const struct flow_offload_tuple *other = &flow->tuplehash[!dir].tuple;
+	u32 head_mask = ~0xffff0000, head_val;
+	u32 tail_mask = ~0xffffffff, tail_val;
+	struct net_device *dev;
+	u16 head16;
+
+	dev = dev_get_by_index(net, other->iifidx);
+	if (!dev)
+		return -ENOENT;
+
+	memcpy(&head16, dev->dev_addr, 2);
+	head_val = head16 << 16;
+	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+			    (u8 *)&head_val, (u8 *)&head_mask);
+
+	memcpy(&tail_val, dev->dev_addr + 2, 4);
+	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
+			    (u8 *)&tail_val, (u8 *)&tail_mask);
+
+	/* Drop the reference taken by dev_get_by_index(). */
+	dev_put(dev);
+
+	return 0;
+}
+
+/*
+ * Emit the two mangle actions that rewrite the Ethernet destination
+ * address.
+ *
+ * The hardware address comes from the neighbour entry found through this
+ * direction's cached dst entry, keyed by the tuple's dst_v4. @entry0
+ * covers the 32-bit word at Ethernet header offset 0 (first four address
+ * bytes), @entry1 the word at offset 4 (last two bytes).
+ *
+ * Returns 0 on success, or -ENOENT if no neighbour entry is found.
+ */
+static int flow_offload_eth_dst(struct net *net,
+				const struct flow_offload *flow,
+				enum flow_offload_tuple_dir dir,
+				struct flow_action_entry *entry0,
+				struct flow_action_entry *entry1)
+{
+	const struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+	u32 head_mask = ~0xffffffff, head_val;
+	u32 tail_mask = ~0x0000ffff, tail_val;
+	struct neighbour *neigh;
+	u16 tail16;
+
+	neigh = dst_neigh_lookup(ft->dst_cache, &ft->dst_v4);
+	if (!neigh)
+		return -ENOENT;
+
+	memcpy(&head_val, neigh->ha, 4);
+	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
+			    (u8 *)&head_val, (u8 *)&head_mask);
+
+	memcpy(&tail16, neigh->ha + 4, 2);
+	tail_val = tail16;
+	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
+			    (u8 *)&tail_val, (u8 *)&tail_mask);
+
+	/* Drop the reference returned by dst_neigh_lookup(). */
+	neigh_release(neigh);
+
+	return 0;
+}
+
+/*
+ * Emit the IPv4 address mangle action for source NAT.
+ *
+ * Original direction: saddr is replaced by the reply tuple's destination
+ * address. Reply direction: daddr is replaced by the original tuple's
+ * source address. For any other @dir, @entry is left untouched.
+ */
+static void flow_offload_ipv4_snat(struct net *net,
+				   const struct flow_offload *flow,
+				   enum flow_offload_tuple_dir dir,
+				   struct flow_action_entry *entry)
+{
+	u32 mask = ~htonl(0xffffffff);
+	__be32 new_addr;
+	u32 hdr_off;
+
+	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) {
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
+		hdr_off = offsetof(struct iphdr, saddr);
+	} else if (dir == FLOW_OFFLOAD_DIR_REPLY) {
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+		hdr_off = offsetof(struct iphdr, daddr);
+	} else {
+		return;
+	}
+
+	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, hdr_off,
+			    (u8 *)&new_addr, (u8 *)&mask);
+}
+
+/*
+ * Emit the IPv4 address mangle action for destination NAT.
+ *
+ * Original direction: daddr is replaced by the reply tuple's source
+ * address. Reply direction: saddr is replaced by the original tuple's
+ * destination address. For any other @dir, @entry is left untouched.
+ */
+static void flow_offload_ipv4_dnat(struct net *net,
+				   const struct flow_offload *flow,
+				   enum flow_offload_tuple_dir dir,
+				   struct flow_action_entry *entry)
+{
+	u32 mask = ~htonl(0xffffffff);
+	__be32 new_addr;
+	u32 hdr_off;
+
+	if (dir == FLOW_OFFLOAD_DIR_ORIGINAL) {
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
+		hdr_off = offsetof(struct iphdr, daddr);
+	} else if (dir == FLOW_OFFLOAD_DIR_REPLY) {
+		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+		hdr_off = offsetof(struct iphdr, saddr);
+	} else {
+		return;
+	}
+
+	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, hdr_off,
+			    (u8 *)&new_addr, (u8 *)&mask);
+}
+
+/*
+ * Map the layer 4 protocol of @flow's original tuple to the matching
+ * mangle header type. Returns 0 for anything other than TCP or UDP.
+ */
+static int flow_offload_l4proto(const struct flow_offload *flow)
+{
+	u8 l4 = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+
+	if (l4 == IPPROTO_TCP)
+		return FLOW_ACT_MANGLE_HDR_TYPE_TCP;
+	if (l4 == IPPROTO_UDP)
+		return FLOW_ACT_MANGLE_HDR_TYPE_UDP;
+
+	return 0;
+}
+
+/*
+ * Emit the transport port mangle action for source NAT.
+ *
+ * The source and destination ports share the 32-bit word at offset 0 of
+ * the TCP/UDP header: the source port is the first half, the destination
+ * port the second half. Mirroring flow_offload_ipv4_snat():
+ *
+ *  - original direction: the source port is replaced by the reply tuple's
+ *    destination port;
+ *  - reply direction: the destination port is replaced by the original
+ *    tuple's source port.
+ *
+ * The value and mask are built as full 32-bit words because
+ * flow_offload_mangle() copies sizeof(u32) bytes from each of them. For
+ * any other @dir, @entry is left untouched.
+ */
+static void flow_offload_port_snat(struct net *net,
+				   const struct flow_offload *flow,
+				   enum flow_offload_tuple_dir dir,
+				   struct flow_action_entry *entry)
+{
+	u32 mask, port;
+	u32 offset;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
+		offset = 0; /* offsetof(struct tcphdr, source); */
+		port = htonl(port << 16);
+		mask = ~htonl(0xffff0000);
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
+		offset = 0; /* offsetof(struct tcphdr, dest); */
+		port = htonl(port);
+		mask = ~htonl(0xffff);
+		break;
+	default:
+		/* Never mangle with an unset port/offset. */
+		return;
+	}
+
+	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+			    (u8 *)&port, (u8 *)&mask);
+}
+
+/*
+ * Emit the transport port mangle action for destination NAT.
+ *
+ * The source and destination ports share the 32-bit word at offset 0 of
+ * the TCP/UDP header: the source port is the first half, the destination
+ * port the second half. Mirroring flow_offload_ipv4_dnat():
+ *
+ *  - original direction: the destination port is replaced by the reply
+ *    tuple's source port;
+ *  - reply direction: the source port is replaced by the original tuple's
+ *    destination port.
+ *
+ * The value and mask are built as full 32-bit words because
+ * flow_offload_mangle() copies sizeof(u32) bytes from each of them. For
+ * any other @dir, @entry is left untouched.
+ */
+static void flow_offload_port_dnat(struct net *net,
+				   const struct flow_offload *flow,
+				   enum flow_offload_tuple_dir dir,
+				   struct flow_action_entry *entry)
+{
+	u32 mask, port;
+	u32 offset;
+
+	switch (dir) {
+	case FLOW_OFFLOAD_DIR_ORIGINAL:
+		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
+		offset = 0; /* offsetof(struct tcphdr, dest); */
+		port = htonl(port);
+		mask = ~htonl(0xffff);
+		break;
+	case FLOW_OFFLOAD_DIR_REPLY:
+		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
+		offset = 0; /* offsetof(struct tcphdr, source); */
+		port = htonl(port << 16);
+		mask = ~htonl(0xffff0000);
+		break;
+	default:
+		/* Never mangle with an unset port/offset. */
+		return;
+	}
+
+	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
+			    (u8 *)&port, (u8 *)&mask);
+}
+
+/*
+ * Turn @entry into a FLOW_ACTION_CSUM action that updates the IPv4 header
+ * checksum and, for TCP or UDP flows, the matching layer 4 checksum.
+ */
+static void flow_offload_ipv4_checksum(struct net *net,
+				       const struct flow_offload *flow,
+				       struct flow_action_entry *entry)
+{
+	u8 l4 = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+
+	entry->id = FLOW_ACTION_CSUM;
+	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+
+	if (l4 == IPPROTO_TCP)
+		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
+	else if (l4 == IPPROTO_UDP)
+		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
+}
+
+/*
+ * Turn @entry into a FLOW_ACTION_REDIRECT action towards the device of
+ * the route cached for @dir. A device reference is taken here; it is
+ * released by __nf_flow_offload_destroy().
+ */
+static void flow_offload_redirect(const struct flow_offload *flow,
+				  enum flow_offload_tuple_dir dir,
+				  struct flow_action_entry *entry)
+{
+	struct rtable *route = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+	struct net_device *out_dev = route->dst.dev;
+
+	entry->id = FLOW_ACTION_REDIRECT;
+	entry->dev = out_dev;
+	dev_hold(out_dev);
+}
+
+/*
+ * Fill the action list of @flow_rule for direction @dir of a routed flow.
+ *
+ * Entries 0-3 rewrite the Ethernet source and destination addresses.
+ * They are followed by the optional SNAT and DNAT address/port mangles,
+ * a checksum update if any NAT is in effect, and finally the redirect to
+ * the output device.
+ *
+ * Returns the number of action entries written, or -1 if either Ethernet
+ * address could not be resolved.
+ */
+int nf_flow_rule_route(struct net *net, const struct flow_offload *flow,
+		       enum flow_offload_tuple_dir dir,
+		       struct nf_flow_rule *flow_rule)
+{
+	struct flow_action_entry *entries = flow_rule->rule->action.entries;
+	int used = 4;
+
+	if (flow_offload_eth_src(net, flow, dir, &entries[0], &entries[1]) < 0)
+		return -1;
+
+	if (flow_offload_eth_dst(net, flow, dir, &entries[2], &entries[3]) < 0)
+		return -1;
+
+	if (flow->flags & FLOW_OFFLOAD_SNAT) {
+		flow_offload_ipv4_snat(net, flow, dir, &entries[used++]);
+		flow_offload_port_snat(net, flow, dir, &entries[used++]);
+	}
+	if (flow->flags & FLOW_OFFLOAD_DNAT) {
+		flow_offload_ipv4_dnat(net, flow, dir, &entries[used++]);
+		flow_offload_port_dnat(net, flow, dir, &entries[used++]);
+	}
+	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT))
+		flow_offload_ipv4_checksum(net, flow, &entries[used++]);
+
+	flow_offload_redirect(flow, dir, &entries[used++]);
+
+	return used;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route);
+
+/*
+ * Allocate and populate the rule for direction @dir of the flow carried
+ * by @offload: the match comes from nf_flow_rule_match(), the actions
+ * from the flowtable type's ->action() callback.
+ *
+ * Returns the new rule, or NULL if an allocation, the match setup or the
+ * action callback fails.
+ */
+static struct nf_flow_rule *
+nf_flow_offload_rule_alloc(struct net *net,
+			   const struct flow_offload_work *offload,
+			   enum flow_offload_tuple_dir dir)
+{
+	const struct nf_flowtable *ft = offload->flowtable;
+	const struct flow_offload *flow = offload->flow;
+	struct nf_flow_rule *nfr;
+	int nr_actions;
+
+	nfr = kzalloc(sizeof(*nfr), GFP_KERNEL);
+	if (!nfr)
+		return NULL;
+
+	/* Room for ten action entries. */
+	nfr->rule = flow_rule_alloc(10);
+	if (!nfr->rule)
+		goto free_wrapper;
+
+	/* The flow_rule match points into storage owned by the wrapper. */
+	nfr->rule->match.dissector = &nfr->match.dissector;
+	nfr->rule->match.mask = &nfr->match.mask;
+	nfr->rule->match.key = &nfr->match.key;
+
+	if (nf_flow_rule_match(&nfr->match, &flow->tuplehash[dir].tuple) < 0)
+		goto free_rule;
+
+	nr_actions = ft->type->action(net, flow, dir, nfr);
+	if (nr_actions < 0)
+		goto free_rule;
+
+	nfr->rule->action.num_entries = nr_actions;
+
+	return nfr;
+
+free_rule:
+	kfree(nfr->rule);
+free_wrapper:
+	kfree(nfr);
+	return NULL;
+}
+
+/*
+ * Free one rule: drop the device reference held by every redirect
+ * action, then release the flow_rule and its wrapper.
+ */
+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
+{
+	struct flow_rule *rule = flow_rule->rule;
+	int idx;
+
+	for (idx = 0; idx < rule->action.num_entries; idx++) {
+		if (rule->action.entries[idx].id == FLOW_ACTION_REDIRECT)
+			dev_put(rule->action.entries[idx].dev);
+	}
+
+	kfree(rule);
+	kfree(flow_rule);
+}
+
+/* Free the rules of all FLOW_OFFLOAD_DIR_MAX directions in @flow_rule. */
+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
+{
+	int dir = 0;
+
+	while (dir < FLOW_OFFLOAD_DIR_MAX) {
+		__nf_flow_offload_destroy(flow_rule[dir]);
+		dir++;
+	}
+}
+
+/*
+ * Allocate the original-direction rule into @flow_rule[0] and the
+ * reply-direction rule into @flow_rule[1].
+ *
+ * Returns 0 on success or -ENOMEM if either rule cannot be built. On
+ * failure no rule remains allocated.
+ */
+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
+				 struct nf_flow_rule *flow_rule[])
+{
+	struct net *net = read_pnet(&offload->flowtable->net);
+
+	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
+						  FLOW_OFFLOAD_DIR_ORIGINAL);
+	if (flow_rule[0]) {
+		flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
+							  FLOW_OFFLOAD_DIR_REPLY);
+		if (flow_rule[1])
+			return 0;
+
+		/* The reply rule failed: undo the original one. */
+		__nf_flow_offload_destroy(flow_rule[0]);
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Fill the common part of a classifier offload request. The address of
+ * @tuple doubles as the cookie identifying the rule.
+ */
+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
+				 __be16 proto, int priority,
+				 enum flow_cls_command cmd,
+				 const struct flow_offload_tuple *tuple,
+				 struct netlink_ext_ack *extack)
+{
+	cls_flow->command = cmd;
+	cls_flow->cookie = (unsigned long)tuple;
+
+	cls_flow->common.extack = extack;
+	cls_flow->common.prio = priority;
+	cls_flow->common.protocol = proto;
+}
+
+/*
+ * Offer @flow_rule for direction @dir to every callback registered on
+ * the flowtable's flow block, as a FLOW_CLS_REPLACE request.
+ *
+ * Returns the number of callbacks that did not report an error.
+ */
+static int flow_offload_tuple_add(struct flow_offload_work *offload,
+				  struct nf_flow_rule *flow_rule,
+				  enum flow_offload_tuple_dir dir)
+{
+	struct nf_flowtable *flowtable = offload->flowtable;
+	struct flow_cls_offload cls_flow = {};
+	struct flow_block_cb *block_cb;
+	/* Zeroed so callbacks never see stack garbage in the extack. */
+	struct netlink_ext_ack extack = {};
+	__be16 proto = ETH_P_ALL;
+	int err, i = 0;
+
+	nf_flow_offload_init(&cls_flow, proto, offload->priority,
+			     FLOW_CLS_REPLACE,
+			     &offload->flow->tuplehash[dir].tuple, &extack);
+	cls_flow.rule = flow_rule->rule;
+
+	list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) {
+		err = block_cb->cb(TC_SETUP_FT, &cls_flow,
+				   block_cb->cb_priv);
+		if (err < 0)
+			continue;
+
+		i++;
+	}
+
+	return i;
+}
+
+/*
+ * Send a FLOW_CLS_DESTROY request for direction @dir to every callback
+ * registered on the flowtable's flow block, then flag the flow with
+ * FLOW_OFFLOAD_HW_DEAD. Callback return values are ignored.
+ */
+static void flow_offload_tuple_del(struct flow_offload_work *offload,
+				   enum flow_offload_tuple_dir dir)
+{
+	struct nf_flowtable *flowtable = offload->flowtable;
+	struct flow_cls_offload cls_flow = {};
+	struct flow_block_cb *block_cb;
+	/* Zeroed so callbacks never see stack garbage in the extack. */
+	struct netlink_ext_ack extack = {};
+	__be16 proto = ETH_P_ALL;
+
+	nf_flow_offload_init(&cls_flow, proto, offload->priority,
+			     FLOW_CLS_DESTROY,
+			     &offload->flow->tuplehash[dir].tuple, &extack);
+
+	list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
+		block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+
+	offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD;
+}
+
+/*
+ * Install both directions of a flow. Returns 0 if at least one callback
+ * accepted at least one direction, -ENOENT otherwise.
+ */
+static int flow_offload_rule_add(struct flow_offload_work *offload,
+				 struct nf_flow_rule *flow_rule[])
+{
+	int accepted;
+
+	accepted = flow_offload_tuple_add(offload, flow_rule[0],
+					  FLOW_OFFLOAD_DIR_ORIGINAL);
+	accepted += flow_offload_tuple_add(offload, flow_rule[1],
+					   FLOW_OFFLOAD_DIR_REPLY);
+
+	return accepted ? 0 : -ENOENT;
+}
+
+/*
+ * Handle a FLOW_CLS_REPLACE request: build the rules for both
+ * directions, offer them to the block callbacks and free them again.
+ *
+ * Returns -ENOMEM if the rules cannot be built, otherwise the result of
+ * flow_offload_rule_add().
+ */
+static int flow_offload_work_add(struct flow_offload_work *offload)
+{
+	struct nf_flow_rule *rules[FLOW_OFFLOAD_DIR_MAX];
+	int ret;
+
+	if (nf_flow_offload_alloc(offload, rules) < 0)
+		return -ENOMEM;
+
+	ret = flow_offload_rule_add(offload, rules);
+
+	/* The rules are only needed for the duration of the callbacks. */
+	nf_flow_offload_destroy(rules);
+
+	return ret;
+}
+
+/* Handle a FLOW_CLS_DESTROY request: remove the original, then the reply direction. */
+static void flow_offload_work_del(struct flow_offload_work *offload)
+{
+	static const enum flow_offload_tuple_dir dirs[] = {
+		FLOW_OFFLOAD_DIR_ORIGINAL,
+		FLOW_OFFLOAD_DIR_REPLY,
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(dirs); i++)
+		flow_offload_tuple_del(offload, dirs[i]);
+}
+
+/*
+ * Send a FLOW_CLS_STATS request for direction @dir to every callback
+ * registered on the flowtable's flow block and copy the resulting
+ * counters into @stats. Callback return values are ignored.
+ */
+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
+				     enum flow_offload_tuple_dir dir,
+				     struct flow_stats *stats)
+{
+	struct nf_flowtable *flowtable = offload->flowtable;
+	struct flow_cls_offload cls_flow = {};
+	struct flow_block_cb *block_cb;
+	/* Zeroed so callbacks never see stack garbage in the extack. */
+	struct netlink_ext_ack extack = {};
+	__be16 proto = ETH_P_ALL;
+
+	nf_flow_offload_init(&cls_flow, proto, offload->priority,
+			     FLOW_CLS_STATS,
+			     &offload->flow->tuplehash[dir].tuple, &extack);
+
+	list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
+		block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+	memcpy(stats, &cls_flow.stats, sizeof(*stats));
+}
+
+/*
+ * Handle a FLOW_CLS_STATS work item: fetch the statistics of both
+ * directions and push the flow's timeout forward if the most recent
+ * "lastused" value plus NF_FLOW_TIMEOUT lies beyond the current timeout.
+ * The timeout is never moved backwards.
+ */
+static void flow_offload_work_stats(struct flow_offload_work *offload)
+{
+	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
+	struct flow_offload *flow = offload->flow;
+	u64 newest;
+
+	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
+	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
+
+	/* Whichever direction was used last decides the new deadline. */
+	newest = max_t(u64, stats[0].lastused, stats[1].lastused);
+	flow->timeout = max_t(u64, flow->timeout, newest + NF_FLOW_TIMEOUT);
+}
+
+/*
+ * Work function: drain the pending list and execute every queued request.
+ *
+ * The whole pending list is moved onto a private list while holding the
+ * list lock, so the requests themselves are processed without the lock.
+ * Each work item is unlinked and freed after it has been handled.
+ */
+static void flow_offload_work_handler(struct work_struct *work)
+{
+	struct flow_offload_work *item, *tmp;
+	LIST_HEAD(batch);
+
+	spin_lock_bh(&flow_offload_pending_list_lock);
+	list_replace_init(&flow_offload_pending_list, &batch);
+	spin_unlock_bh(&flow_offload_pending_list_lock);
+
+	list_for_each_entry_safe(item, tmp, &batch, list) {
+		switch (item->cmd) {
+		case FLOW_CLS_REPLACE:
+			/* On failure the flow loses its "in hardware" flag. */
+			if (flow_offload_work_add(item) < 0)
+				item->flow->flags &= ~FLOW_OFFLOAD_HW;
+			break;
+		case FLOW_CLS_DESTROY:
+			flow_offload_work_del(item);
+			break;
+		case FLOW_CLS_STATS:
+			flow_offload_work_stats(item);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		list_del(&item->list);
+		kfree(item);
+	}
+}
+
+/*
+ * Append a request to the tail of the pending list (under the list lock)
+ * and schedule the work that processes the list.
+ */
+static void flow_offload_queue_work(struct flow_offload_work *offload)
+{
+	spin_lock_bh(&flow_offload_pending_list_lock);
+	list_add_tail(&offload->list, &flow_offload_pending_list);
+	spin_unlock_bh(&flow_offload_pending_list_lock);
+
+	/* Scheduled only after the entry is on the list and the lock dropped. */
+	schedule_work(&nf_flow_offload_work);
+}
+
+/*
+ * Queue a FLOW_CLS_REPLACE request for @flow on @flowtable.
+ *
+ * The allocation uses GFP_ATOMIC.  If it fails, the function returns
+ * without queueing anything and without touching the flow's flags.
+ * On success the flow is flagged FLOW_OFFLOAD_HW before the request is
+ * queued; the work handler clears the flag again if the add fails.
+ */
+void nf_flow_offload_add(struct nf_flowtable *flowtable,
+			 struct flow_offload *flow)
+{
+	struct flow_offload_work *offload;
+
+	/*
+	 * Zero-initialised like the del/stats requests so that no field of
+	 * the work item is ever left holding uninitialised memory.
+	 */
+	offload = kzalloc(sizeof(*offload), GFP_ATOMIC);
+	if (!offload)
+		return;
+
+	offload->cmd = FLOW_CLS_REPLACE;
+	offload->flow = flow;
+	offload->priority = flowtable->priority;
+	offload->flowtable = flowtable;
+	flow->flags |= FLOW_OFFLOAD_HW;
+
+	flow_offload_queue_work(offload);
+}
+
+/*
+ * Queue a FLOW_CLS_DESTROY request for @flow on @flowtable.
+ *
+ * The allocation uses GFP_ATOMIC.  If it fails, the function returns
+ * without queueing anything and without flagging the flow.  On success
+ * the flow is flagged FLOW_OFFLOAD_HW_DYING before the request is queued.
+ */
+void nf_flow_offload_del(struct nf_flowtable *flowtable,
+			 struct flow_offload *flow)
+{
+	struct flow_offload_work *offload;
+
+	offload = kzalloc(sizeof(*offload), GFP_ATOMIC);
+	if (!offload)
+		return;
+
+	offload->cmd = FLOW_CLS_DESTROY;
+	offload->flow = flow;
+	/*
+	 * Carry the same priority the FLOW_CLS_REPLACE request used, so the
+	 * destroy request describes the rule exactly as it was installed.
+	 */
+	offload->priority = flowtable->priority;
+	offload->flowtable = flowtable;
+	flow->flags |= FLOW_OFFLOAD_HW_DYING;
+
+	flow_offload_queue_work(offload);
+}
+
+/*
+ * Queue a FLOW_CLS_STATS request for @flow on @flowtable, unless it is
+ * not worth doing.
+ *
+ * Nothing is queued when at least 90% of NF_FLOW_TIMEOUT is still left
+ * before the flow's timeout, or when the flow is already flagged
+ * FLOW_OFFLOAD_HW_DYING.  The allocation uses GFP_ATOMIC; if it fails
+ * the request is dropped.
+ */
+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
+			   struct flow_offload *flow)
+{
+	struct flow_offload_work *offload;
+	s64 delta;
+
+	delta = flow->timeout - jiffies;
+	if (delta >= (9 * NF_FLOW_TIMEOUT) / 10 ||
+	    (flow->flags & FLOW_OFFLOAD_HW_DYING))
+		return;
+
+	offload = kzalloc(sizeof(*offload), GFP_ATOMIC);
+	if (!offload)
+		return;
+
+	offload->cmd = FLOW_CLS_STATS;
+	offload->flow = flow;
+	/*
+	 * Carry the same priority the FLOW_CLS_REPLACE request used, so the
+	 * stats request describes the rule exactly as it was installed.
+	 */
+	offload->priority = flowtable->priority;
+	offload->flowtable = flowtable;
+
+	flow_offload_queue_work(offload);
+}
+
+/*
+ * Wait for the offload work to finish, but only for flowtables that have
+ * NF_FLOWTABLE_HW_OFFLOAD set; for any other flowtable this is a no-op.
+ */
+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
+{
+	if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
+		return;
+
+	flush_work(&nf_flow_offload_work);
+}
+
+/*
+ * Apply the callback list carried in @bo to the flowtable.
+ *
+ * FLOW_BLOCK_BIND:   splice the callbacks from @bo onto the flowtable's
+ *                    flow_block callback list.
+ * FLOW_BLOCK_UNBIND: unlink and free every callback found on @bo's list.
+ *
+ * Returns 0 for both commands above; any other command triggers a
+ * one-time warning and returns -EOPNOTSUPP.
+ */
+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
+				     struct flow_block_offload *bo,
+				     enum flow_block_command cmd)
+{
+	struct flow_block_cb *cb, *tmp;
+
+	switch (cmd) {
+	case FLOW_BLOCK_BIND:
+		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		list_for_each_entry_safe(cb, tmp, &bo->cb_list, list) {
+			list_del(&cb->list);
+			flow_block_cb_free(cb);
+		}
+		return 0;
+	default:
+		WARN_ON_ONCE(1);
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * Bind or unbind the flowtable's flow block on a net device.
+ *
+ * @flowtable: flowtable whose flow_block is offered to the device
+ * @dev:       device whose ndo_setup_tc() is called with TC_SETUP_BLOCK
+ * @cmd:       FLOW_BLOCK_BIND or FLOW_BLOCK_UNBIND
+ *
+ * Returns 0 without doing anything if the flowtable does not have
+ * NF_FLOWTABLE_HW_OFFLOAD set, -EOPNOTSUPP if the device provides no
+ * ndo_setup_tc() hook, the negative error reported by the device, or
+ * otherwise the result of nf_flow_table_block_setup().
+ */
+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
+				struct net_device *dev,
+				enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+	int err;
+
+	if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
+		return 0;
+
+	/* Not every device implements the hook; never call through NULL. */
+	if (!dev->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	bo.net = dev_net(dev);
+	bo.block = &flowtable->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0)
+		return err;
+
+	return nf_flow_table_block_setup(flowtable, &bo, cmd);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
+
+/*
+ * One-time setup of the offload machinery: attach
+ * flow_offload_work_handler() to the offload work item.
+ *
+ * Always returns 0.
+ */
+int nf_flow_table_offload_init(void)
+{
+	INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);
+	return 0;
+}
+
+/*
+ * Tear down the offload machinery: cancel the offload work (waiting for
+ * a running instance to finish) and free every request that is still
+ * queued on the pending list.
+ */
+void nf_flow_table_offload_exit(void)
+{
+	struct flow_offload_work *offload, *next;
+	LIST_HEAD(offload_pending_list);
+
+	cancel_work_sync(&nf_flow_offload_work);
+
+	/*
+	 * Requests that were queued but never processed live on the global
+	 * pending list.  Move them onto the local list under the lock so
+	 * the loop below actually has something to free.
+	 */
+	spin_lock_bh(&flow_offload_pending_list_lock);
+	list_replace_init(&flow_offload_pending_list, &offload_pending_list);
+	spin_unlock_bh(&flow_offload_pending_list_lock);
+
+	list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
+		list_del(&offload->list);
+		kfree(offload);
+	}
+}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0d2243945f1d..2dc636faa322 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5835,6 +5835,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
.len = NFT_NAME_MAXLEN - 1 },
[NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED },
[NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
};
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
@@ -5968,8 +5969,11 @@ static void nft_unregister_flowtable_net_hooks(struct net *net,
{
struct nft_hook *hook;
- list_for_each_entry(hook, &flowtable->hook_list, list)
+ list_for_each_entry(hook, &flowtable->hook_list, list) {
nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ }
}
static int nft_register_flowtable_net_hooks(struct net *net,
@@ -5991,6 +5995,8 @@ static int nft_register_flowtable_net_hooks(struct net *net,
}
}
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_BIND);
err = nf_register_net_hook(net, &hook->ops);
if (err < 0)
goto err_unregister_net_hooks;
@@ -6006,6 +6012,8 @@ err_unregister_net_hooks:
break;
nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
}
@@ -6080,6 +6088,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err2;
}
+ if (nla[NFTA_FLOWTABLE_FLAGS]) {
+ flowtable->data.flags =
+ ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+ if (flowtable->data.flags & ~NF_FLOWTABLE_HW_OFFLOAD)
+ goto err3;
+ }
+
+ write_pnet(&flowtable->data.net, net);
flowtable->data.type = type;
err = type->init(&flowtable->data);
if (err < 0)
@@ -6191,7 +6207,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
- NFTA_FLOWTABLE_PAD))
+ NFTA_FLOWTABLE_PAD) ||
+ nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
goto nla_put_failure;
nest = nla_nest_start_noflag(skb, NFTA_FLOWTABLE_HOOK);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index f29bbc74c4bf..dd82ff2ee19f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -115,10 +115,13 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (nft_flow_route(pkt, ct, &route, dir) < 0)
goto err_flow_route;
- flow = flow_offload_alloc(ct, &route);
+ flow = flow_offload_alloc(ct);
if (!flow)
goto err_flow_alloc;
+ if (flow_offload_route_init(flow, &route) < 0)
+ goto err_flow_add;
+
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;