Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions kernel/include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,22 @@ enum {
#include <linux/cache.h>
#include <linux/skbuff.h>

#define RPS_CONTINUE 0x1 /* Continue traversing RPS list */
#define RPS_STOP 0x2 /* Stop traversing RPS list */

struct netif_rps_entry
{
unsigned char proto;
unsigned char flags;
int (*rps_process)(struct sk_buff *skb);
int (*rps_init)(void);
void (*rps_uninit)(void);
struct list_head list;
};

extern int rps_register(struct netif_rps_entry *re);
extern int rps_unregister(struct netif_rps_entry *re);

struct neighbour;
struct neigh_parms;
struct sk_buff;
Expand Down Expand Up @@ -1731,6 +1747,7 @@ extern int dev_forward_skb(struct net_device *dev,
struct sk_buff *skb);

extern int netdev_budget;
extern int sysctl_rps_framework;

/* Called by rtnetlink.c:rtnl_unlock() */
extern void netdev_run_todo(void);
Expand Down
95 changes: 92 additions & 3 deletions kernel/net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -2256,7 +2256,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,

map = rcu_dereference(rxqueue->rps_map);
if (map) {
tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 16];

if (cpu_online(tcpu)) {
cpu = tcpu;
Expand Down Expand Up @@ -2736,6 +2736,89 @@ int __netif_receive_skb(struct sk_buff *skb)
return ret;
}

int sysctl_rps_framework __read_mostly = 0;
EXPORT_SYMBOL(sysctl_rps_framework);

static DEFINE_SPINLOCK(rps_table_lock);
static struct list_head rps_table[IPPROTO_MAX]__read_mostly;

static void init_rps_framework(void)
{
int i = 0;

for (; i < IPPROTO_MAX; i++)
INIT_LIST_HEAD(&rps_table[i]);
}

int rps_register(struct netif_rps_entry *re)
{
int ret = 0;

if (re->proto >= IPPROTO_MAX)
return -EINVAL;

if (re->rps_init) {
ret = re->rps_init();
if (ret < 0)
return ret;
}

spin_lock(&rps_table_lock);

/* RPS_CONTINUE entries are in the head of the list */
if (re->flags == RPS_CONTINUE)
list_add_rcu(&re->list, &rps_table[re->proto]);
/* RPS_STOP entries are in the tail of the list */
else if (re->flags == RPS_STOP)
list_add_tail_rcu(&re->list, &rps_table[re->proto]);

spin_unlock(&rps_table_lock);

return ret;
}
EXPORT_SYMBOL(rps_register);

int rps_unregister(struct netif_rps_entry *re)
{
spin_lock(&rps_table_lock);
list_del_rcu(&re->list);
spin_unlock(&rps_table_lock);

if (re->rps_uninit)
re->rps_uninit();

return 0;
}
EXPORT_SYMBOL(rps_unregister);

static int netif_rps_process(struct sk_buff *skb)
{
int ret = -1;

/* Only support IPV4 */
if (skb->protocol != htons(ETH_P_IP)) {
goto out;
}

if (pskb_may_pull(skb, sizeof(struct iphdr))) {
struct iphdr *iph = (struct iphdr *)skb->data;
u8 ip_proto = iph->protocol;
struct netif_rps_entry *p;

list_for_each_entry_rcu(p, &rps_table[ip_proto], list) {
ret = p->rps_process(skb);
/* Loop will break after a RPS_STOP entry returns no less than 0 */
if (ret >= 0) {
//printk(KERN_INFO "%s,cpu %d,sip %pI4,dip %pI4\n", __func__, ret, &iph->saddr, &iph->daddr);
goto out;
}
}
}

out:
return ret;
}

/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
Expand All @@ -2754,9 +2837,13 @@ int __netif_receive_skb(struct sk_buff *skb)
int netif_receive_skb(struct sk_buff *skb)
{
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu, ret;
int cpu = -1, ret;

cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (sysctl_rps_framework)
cpu = netif_rps_process(skb);

if (-1 == cpu)
cpu = get_rps_cpu(skb->dev, skb, &rflow);

if (cpu >= 0)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
Expand Down Expand Up @@ -6152,6 +6239,8 @@ static int __init net_dev_init(void)
if (register_pernet_subsys(&netdev_net_ops))
goto out;

init_rps_framework();

/*
* Initialise the packet receive queues.
*/
Expand Down
7 changes: 7 additions & 0 deletions kernel/net/core/sysctl_net_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "rps_framework",
.data = &sysctl_rps_framework,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.ctl_name = NET_CORE_WARNINGS,
.procname = "warnings",
Expand Down
122 changes: 121 additions & 1 deletion kernel/net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1164,14 +1164,66 @@ static inline int laddr_to_cpuid(int af, const union nf_inet_addr *addr)
sysctl_ip_vs_reserve_core;
}

struct laddr_cpu_t
{
struct hlist_node hlist;
struct rcu_head rcu_head;
int cpuid;
int refcnt;
__be32 addr;
};

#define LADDR_CPU_HASH_SIZE LADDR_MASK + 1
static struct hlist_head laddr_cpu_map[LADDR_CPU_HASH_SIZE];
static DEFINE_SPINLOCK(laddr_cpu_map_lock);

static struct laddr_cpu_t *lookup_laddr_cpu(__be32 addr)
{
struct hlist_head *head;
struct hlist_node *node;
struct laddr_cpu_t *p;

head = &laddr_cpu_map[ntohl(addr) & LADDR_MASK];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (p->addr == addr)
return p;
}

return NULL;
}

void ip_vs_laddr_hold(struct ip_vs_laddr *laddr)
{
atomic_inc(&laddr->refcnt);
}

static void rcu_free_laddr_cpu(struct rcu_head *head)
{
struct laddr_cpu_t *laddr_cpu = container_of(head, struct laddr_cpu_t, rcu_head);
#ifdef CONFIG_IP_VS_DEBUG
IP_VS_DBG(0, "%s,%pI4,cpu %d,refcnt %d\n", __func__, &laddr_cpu->addr, laddr_cpu->cpuid, laddr_cpu->refcnt);
#endif
kfree(laddr_cpu);
}

void ip_vs_laddr_put(struct ip_vs_laddr *laddr)
{
if (atomic_dec_and_test(&laddr->refcnt)) {
struct laddr_cpu_t *laddr_cpu;

spin_lock(&laddr_cpu_map_lock);
laddr_cpu = lookup_laddr_cpu(laddr->addr.ip);
if (laddr_cpu) {
if ((--laddr_cpu->refcnt) <= 0) {
hlist_del_rcu(&laddr_cpu->hlist);
call_rcu(&laddr_cpu->rcu_head, rcu_free_laddr_cpu);
}
#ifdef CONFIG_IP_VS_DEBUG
IP_VS_DBG(0, "%s,%pI4,cpu %d,refcnt %d\n", __func__, &laddr_cpu->addr, laddr_cpu->cpuid, laddr_cpu->refcnt);
#endif
}
spin_unlock(&laddr_cpu_map_lock);

kfree(laddr);
}
}
Expand Down Expand Up @@ -1235,6 +1287,7 @@ ip_vs_add_laddr(struct ip_vs_service *svc, struct ip_vs_laddr_user_kern *uladdr)
struct ip_vs_service *this_svc;
int cpu;
int ret;
struct laddr_cpu_t *laddr_cpu;

IP_VS_DBG_BUF(0, "vip %s:%d add local address %s\n",
IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port),
Expand All @@ -1257,6 +1310,30 @@ ip_vs_add_laddr(struct ip_vs_service *svc, struct ip_vs_laddr_user_kern *uladdr)
return ret;
}

spin_lock(&laddr_cpu_map_lock);
laddr_cpu = lookup_laddr_cpu(laddr->addr.ip);
if (laddr_cpu) {
++(laddr_cpu->refcnt);
}
spin_unlock(&laddr_cpu_map_lock);

if (!laddr_cpu) {
laddr_cpu = kzalloc(sizeof(struct laddr_cpu_t), GFP_ATOMIC);
if (!laddr_cpu) {
kfree(laddr);
pr_err("%s():no memory.\n", __func__);
return -ENOMEM;
}

laddr_cpu->cpuid = laddr->cpuid;
laddr_cpu->addr = laddr->addr.ip;
laddr_cpu->refcnt = 1;

spin_lock(&laddr_cpu_map_lock);
hlist_add_head_rcu(&laddr_cpu->hlist, &laddr_cpu_map[ntohl(laddr_cpu->addr) & LADDR_MASK]);
spin_unlock(&laddr_cpu_map_lock);
}

/*
* Add the local adress entry into the list
*/
Expand Down Expand Up @@ -1551,7 +1628,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
{
struct ip_vs_dest *dest, *nxt;
struct ip_vs_laddr *laddr, *laddr_next;
struct ip_vs_scheduler *old_sched;
struct ip_vs_scheduler *old_sched = NULL;
struct ip_vs_service *this_svc;
int cpu = 0;

Expand Down Expand Up @@ -4387,6 +4464,39 @@ static int __init alloc_svc_tab(void)

/* End of Generic Netlink interface definitions */

static int process_laddr_rps(struct sk_buff *skb)
{
struct iphdr *iph = (struct iphdr *)skb->data;
struct laddr_cpu_t *laddr_cpu;
int cpu = -1;

rcu_read_lock();
laddr_cpu = lookup_laddr_cpu(iph->daddr);
if (laddr_cpu)
cpu = laddr_cpu->cpuid;
rcu_read_unlock();

return cpu;
}

static struct netif_rps_entry tcp_laddr_rps_entry = {
.proto = IPPROTO_TCP,
.flags = RPS_CONTINUE,
.rps_process = process_laddr_rps,
.rps_init = NULL,
.rps_uninit = NULL,
.list = LIST_HEAD_INIT(tcp_laddr_rps_entry.list),
};

static struct netif_rps_entry udp_laddr_rps_entry = {
.proto = IPPROTO_UDP,
.flags = RPS_STOP,
.rps_process = process_laddr_rps,
.rps_init = NULL,
.rps_uninit = NULL,
.list = LIST_HEAD_INIT(udp_laddr_rps_entry.list),
};

int __init ip_vs_control_init(void)
{
int ret;
Expand Down Expand Up @@ -4447,6 +4557,12 @@ int __init ip_vs_control_init(void)
INIT_LIST_HEAD(&per_cpu(ip_vs_dest_trash_percpu, cpu));
}

for (idx = 0; idx < LADDR_CPU_HASH_SIZE; ++idx)
INIT_HLIST_HEAD(&laddr_cpu_map[idx]);

rps_register(&tcp_laddr_rps_entry);
rps_register(&udp_laddr_rps_entry);

LeaveFunction(2);
return 0;

Expand All @@ -4465,6 +4581,8 @@ int __init ip_vs_control_init(void)
void ip_vs_control_cleanup(void)
{
EnterFunction(2);
rps_unregister(&udp_laddr_rps_entry);
rps_unregister(&tcp_laddr_rps_entry);
ip_vs_trash_cleanup();
ip_vs_del_stats(ip_vs_stats);
unregister_sysctl_table(sysctl_header);
Expand All @@ -4475,5 +4593,7 @@ void ip_vs_control_cleanup(void)
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
free_svc_tab();
/* Wait all rcu callback to complete */
rcu_barrier();
LeaveFunction(2);
}