【Linux内核剖析】深入分析inet_init的处理机制
inet_init
是 Linux 内核中用于初始化 TCP/IP 协议栈的函数。它在内核启动时被调用,完成各种协议和数据结构的注册和初始化。
主要功能:
- 注册 TCP、UDP、ICMP 等协议。
- 初始化 ARP、IP 和其他网络协议模块。
- 设置 socket 操作和协议处理。
前后调用关系链:
start_kernel()
└── rest_init()
└── kernel_init()
└── do_basic_setup()
└── do_initcalls()
└── inet_init()
├── proto_register(&tcp_prot, 1)
├── proto_register(&udp_prot, 1)
├── proto_register(&raw_prot, 1)
├── proto_register(&ping_prot, 1)
├── sock_register(&inet_family_ops)
├── inet_add_protocol(&icmp_protocol, IPPROTO_ICMP)
├── inet_add_protocol(&udp_protocol, IPPROTO_UDP)
├── inet_add_protocol(&tcp_protocol, IPPROTO_TCP)
├── arp_init()
├── ip_init()
├── tcp_v4_init()
├── udp_init()
├── ping_init()
├── icmp_init()
└── init_ipv4_mibs()
再来看源代码:
static int __init inet_init(void)
{
struct sk_buff *dummy_skb;
struct inet_protosw *q;
struct list_head *r;
int rc = -EINVAL;
BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
if (!sysctl_local_reserved_ports)
goto out;
rc = proto_register(&tcp_prot, 1);
if (rc)
goto out_free_reserved_ports;
rc = proto_register(&udp_prot, 1);
if (rc)
goto out_unregister_tcp_proto;
rc = proto_register(&raw_prot, 1);
if (rc)
goto out_unregister_udp_proto;
rc = proto_register(&ping_prot, 1);
if (rc)
goto out_unregister_raw_proto;
/*
* Tell SOCKET that we are alive...
*/
(void)sock_register(&inet_family_ops);
#ifdef CONFIG_SYSCTL
ip_static_sysctl_init();
#endif
/*
* Add all the base protocols.
*/
if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif
/* Register the socket-side information for inet_create. */
for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
INIT_LIST_HEAD(r);
for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
inet_register_protosw(q);
/*
* Set the ARP module up
*/
arp_init();
/*
* Set the IP module up
*/
ip_init();
tcp_v4_init();
/* Setup TCP slab cache for open requests. */
tcp_init();
/* Setup UDP memory threshold */
udp_init();
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
ping_init();
/*
* Set the ICMP layer up
*/
if (icmp_init() < 0)
panic("Failed to create the ICMP control socket.\n");
/*
* Initialise the multicast router
*/
#if defined(CONFIG_IP_MROUTE)
if (ip_mr_init())
printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif
/*
* Initialise per-cpu ipv4 mibs
*/
if (init_ipv4_mibs())
printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
ipv4_proc_init();
ipfrag_init();
dev_add_pack(&ip_packet_type);
rc = 0;
out:
return rc;
out_unregister_raw_proto:
proto_unregister(&raw_prot);
out_unregister_udp_proto:
proto_unregister(&udp_prot);
out_unregister_tcp_proto:
proto_unregister(&tcp_prot);
out_free_reserved_ports:
kfree(sysctl_local_reserved_ports);
goto out;
}
fs_initcall(inet_init);
调用路径
start_kernel()
:内核的入口函数,位于 init/main.c 中,完成内核的基本初始化工作。rest_init()
:在start_kernel()
中被调用,创建内核初始化线程 kernel_init。kernel_init()
:内核初始化线程的主函数,负责内核的后续初始化工作。do_basic_setup()
:在kernel_init()
中被调用,执行所有的初始化调用(initcall)。do_initcalls()
:在do_basic_setup()
中被调用,遍历所有的 initcall 函数,并依次执行它们。inet_init()
:作为一个 __initcall 函数被调用,用于初始化 TCP/IP 协议栈。
proto_register()
:用于注册不同的协议(TCP、UDP、RAW、PING),将它们添加到协议列表中,以便后续处理。
sock_register()
:注册 socket 操作,包括创建和管理 socket 的方法。
inet_add_protocol()
:将各个传输层协议(如 ICMP、UDP、TCP)添加到网络层,以便接收和处理数据包。
模块初始化函数(如 arp_init()
, ip_init()
, tcp_v4_init()
, 等):这些函数负责初始化各个网络模块,为后续的数据传输做好准备。
在看最后一行的处理函数
/**
* dev_add_pack - add packet handler
* @pt: packet type declaration
*
* Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists.
*
* This call does not sleep therefore it can not
* guarantee all CPU's that are in middle of receiving packets
* will see the new packet type (until the next received packet).
*/
void dev_add_pack(struct packet_type *pt)
{
struct list_head *head = ptype_head(pt);
spin_lock(&ptype_lock);
list_add_rcu(&pt->list, head);
spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
dev_add_pack
是 Linux 内核中用于添加数据包处理程序的函数。它将一个协议处理程序(packet handler)注册到网络栈中,以便在接收到特定类型的数据包时能够正确处理这些数据包。
具体来说:
- 添加协议处理程序:将传入的
packet_type
结构体链接到内核的链表中,允许内核在接收到相应类型的数据包时调用相应的处理程序。struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ int (*func) (struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); struct sk_buff *(*gso_segment)(struct sk_buff *skb, u32 features); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; };
- 非阻塞操作:该函数在执行过程中不会导致线程睡眠,这意味着它可以在任何上下文中被调用,包括中断上下文。
这么做有什么好处呢?
- 网络协议处理:当网络设备接收到数据包时,内核会检查数据包类型,并调用相应的处理程序。通过
dev_add_pack()
注册的数据包处理程序会在接收到匹配的数据包时被触发。 - 动态协议支持:可以在运行时动态地添加新的协议处理程序,而不需要重启内核或修改内核代码。这使得内核能够灵活地支持多种网络协议和功能。
那么,它如何如何根据数据包的类型来找到对应的链表头呢?
/*******************************************************************************
Protocol management and registration routines
*******************************************************************************/
/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
*
* BEWARE!!! Protocol handlers, mangling input packets,
* MUST BE last in hash buckets and checking protocol handlers
* MUST start from promiscuous ptype_all chain in net_bh.
* It is true now, do not change it.
* Explanation follows: if protocol handler, mangling packet, will
* be the first on list, it is not able to sense, that packet
* is cloned and should be copied-on-write, so that it will
* change it and subsequent readers will get broken packet.
* --ANK (980803)
*/
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
return &ptype_all;
else
return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}