openbmc dbus架构简析(二)
1.说明
以前觉得内核代码难,是因为它涉及硬件原理、算法结构以及层层递进的代码逻辑;现在的应用层由于业务复杂,并且与内核之间的交互接口也很复杂,同样变得有一定难度了。
这篇文章是继《openbmc dbus架构简析》之后的第二篇文章。
首先贴出来前篇文章的图,与简述内容:
* 1.inherit_fds()使用systemd机制获取到socket描述符(请先了解systemd的socket机制原理:systemd会先接管socket的监听,accept默认为no,因此服务进程需要通过sd_listen_fds()方法获取该socket描述符)
* 2.父子进程通过socketpair(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, controller);中的controller[0]和controller[1]进行连接
* 3.sd_bus_set_fd(launcher->bus_controller, controller[0], controller[0]);该函数内部执行bus->input_fd = input_fd;bus->output_fd = output_fd;,因此bus的输入、输出描述符实际都是controller[0]
* 4.sd_bus_call(launcher->bus_controller, m, 0, NULL, NULL);发送socket数据实际依靠的是controller[0],而子进程使用的是controller[1],通道打通,数据可以传递给子进程
* 5.sd_bus_message_append(m, "oh","/org/bus1/DBus/Listener/0",launcher->fd_listen);把launcher->fd_listen(即从systemd继承来的监听socket描述符)传递给了子进程。
2.分析
2.1 代码使用的描述符
2.1.1 dbus-broker-launch
使用的描述符
根据代码,列出来使用到的描述符:
static int run(void)
---> launcher_new()
---> r = launcher_open_log(launcher);
---> fd = socket(PF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); //新建了描述符, fd=4
---> sd_event_default(&launcher->event);
--->...
---> e->epoll_fd = epoll_create1(EPOLL_CLOEXEC); //创建了描述符, epoll_fd=5
---> r = sd_event_add_signal(launcher->event, NULL, SIGTERM, NULL, NULL);
---> r = sd_event_add_signal(launcher->event, NULL, SIGINT, NULL, NULL);
---> r = sd_event_add_signal(launcher->event, NULL, SIGHUP, launcher_on_sighup, launcher);
---> r = event_make_signal_data(e, sig, &d);
---> r = signalfd(d->fd >= 0 ? d->fd : -1,...) 使用了signalfd创建的fd=6
---> r = launcher_run(launcher);
---> r = launcher_parse_config(launcher, &root, &nss_cache);
---> r = dirwatch_new(&dirwatch);
---> dw->inotify_fd = inotify_init1(IN_CLOEXEC | IN_NONBLOCK); //新建fd=7
---> r = config_parser_read(&parser, rootp, configfile, nss_cache, dirwatch);
---> r = config_parser_include(parser, root, node, nss_cache, dirwatch);
---> _c_cleanup_(c_closep) int fd = -1;
---> r = open(node->include.file->path, O_RDONLY | O_CLOEXEC | O_NOCTTY);
---> ... //这里 fd=8;无需显式调用close(),因为fd使用_c_cleanup_(c_closep)声明,离开作用域时会自动关闭
---> r = socketpair(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0, controller);
---> ...返回了2个套接字 分别为 controller[0] = 8 和 controller[1] = 9
如下图,可以看到基本的文件描述符均为默认状态:
参考文档: https://www.jinbuguo.com/systemd/systemd.exec.html#, 描述了如下内容:
因此,fd=0,fd=1,fd=2
的结果如下:
因此,对于openbmc,常见的printf,fprintf(stderr…)均会输出到journal中。
- 对于fd=3:继承于systemd,接收外部的服务。
- 对于fd=4:创建socket,用来将数据传递到:/run/systemd/journal/socket
- 对于fd=5:创建e->epoll_fd
- 对于fd=6:使用signalfd()创建fd
- 对于fd=7:使用inotify_init1()创建fd
- 对于fd=8,fd=9:使用socketpair()创建fd,用来父子进程数据通信
再回到开头的那幅图,可以看到在父进程与子进程之间,父进程通过sd-bus接口调用,将fd(由systemd监听、再转交给dbus-broker的socket描述符)传递给子进程。
继续分析文件:src\launch\launcher.c
:
static int launcher_add_listener(Launcher *launcher, Policy *policy,...)
---> r = sd_bus_message_new_method_call(launcher->bus_controller, &m,NULL,"/org/bus1/DBus/Broker","org.bus1.DBus.Broker","AddListener");
---> _cleanup_(sd_bus_message_unrefp) sd_bus_message *t = NULL;
---> r = sd_bus_message_new(bus, &t, SD_BUS_MESSAGE_METHOD_CALL);
---> t->header->endian = BUS_NATIVE_ENDIAN;
---> t->header->type = type;
---> t->header->version = bus->message_version;
---> t->allow_fds = bus->can_fds || !IN_SET(bus->state, BUS_HELLO, BUS_RUNNING);
---> r = sd_bus_message_append(m, "oh","/org/bus1/DBus/Listener/0",launcher->fd_listen);
---> r = policy_export(policy, m, system_console_users, n_system_console_users);
---> r = sd_bus_call(launcher->bus_controller, m, 0, NULL, NULL);
实际上,这里面的重点是函数:sd_bus_call()
,在文件:src\libsystemd\sd-bus\sd-bus.c
中:
int sd_bus_call(...)
---> bus_write_message()
---> bus_socket_write_message()
---> if (m->n_fds > 0 && *idx == 0)
---> mh.msg_controllen = CMSG_SPACE(sizeof(int) * m->n_fds);
---> mh.msg_control = alloca0(mh.msg_controllen);
---> control = CMSG_FIRSTHDR(&mh);
---> control->cmsg_len = CMSG_LEN(sizeof(int) * m->n_fds);
---> control->cmsg_level = SOL_SOCKET;
---> control->cmsg_type = SCM_RIGHTS;
---> memcpy(CMSG_DATA(control), m->fds, sizeof(int) * m->n_fds);
---> k = sendmsg(bus->output_fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
通过以上调用步骤,父进程调用sendmsg(),并在控制消息(cmsg)中携带SCM_RIGHTS类型的辅助数据,从而把文件描述符传递给另外一个进程(即子进程)。
继续查看函数sd_bus_message_append()
的调用关系:
int sd_bus_message_append(sd_bus_message *m, const char *types, ...)
---> r = sd_bus_message_appendv(m, types, ap); //types是字符串"oh"
---> switch (*t)
---> case SD_BUS_TYPE_UNIX_FD:
---> uint32_t x;
---> x = va_arg(ap, uint32_t);
---> r = sd_bus_message_append_basic(m, *t, &x);
---> u32 = m->n_fds;
---> case SD_BUS_TYPE_OBJECT_PATH:
---> const char *x;
---> x = va_arg(ap, const char*);
---> r = sd_bus_message_append_basic(m, *t, x);
这些sd-bus
类型有必要列出来,在文件:src\systemd\sd-bus-protocol.h
中:
enum {
_SD_BUS_TYPE_INVALID = 0,
SD_BUS_TYPE_BYTE = 'y',
SD_BUS_TYPE_BOOLEAN = 'b',
SD_BUS_TYPE_INT16 = 'n',
SD_BUS_TYPE_UINT16 = 'q',
SD_BUS_TYPE_INT32 = 'i',
SD_BUS_TYPE_UINT32 = 'u',
SD_BUS_TYPE_INT64 = 'x',
SD_BUS_TYPE_UINT64 = 't',
SD_BUS_TYPE_DOUBLE = 'd',
SD_BUS_TYPE_STRING = 's',
SD_BUS_TYPE_OBJECT_PATH = 'o',
SD_BUS_TYPE_SIGNATURE = 'g',
SD_BUS_TYPE_UNIX_FD = 'h',
SD_BUS_TYPE_ARRAY = 'a',
SD_BUS_TYPE_VARIANT = 'v',
SD_BUS_TYPE_STRUCT = 'r', /* not actually used in signatures */
SD_BUS_TYPE_STRUCT_BEGIN = '(',
SD_BUS_TYPE_STRUCT_END = ')',
SD_BUS_TYPE_DICT_ENTRY = 'e', /* not actually used in signatures */
SD_BUS_TYPE_DICT_ENTRY_BEGIN = '{',
SD_BUS_TYPE_DICT_ENTRY_END = '}'
};
在文件:src\broker\controller.c
中,函数:controller_init()
调用:
int controller_init(Controller *c, Broker *broker, int controller_fd)
---> r = connection_init_server(&controller->connection,
&broker->dispatcher,controller_dispatch_connection,
broker->bus.user,"0123456789abcdef",controller_fd);
其中,函数controller_dispatch_connection()
定义:
static int controller_dispatch_connection(DispatchFile *file)
---> r = connection_dispatch(&controller->connection, dispatch_file_events(file));
---> r = connection_dequeue(&controller->connection, &m);
---> r = socket_dequeue(&connection->socket, &message);
---> r = message_new_incoming(&message, socket->in.header);
---> r = message_parse_metadata(m);
---> r = message_parse_header(message, &message->metadata);
---> switch (field)
---> case DBUS_MESSAGE_FIELD_PATH:
---> case DBUS_MESSAGE_FIELD_INTERFACE:
---> case DBUS_MESSAGE_FIELD_MEMBER:
---> case DBUS_MESSAGE_FIELD_REPLY_SERIAL:
---> case DBUS_MESSAGE_FIELD_UNIX_FDS:
---> c_dvar_read(&v, "<u>)", c_dvar_type_u, &metadata->fields.unix_fds);
---> r = message_parse_body(message, &message->metadata);
---> r = controller_dbus_dispatch(controller, m);
---> switch (message->header->type) {
---> case DBUS_MESSAGE_TYPE_METHOD_CALL:
---> r = controller_dispatch_object(controller,
message_read_serial(message),message->metadata.fields.interface,
message->metadata.fields.member,message->metadata.fields.path,
message->metadata.fields.signature,message);
---> if (strcmp(path, "/org/bus1/DBus/Broker") == 0)
---> controller_dispatch_controller(controller, serial, member, path, signature, message);
---> controller_method_add_listener( ... )
---> listener_fd = fdlist_get(fds, fd_index);
---> r = controller_add_listener(controller, &listener, path, listener_fd, policy);
---> listener_init_with_fd(...)
---> listener->bus = bus;
---> dispatch_file_init(.., listener_dispatch,... )
这里面的判断:message->header->type
来自于src\launch\launcher.c
的sd_bus_message_new_method_call( )
最后,调用到函数listener_dispatch()
:
static int listener_dispatch(DispatchFile *file)
---> fd = accept4(listener->socket_fd, NULL, NULL, SOCK_CLOEXEC | SOCK_NONBLOCK);
2.1.2 socket takeover
需要首先查阅该篇文章: Zero Downtime Release: Disruption-free Load Balancing of a Multi-Billion User Website.
可参考文件src\util\log.c
中的函数log_fd_send()
:
static int log_fd_send(int destination_fd, int payload_fd)
---> control.cmsg.cmsg_level = SOL_SOCKET;
---> control.cmsg.cmsg_type = SCM_RIGHTS;
---> l = sendmsg(destination_fd, &msg, MSG_NOSIGNAL);
2.1.3 描述符附录
简单看一下应用程序使用的描述符:
root@evb-ast2500:~# ps | grep -i "dbus"
191 messageb 5836 S /usr/bin/dbus-broker-launch --scope system --audit
192 messageb 2756 S dbus-broker --log 4 --controller 9 --machine-id c47c0d3d042848a1908818ca62f0644e --max-bytes 536870912 --max-fds 4096 --max-matches 16384 --audit
344 root 2952 S grep -i dbus
root@evb-ast2500:~#
root@evb-ast2500:~# ls -al /proc/191/fd
dr-x------ 2 root root 13 Feb 27 17:54 .
dr-xr-xr-x 8 messageb messageb 0 Feb 27 17:54 ..
lr-x------ 1 root root 64 Feb 27 17:54 0 -> /dev/null
lrwx------ 1 root root 64 Feb 27 17:54 1 -> socket:[2242]
lrwx------ 1 root root 64 Feb 27 17:55 10 -> anon_inode:[pidfd]
lrwx------ 1 root root 64 Feb 27 17:55 11 -> anon_inode:[timerfd]
lrwx------ 1 root root 64 Feb 27 17:55 12 -> /memfd:dbus-broker-log (deleted)
lrwx------ 1 root root 64 Feb 27 17:54 2 -> socket:[2242]
lrwx------ 1 root root 64 Feb 27 17:54 3 -> socket:[2210]
lrwx------ 1 root root 64 Feb 27 17:54 4 -> socket:[2246]
lrwx------ 1 root root 64 Feb 27 17:55 5 -> anon_inode:[eventpoll]
lrwx------ 1 root root 64 Feb 27 17:55 6 -> anon_inode:[signalfd]
lr-x------ 1 root root 64 Feb 27 17:55 7 -> anon_inode:inotify
lrwx------ 1 root root 64 Feb 27 17:55 8 -> socket:[2254]
lrwx------ 1 root root 64 Feb 27 17:54 9 -> socket:[2303]
root@evb-ast2500:~#