diff --git a/agent/src/ebpf/user/profile/perf_profiler.c b/agent/src/ebpf/user/profile/perf_profiler.c index 5814cc83c0f..9e6568e8dd0 100644 --- a/agent/src/ebpf/user/profile/perf_profiler.c +++ b/agent/src/ebpf/user/profile/perf_profiler.c @@ -709,24 +709,6 @@ void build_prog_jump_tables(struct bpf_tracer *tracer) PROG_DWARF_UNWIND_FOR_PE, PROG_DWARF_UNWIND_PE_IDX); insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, PROG_ONCPU_OUTPUT_FOR_PE, PROG_ONCPU_OUTPUT_PE_IDX); - insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, - PROG_PYTHON_UNWIND_FOR_PE, - PROG_PYTHON_UNWIND_PE_IDX); - insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, - PROG_LUA_UNWIND_FOR_PE, - PROG_LUA_UNWIND_PE_IDX); - insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, - PROG_PHP_UNWIND_FOR_PE, - PROG_PHP_UNWIND_PE_IDX); - insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, - PROG_V8_UNWIND_FOR_PE, - PROG_V8_UNWIND_PE_IDX); - insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, - PROG_DWARF_UNWIND_BEFORE_PHP_FOR_PE, - PROG_DWARF_UNWIND_BEFORE_PHP_PE_IDX); - insert_prog_to_map(tracer, MAP_CP_PROGS_JMP_PE_NAME, - PROG_DWARF_UNWIND_BEFORE_V8_FOR_PE, - PROG_DWARF_UNWIND_BEFORE_V8_PE_IDX); extended_prog_jump_tables(tracer); } diff --git a/server/agent_config/README-CH.md b/server/agent_config/README-CH.md index 6c557bd862c..e0910bc92e1 100644 --- a/server/agent_config/README-CH.md +++ b/server/agent_config/README-CH.md @@ -501,77 +501,6 @@ global: deepflow-agent 默认每 10 秒监控一次所设定的系统负载指标项。 -### 发送吞吐 {#global.circuit_breakers.tx_throughput} - -#### 触发阈值 {#global.circuit_breakers.tx_throughput.trigger_threshold} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`global.circuit_breakers.tx_throughput.trigger_threshold` - -Upgrade from old version: `max_tx_bandwidth` - -**默认值**: -```yaml -global: - circuit_breakers: - tx_throughput: - trigger_threshold: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | Mbps | -| Range | [0, 100000] | - -**详细描述**: 
- -如果流量分发所用网络接口的出方向吞吐量达到或超出此阈值,deepflow-agent 停止流量分发; -如果该网络接口的出方向吞吐量连续 5 个监控周期低于`(trigger_threshold - -outputs.npb.max_tx_throughput)*90%`,deepflow-agent 恢复流量分发。 - -注意: -1. 取值为 0 时,该特性不生效; -2. 若取非 0 值,必须大于 `outputs.npb.max_tx_throughput`。 - -#### 吞吐监控间隔 {#global.circuit_breakers.tx_throughput.throughput_monitoring_interval} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`global.circuit_breakers.tx_throughput.throughput_monitoring_interval` - -Upgrade from old version: `bandwidth_probe_interval` - -**默认值**: -```yaml -global: - circuit_breakers: - tx_throughput: - throughput_monitoring_interval: 10s -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['1s', '60s'] | - -**详细描述**: - -deepflow-agent 对流量分发所使用网络接口的出方向吞吐量指标的监控周期。 - ### 空闲磁盘 {#global.circuit_breakers.free_disk} #### 百分比触发阈值 {#global.circuit_breakers.free_disk.percentage_trigger_threshold} @@ -2269,97 +2198,6 @@ inputs: - ebpf.profile.off_cpu(注意确认 `inputs.ebpf.profile.off_cpu.disabled` 已配置为 **false**) - ebpf.profile.memory(注意确认 `inputs.ebpf.profile.memory.disabled` 已配置为 **false**) -### 智能体治理 {#inputs.proc.ai_agent} - -#### HTTP 端点 {#inputs.proc.ai_agent.http_endpoints} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.proc.ai_agent.http_endpoints` - -**默认值**: -```yaml -inputs: - proc: - ai_agent: - http_endpoints: - - /v1/chat/completions - - /v1/embeddings - - /v1/responses -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -用于识别智能体的 HTTP 端点前缀,命中后会标记进程为 AI Agent。 - -#### 最大载荷大小 {#inputs.proc.ai_agent.max_payload_size} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.proc.ai_agent.max_payload_size` - -**默认值**: -```yaml -inputs: - proc: - ai_agent: - max_payload_size: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [0, 2147483647] | - -**详细描述**: - -AI Agent 流重组最大载荷大小,0 表示不限。 - -#### 文件 IO 事件 
{#inputs.proc.ai_agent.file_io_enabled} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.proc.ai_agent.file_io_enabled` - -**默认值**: -```yaml -inputs: - proc: - ai_agent: - file_io_enabled: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -是否开启 AI Agent 文件 IO 事件采集。 - ### 符号表 {#inputs.proc.symbol_table} #### Golang 特有 {#inputs.proc.symbol_table.golang_specific} @@ -2719,38 +2557,6 @@ inputs: Bond 网卡的从网卡列表。 -#### 需要采集的额外网络 Namespace {#inputs.cbpf.af_packet.extra_netns_regex} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.cbpf.af_packet.extra_netns_regex` - -Upgrade from old version: `extra_netns_regex` - -**默认值**: -```yaml -inputs: - cbpf: - af_packet: - extra_netns_regex: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -除默认网络 namespace 之外,deepflow-agent 还会根据此参数正则匹配额外的网络 namespace, -在匹配命中的网络 namespace 中根据`inputs.cbpf.af_packet.interface_regex`正则匹配网络接口并采集流量。默认 -配置 `""` 表示仅采集默认网络 namesapce,不采集额外的网络 namespace 流量。 - #### 额外的 BPF 过滤器 {#inputs.cbpf.af_packet.extra_bpf_filter} **标签**: @@ -2807,39 +2613,6 @@ inputs: | ---- | ---------------------------- | | Type | string | -#### 物理网络镜像流量中的 VLAN PCP {#inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic` - -Upgrade from old version: `static_config.mirror-traffic-pcp` - -**默认值**: -```yaml -inputs: - cbpf: - af_packet: - vlan_pcp_in_physical_mirror_traffic: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 9] | - -**详细描述**: - -- 当此配置值小于等于 7 时,仅当 VLAN PCP 与该值匹配时,从 VLAN tag 中计算 TAP。 -- 当此配置值为 8 时,从外层 VLAN tag 中计算 TAP, -- 当此配置值为 9 时,从内层 VLAN tag 中计算 TAP。 - #### 禁用 BPF 过滤 {#inputs.cbpf.af_packet.bpf_filter_disabled} **标签**: @@ -3118,16 +2891,15 @@ inputs: #### DPDK {#inputs.cbpf.special_network.dpdk} 
-##### 数据源 {#inputs.cbpf.special_network.dpdk.source} +##### 乱序重排缓存时间窗口大小 {#inputs.cbpf.special_network.dpdk.reorder_cache_window_size} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.special_network.dpdk.source` +`inputs.cbpf.special_network.dpdk.reorder_cache_window_size` **默认值**: ```yaml @@ -3135,28 +2907,22 @@ inputs: cbpf: special_network: dpdk: - source: None + reorder_cache_window_size: 60ms ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| None | | -| eBPF | | -| pdump | | - **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['60ms', '100ms'] | **详细描述**: -目前支持两种采集 DPDK 流量的方式,包括: -- pdump: 详情见 [https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html](https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html) -- eBPF: 使用 eBPF Uprobe 的方式获取 DPDK 流量,同时需要配置 `inputs.ebpf.socket.uprobe.dpdk` +当 `inputs.cbpf.special_network.dpdk.source` 为 eBPF 时该配置生效,时间窗口变大会导致 agent 占用更多的内存。 -##### 乱序重排缓存时间窗口大小 {#inputs.cbpf.special_network.dpdk.reorder_cache_window_size} +### 调优 {#inputs.cbpf.tunning} + +#### 启用 Dispatcher 队列 {#inputs.cbpf.tunning.dispatcher_queue_enabled} **标签**: @@ -3164,165 +2930,173 @@ inputs: **FQCN**: -`inputs.cbpf.special_network.dpdk.reorder_cache_window_size` +`inputs.cbpf.tunning.dispatcher_queue_enabled` + +Upgrade from old version: `static_config.dispatcher-queue` **默认值**: ```yaml inputs: cbpf: - special_network: - dpdk: - reorder_cache_window_size: 60ms + tunning: + dispatcher_queue_enabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['60ms', '100ms'] | +| Type | bool | **详细描述**: -当 `inputs.cbpf.special_network.dpdk.source` 为 eBPF 时该配置生效,时间窗口变大会导致 agent 占用更多的内存。 +当 `inputs.cbpf.common.capture_mode` 为`本地流量`或`虚拟网络镜像`时该配置生效。 -#### Libpcap {#inputs.cbpf.special_network.libpcap} +对所有流量采集方式都可用。 -##### Enabled 
{#inputs.cbpf.special_network.libpcap.enabled} +#### 最大采集包长 {#inputs.cbpf.tunning.max_capture_packet_size} **标签**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.special_network.libpcap.enabled` +`inputs.cbpf.tunning.max_capture_packet_size` -Upgrade from old version: `static_config.libpcap-enabled` +Upgrade from old version: `capture_packet_size` **默认值**: ```yaml inputs: cbpf: - special_network: - libpcap: - enabled: false + tunning: + max_capture_packet_size: 65535 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Unit | byte | +| Range | [128, 65535] | **详细描述**: -libpcap 的启动开关,该参数在 Windows 系统中默认开启,在 Linux 系统中默认关闭。libcap 在 Windows -和 Linux 系统中均支持,但在多接口的环境中流量采集性能较低。 - -#### vHost User {#inputs.cbpf.special_network.vhost_user} +该参数配置对 DPDK 环境无效。 -##### vHost Socket Path {#inputs.cbpf.special_network.vhost_user.vhost_socket_path} +#### 最大采集 PPS {#inputs.cbpf.tunning.max_capture_pps} **标签**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.special_network.vhost_user.vhost_socket_path` +`inputs.cbpf.tunning.max_capture_pps` -Upgrade from old version: `static_config.vhost-socket-path` +Upgrade from old version: `max_collect_pps` **默认值**: ```yaml inputs: cbpf: - special_network: - vhost_user: - vhost_socket_path: '' + tunning: + max_capture_pps: 1048576 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Unit | pps | +| Range | [1, 10000000] | **详细描述**: -支持在 Linux 环境中以虚拟网络镜像模式运行。 +deepflow-agent 采集数据包的速率上限。 -#### 物理交换机 {#inputs.cbpf.special_network.physical_switch} +### 预处理 {#inputs.cbpf.preprocess} -##### sFlow 接收端口号 {#inputs.cbpf.special_network.physical_switch.sflow_ports} +#### 隧道解封装协议 {#inputs.cbpf.preprocess.tunnel_decap_protocols} **标签**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.special_network.physical_switch.sflow_ports` +`inputs.cbpf.preprocess.tunnel_decap_protocols` -Upgrade from old version: 
`static_config.xflow-collector.sflow-ports` +Upgrade from old version: `decap_type` **默认值**: ```yaml inputs: cbpf: - special_network: - physical_switch: - sflow_ports: [] + preprocess: + tunnel_decap_protocols: + - 1 + - 2 ``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| 1 | VXLAN | +| 2 | IPIP | +| 3 | GRE | +| 4 | Geneve | +| 5 | VXLAN-NSH | + **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [1, 65535] | **详细描述**: -配置 sFlow 的接收端口号,默认值`[]`表示不采集 sFlow 数据。通常 sFlow 使用 6343 端口。 -注意,该特性仅对企业版的 Trident 有效。 +deepflow-agent 需要对数据包解封装的隧道协议,仅企业版本支持解析 GRE 和 VXLAN-NSH。 -##### NetFlow 接收端口号 {#inputs.cbpf.special_network.physical_switch.netflow_ports} +#### 隧道头剥离协议 {#inputs.cbpf.preprocess.tunnel_trim_protocols} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.special_network.physical_switch.netflow_ports` +`inputs.cbpf.preprocess.tunnel_trim_protocols` -Upgrade from old version: `static_config.xflow-collector.netflow-ports` +Upgrade from old version: `static_config.trim-tunnel-types` **默认值**: ```yaml inputs: cbpf: - special_network: - physical_switch: - netflow_ports: [] + preprocess: + tunnel_trim_protocols: [] ``` -**模式**: -| Key | Value | +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| ERSPAN | | +| VXLAN | | +| TEB | | + +**模式**: +| Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 65535] | +| Type | string | **详细描述**: -配置 NetFlow 的接收端口号,默认值`[]`表示不采集 NetFlow 数据。通常 sFlow 使用 2055 端口。 -注意,该特性仅对企业版的 Trident 有效,且目前仅支持 NetFlow v5 协议。 +流量镜像(虚拟或物理)模式下,deepflow-agent 需要剥离的隧道头协议类型。 +仅企业版支持解析 ERSPAN 和 TEB。 -### 调优 {#inputs.cbpf.tunning} +## eBPF {#inputs.ebpf} -#### 启用 Dispatcher 队列 {#inputs.cbpf.tunning.dispatcher_queue_enabled} +### Disabled {#inputs.ebpf.disabled} **标签**: @@ -3330,16 +3104,15 @@ inputs: **FQCN**: -`inputs.cbpf.tunning.dispatcher_queue_enabled` +`inputs.ebpf.disabled` -Upgrade from old version: `static_config.dispatcher-queue` 
+Upgrade from old version: `static_config.ebpf.disabled` **默认值**: ```yaml inputs: - cbpf: - tunning: - dispatcher_queue_enabled: false + ebpf: + disabled: false ``` **模式**: @@ -3349,187 +3122,193 @@ inputs: **详细描述**: -当 `inputs.cbpf.common.capture_mode` 为`本地流量`或`虚拟网络镜像`时该配置生效。 +eBPF 特性的总开关。 -对所有流量采集方式都可用。 +### Socket {#inputs.ebpf.socket} -#### 最大采集包长 {#inputs.cbpf.tunning.max_capture_packet_size} +#### Uprobe {#inputs.ebpf.socket.uprobe} + +##### Golang {#inputs.ebpf.socket.uprobe.golang} + +###### Enabled {#inputs.ebpf.socket.uprobe.golang.enabled} **标签**: -`hot_update` +agent_restart **FQCN**: -`inputs.cbpf.tunning.max_capture_packet_size` +`inputs.ebpf.socket.uprobe.golang.enabled` -Upgrade from old version: `capture_packet_size` +Upgrade from old version: `static_config.ebpf.uprobe-golang-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.golang` **默认值**: ```yaml inputs: - cbpf: - tunning: - max_capture_packet_size: 65535 + ebpf: + socket: + uprobe: + golang: + enabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [128, 65535] | +| Type | bool | **详细描述**: -该参数配置对 DPDK 环境无效。 +Golang 程序 HTTP2/HTTPS 协议数据采集及零侵扰追踪特性的开启开关。 + +注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, +即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.socket.uprobe.golang`。 -#### 裸包缓冲区 Block 大小 {#inputs.cbpf.tunning.raw_packet_buffer_block_size} +###### 追踪超时时间 {#inputs.ebpf.socket.uprobe.golang.tracing_timeout} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.tunning.raw_packet_buffer_block_size` +`inputs.ebpf.socket.uprobe.golang.tracing_timeout` -Upgrade from old version: `static_config.analyzer-raw-packet-block-size` +Upgrade from old version: `static_config.ebpf.go-tracing-timeout` **默认值**: ```yaml inputs: - cbpf: - tunning: - raw_packet_buffer_block_size: 65536 + ebpf: + socket: + uprobe: + golang: + tracing_timeout: 120s ``` **模式**: | Key | Value | | ---- | 
---------------------------- | -| Type | int | -| Range | [65536, 16000000] | +| Type | duration | +| Range | ['0ns', '1d'] | **详细描述**: -Analyzer 模式下采集到的包进入队列前需要分配内存暂存。为避免每个包进行内存申请,每次开辟 -raw_packet_buffer_block_size 大小的内存块给数个包使用。 -更大的配置可以减少内存分配,但会延迟内存释放。 -该配置对以下采集模式(`inputs.cbpf.common.capture_mode`)生效: -- analyzer 模式 -- local 模式,且 `inputs.cbpf.af_packet.inner_interface_capture_enabled` = true -- local 模式,且 `inputs.cbpf.tunning.dispatcher_queue_enabled` = true -- mirror 模式,且 `inputs.cbpf.tunning.dispatcher_queue_enabled` = true +Golang 程序追踪时请求与响应之间的最大时间间隔,设置为 '0ns' 时,Golang 程序的零侵扰追踪特性自动关闭。 + +##### TLS {#inputs.ebpf.socket.uprobe.tls} -#### 裸包队列大小 {#inputs.cbpf.tunning.raw_packet_queue_size} +###### Enabled {#inputs.ebpf.socket.uprobe.tls.enabled} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.tunning.raw_packet_queue_size` +`inputs.ebpf.socket.uprobe.tls.enabled` -Upgrade from old version: `static_config.analyzer-queue-size` +Upgrade from old version: `static_config.ebpf.uprobe-openssl-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.openssl` **默认值**: ```yaml inputs: - cbpf: - tunning: - raw_packet_queue_size: 131072 + ebpf: + socket: + uprobe: + tls: + enabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 64000000] | +| Type | bool | **详细描述**: -以下队列的长度(仅在 `inputs.cbpf.common.capture_mode` = `物理网络镜像`时有效): -- 0.1-bytes-to-parse -- 0.2-packet-to-flowgenerator -- 0.3-packet-to-pipeline +是否启用使用 openssl 库的进程以支持 HTTPS 协议数据采集。 -#### 最大采集 PPS {#inputs.cbpf.tunning.max_capture_pps} +可通过以下方式判断应用进程是否能够使用 `Uprobe hook openssl 库`来采集加密数据: +- 执行命令`sudo cat /proc/<PID>/maps | grep "libssl.so"`,若包含 openssl 相关信息 + 则说明该进程正在使用 openssl 库。 +- 如果上面没有搜到 "libssl.so" 也可能是静态编译了,这时候我们可以通过下面方式确认: + 执行命令 `sudo nm /proc/<PID>/exe | grep SSL_write` 若包含 `SSL_write` 相关信息如:`0000000000502ac0 T SSL_write` + 则说明该进程正在使用静态编译的 openssl 库。 + +启用后,deepflow-agent 将获取符合正则表达式匹配的进程信息,并 Hook openssl 库的相应加解密接口。 +在日志中您会看到类似如下信息: +``` 
+[eBPF] INFO openssl uprobe, pid:1005, path:/proc/1005/root/usr/lib64/libssl.so.1.0.2k +或者 +[eBPF] INFO openssl uprobe, pid:28890, path:/proc/28890/root/usr/sbin/nginx +``` + +注意:开启此功能后,Envoy mTLS 流量可自动完成追踪; +若为非 Envoy 流量,则需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, +即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.socket.uprobe.tls`。 + +#### Kprobe {#inputs.ebpf.socket.kprobe} + +##### 禁用 kprobe {#inputs.ebpf.socket.kprobe.disabled} **标签**: -`hot_update` +agent_restart **FQCN**: -`inputs.cbpf.tunning.max_capture_pps` - -Upgrade from old version: `max_collect_pps` +`inputs.ebpf.socket.kprobe.disabled` **默认值**: ```yaml inputs: - cbpf: - tunning: - max_capture_pps: 1048576 + ebpf: + socket: + kprobe: + disabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | pps | -| Range | [1, 10000000] | +| Type | bool | **详细描述**: -deepflow-agent 采集数据包的速率上限。 - -### 预处理 {#inputs.cbpf.preprocess} +当设置为 true 时,kprobe 功能将被禁用。 -#### 隧道解封装协议 {#inputs.cbpf.preprocess.tunnel_decap_protocols} +##### 启用 Unix Socket 追踪 {#inputs.ebpf.socket.kprobe.enable_unix_socket} **标签**: -`hot_update` +agent_restart **FQCN**: -`inputs.cbpf.preprocess.tunnel_decap_protocols` - -Upgrade from old version: `decap_type` +`inputs.ebpf.socket.kprobe.enable_unix_socket` **默认值**: ```yaml inputs: - cbpf: - preprocess: - tunnel_decap_protocols: - - 1 - - 2 + ebpf: + socket: + kprobe: + enable_unix_socket: false ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| 1 | VXLAN | -| 2 | IPIP | -| 3 | GRE | -| 4 | Geneve | -| 5 | VXLAN-NSH | - **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | +| Type | bool | **详细描述**: -deepflow-agent 需要对数据包解封装的隧道协议,仅企业版本支持解析 GRE 和 VXLAN-NSH。 +当设置为 true 时,启用 Unix Socket 追踪。 -#### 隧道头剥离协议 {#inputs.cbpf.preprocess.tunnel_trim_protocols} +##### 黑名单 {#inputs.ebpf.socket.kprobe.blacklist} + +###### 端口号 {#inputs.ebpf.socket.kprobe.blacklist.ports} **标签**: @@ 
-3537,25 +3316,20 @@ deepflow-agent 需要对数据包解封装的隧道协议,仅企业版本支 **FQCN**: -`inputs.cbpf.preprocess.tunnel_trim_protocols` +`inputs.ebpf.socket.kprobe.blacklist.ports` -Upgrade from old version: `static_config.trim-tunnel-types` +Upgrade from old version: `static_config.ebpf.kprobe-blacklist.port-list` **默认值**: ```yaml inputs: - cbpf: - preprocess: - tunnel_trim_protocols: [] + ebpf: + socket: + kprobe: + blacklist: + ports: '' ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| ERSPAN | | -| VXLAN | | -| TEB | | - **模式**: | Key | Value | | ---- | ---------------------------- | @@ -3563,28 +3337,33 @@ inputs: **详细描述**: -流量镜像(虚拟或物理)模式下,deepflow-agent 需要剥离的隧道头协议类型。 -仅企业版支持解析 ERSPAN 和 TEB。 +TCP 和 UDP 的端口黑名单列表。端口号列入黑名单的 socket 将被 Kprobe 采集忽略。黑名单 +生效优先级高于 kprobe 白名单。 + +配置样例: `ports: 80,1000-2000` + +##### 白名单 {#inputs.ebpf.socket.kprobe.whitelist} -#### TCP分段重组端口 {#inputs.cbpf.preprocess.packet_segmentation_reassembly} +###### 白名单 {#inputs.ebpf.socket.kprobe.whitelist.ports} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.preprocess.packet_segmentation_reassembly` +`inputs.ebpf.socket.kprobe.whitelist.ports` -Upgrade from old version: `static_config.packet-segmentation-reassembly` +Upgrade from old version: `static_config.ebpf.kprobe-whitelist.port-list` **默认值**: ```yaml inputs: - cbpf: - preprocess: - packet_segmentation_reassembly: [] + ebpf: + socket: + kprobe: + whitelist: + ports: '' ``` **模式**: @@ -3594,119 +3373,114 @@ inputs: **详细描述**: -对指定端口的流,相邻的两个TCP分段 Packet 聚合在一起解析应用日志 +TCP 和 UDP 的端口白名单列表,白名单生效优先级低于 kprobe 黑名单。 +未列入黑名单、白名单的端口用 kprobe 做采集。 -配置示例: +配置样例: `ports: 80,1000-2000` -packet_segmentation_reassembly: -- 1000 -- 2000-2010 -- 5000 +#### SockOps {#inputs.ebpf.socket.sock_ops} -### 物理网络流量镜像 {#inputs.cbpf.physical_mirror} +##### TCP Option Trace {#inputs.ebpf.socket.sock_ops.tcp_option_trace} -#### 默认采集网络类型 {#inputs.cbpf.physical_mirror.default_capture_network_type} +###### TCP Option 注入 
{#inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled} **标签**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.physical_mirror.default_capture_network_type` - -Upgrade from old version: `static_config.default-tap-type` +`inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled` **默认值**: ```yaml inputs: - cbpf: - physical_mirror: - default_capture_network_type: 3 + ebpf: + socket: + sock_ops: + tcp_option_trace: + enabled: false ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| 3 | 云网络 | -| _DYNAMIC_OPTIONS_ | _DYNAMIC_OPTIONS_ | - **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | +| Type | bool | **详细描述**: -在 `inputs.cbpf.common.capture_mode` 为`物理网络镜像`模式下,deepflow-agent 通过镜像流量的外层 VLAN 标签识别并标记采集数据的 -TAP(Traffic Access Point)值。当流量外层 VLAN 标签没有对应的 TAP 值,或 VLAN pcp 值与 -`inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic` 的配置不一致时,deepflow-agent 使用本参数值 -标记数据的 TAP 值。 +是否开启 TCP Option Tracing SockOps 程序,用于在满足条件的 TCP 连接上注入 DeepFlow 元数据(如进程 PID)。 +注意:该功能依赖 cgroup v2(统一层级)和内核版本 > 5.10。在 cgroup v1 主机上 SockOps 绑定会失败。 
+兼容性:已在 x86 上验证内核 > 5.10;arm 目前仅在 6.8 内核上测试。 +限制:PID 跟踪依赖 per-CPU syscall map。在 CPU 拥堵或软中断运行于不同 CPU 时,注入的元数据可能缺失或过期。 -#### 禁用 Packet 去重 {#inputs.cbpf.physical_mirror.packet_dedup_disabled} +###### PID 注入窗口 {#inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes} **标签**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.physical_mirror.packet_dedup_disabled` - -Upgrade from old version: `static_config.analyzer-dedup-disabled` +`inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes` **默认值**: ```yaml inputs: - cbpf: - physical_mirror: - packet_dedup_disabled: false + ebpf: + socket: + sock_ops: + tcp_option_trace: + sampling_window_bytes: 16384 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Unit | Bytes | +| Range | [0, 1048576] | **详细描述**: -当 `inputs.cbpf.common.capture_mode` 为`物理网络镜像`模式, 该参数配置为 `true` 时,deepflow-agent 将不对数据包做去重处理。 +控制 PID 注入之间的最小 TCP 负载间隔字节数。缺省为 16KB,与历史行为一致;值越小注入越频繁,值越大越稀疏。 +设置为 0 时关闭采样窗口功能,对所有数据包注入。 + +#### 调优 {#inputs.ebpf.socket.tunning} -#### 专有云网关流量 {#inputs.cbpf.physical_mirror.private_cloud_gateway_traffic} +##### 最大采集速率 {#inputs.ebpf.socket.tunning.max_capture_rate} **标签**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.physical_mirror.private_cloud_gateway_traffic` +`inputs.ebpf.socket.tunning.max_capture_rate` -Upgrade from old version: `static_config.cloud-gateway-traffic` +Upgrade from old version: `static_config.ebpf.global-ebpf-pps-threshold` **默认值**: ```yaml inputs: - cbpf: - physical_mirror: - private_cloud_gateway_traffic: false + ebpf: + socket: + tunning: + max_capture_rate: 0 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Unit | Per Second | +| Range | [0, 64000000] | **详细描述**: -当 `inputs.cbpf.common.capture_mode` 为 `物理网络镜像` 模式,该参数配置为 `true` 时,deepflow-agent 会将流量识别为 NFVGW 流量。 - -## eBPF {#inputs.ebpf} +eBPF 数据的最大采集速率,设置为 `0` 表示不对 deepflow-agent 的 eBPF 数据采集速率做限制。 
-### Disabled {#inputs.ebpf.disabled} +##### 禁用 syscall_trace_id 相关的计算 {#inputs.ebpf.socket.tunning.syscall_trace_id_disabled} **标签**: @@ -3714,15 +3488,15 @@ inputs: **FQCN**: -`inputs.ebpf.disabled` - -Upgrade from old version: `static_config.ebpf.disabled` +`inputs.ebpf.socket.tunning.syscall_trace_id_disabled` **默认值**: ```yaml inputs: ebpf: - disabled: false + socket: + tunning: + syscall_trace_id_disabled: false ``` **模式**: @@ -3732,15 +3506,10 @@ inputs: **详细描述**: -eBPF 特性的总开关。 - -### Socket {#inputs.ebpf.socket} - -#### Uprobe {#inputs.ebpf.socket.uprobe} - -##### Golang {#inputs.ebpf.socket.uprobe.golang} +当 trace_id 注入所有请求时,所有请求的 syscall_trace_id 计算逻辑可以关闭。这将大大减少 +eBPF hook 进程的 CPU 消耗。 -###### Enabled {#inputs.ebpf.socket.uprobe.golang.enabled} +##### 禁用预分配内存 {#inputs.ebpf.socket.tunning.map_prealloc_disabled} **标签**: @@ -3748,18 +3517,17 @@ eBPF 特性的总开关。 **FQCN**: -`inputs.ebpf.socket.uprobe.golang.enabled` +`inputs.ebpf.socket.tunning.map_prealloc_disabled` -Upgrade from old version: `static_config.ebpf.uprobe-golang-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.golang` +Upgrade from old version: `static_config.ebpf.map-prealloc-disabled` **默认值**: ```yaml inputs: ebpf: socket: - uprobe: - golang: - enabled: false + tunning: + map_prealloc_disabled: false ``` **模式**: @@ -3769,12 +3537,11 @@ inputs: **详细描述**: -Golang 程序 HTTP2/HTTPS 协议数据采集及零侵扰追踪特性的开启开关。 - -注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, -即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.socket.uprobe.golang`。 +当完整的map预分配过于昂贵时,将此配置设置为 `true` 可以防止在定义map时进行内存预分配, +但这可能会导致一些性能下降。此配置仅适用于 `BPF_MAP_TYPE_HASH` 类型的 bpf map。 +目前适用于 socket trace 和 uprobe Golang/OpenSSL trace 功能。禁用内存预分配大约会减少45M的内存占用。 -###### 追踪超时时间 {#inputs.ebpf.socket.uprobe.golang.tracing_timeout} +##### Socket Hook Syscall 列表 {#inputs.ebpf.socket.tunning.hooked_socket_syscalls} **标签**: @@ -3782,33 +3549,60 @@ Golang 程序 HTTP2/HTTPS 协议数据采集及零侵扰追踪特性的开启开 **FQCN**: 
-`inputs.ebpf.socket.uprobe.golang.tracing_timeout` - -Upgrade from old version: `static_config.ebpf.go-tracing-timeout` +`inputs.ebpf.socket.tunning.hooked_socket_syscalls` **默认值**: ```yaml inputs: ebpf: socket: - uprobe: - golang: - tracing_timeout: 120s + tunning: + hooked_socket_syscalls: + - read + - readv + - recvfrom + - recvmsg + - recvmmsg + - sendmsg + - sendmmsg + - sendto + - write + - writev ``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| read | | +| readv | | +| recvfrom | | +| recvmsg | | +| recvmmsg | | +| sendmsg | | +| sendmmsg | | +| sendto | | +| write | | +| writev | | + **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['0ns', '1d'] | +| Type | string | **详细描述**: -Golang 程序追踪时请求与响应之间的最大时间间隔,设置为 '0ns' 时,Golang 程序的零侵扰追踪特性自动关闭。 +控制为哪些受支持的 socket syscall 安装 eBPF hook。 -##### TLS {#inputs.ebpf.socket.uprobe.tls} +该列表只控制是否 hook 某个 syscall,不控制具体使用哪种 backend。每个启用的 +syscall 仍然遵循当前运行模式下既有的 backend 选择逻辑。例如 mixed 模式继续保留 +现有的 hybrid 与 tracepoint-only 分工,pure-kprobe 模式继续保留既有的 kprobe +行为,kfunc 模式继续保留既有的 kfunc 行为,以及 `recvfrom` 和 `recvmmsg` +的 tracepoint fallback。 -###### Enabled {#inputs.ebpf.socket.uprobe.tls.enabled} +支持的配置值:`read`、`readv`、`recvfrom`、`recvmsg`、`recvmmsg`、`sendmsg`、 +`sendmmsg`、`sendto`、`write`、`writev`。 + +##### 启用fentry/fexit特性 {#inputs.ebpf.socket.tunning.fentry_enabled} **标签**: @@ -3816,18 +3610,15 @@ Golang 程序追踪时请求与响应之间的最大时间间隔,设置为 '0n **FQCN**: -`inputs.ebpf.socket.uprobe.tls.enabled` - -Upgrade from old version: `static_config.ebpf.uprobe-openssl-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.openssl` +`inputs.ebpf.socket.tunning.fentry_enabled` **默认值**: ```yaml inputs: ebpf: socket: - uprobe: - tls: - enabled: false + tunning: + fentry_enabled: false ``` **模式**: @@ -3837,204 +3628,150 @@ inputs: **详细描述**: -是否启用使用 openssl 库的进程以支持 HTTPS 协议数据采集。 - -可通过以下方式判断应用进程是否能够使用 `Uprobe hook openssl 库`来采集加密数据: -- 执行命令`sudo cat /proc//maps | grep 
"libssl.so"`,若包含 openssl 相关信息 - 则说明该进程正在使用 openssl 库。 -- 如果上面没有搜到 "libssl.so" 也可能是静态编译了,这时候我们可以通过下面方式确认: - 执行命令 `sudo nm /proc//exe | grep SSL_write` 若包含 `SSL_write` 相关信息如:`0000000000502ac0 T SSL_write` - 则说明该进程正在使用静态编译的 openssl 库。 - -启用后,deepflow-agent 将获取符合正则表达式匹配的进程信息,并 Hook openssl 库的相应加解密接口。 -在日志中您会看到类似如下信息: -``` -[eBPF] INFO openssl uprobe, pid:1005, path:/proc/1005/root/usr/lib64/libssl.so.1.0.2k -或者 -[eBPF] INFO openssl uprobe, pid:28890, path:/proc/28890/root/usr/sbin/nginx -``` +使用 fentry/fexit 特性说明 +- 相比传统的 kprobes,fentry 和 fexit 程序提供了更高的性能和可用性,可带来约 5%–10% 的性能提升。 +- 部分 Linux 内核对该特性支持不够完善,可能导致内核 BUG 和节点崩溃。已知的 BUG 修复包括: + - TencentOS Linux kernel 5.4.119 的修复 + [https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423](https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423) + - Alibaba Cloud Linux kernel 5.10.23 的修复 + [https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861](https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861) +- 内核建议:若要启用 fentry/fexit 特性,推荐使用 Linux kernel 5.10.28 及以上版本,以确保稳定性和性能。 -注意:开启此功能后,Envoy mTLS 流量可自动完成追踪; -若为非 Envoy 流量,则需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, -即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.socket.uprobe.tls`。 +### File {#inputs.ebpf.file} -##### DPDK {#inputs.ebpf.socket.uprobe.dpdk} +#### IO 事件 {#inputs.ebpf.file.io_event} -###### DPDK 应用命令名称 {#inputs.ebpf.socket.uprobe.dpdk.command} +##### 采集模式 {#inputs.ebpf.file.io_event.collect_mode} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.uprobe.dpdk.command` +`inputs.ebpf.file.io_event.collect_mode` + +Upgrade from old version: `static_config.ebpf.io-event-collect-mode` **默认值**: ```yaml inputs: ebpf: - socket: - uprobe: - dpdk: - command: '' + file: + io_event: + collect_mode: 1 ``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| 0 | 禁用 | +| 1 | 调用生命周期 | +| 2 | 全部 | + **模式**: | Key | Value | | 
---- | ---------------------------- | -| Type | string | +| Type | int | **详细描述**: -设置 DPDK 应用的命令名称, eBPF 会自动寻找并进行追踪采集数据包 - -配置样例: 如果命令行是 `/usr/bin/mydpdk`, 可以配置成 `command: mydpdk`, 并设置 `inputs.cbpf.special_network.dpdk.source = eBPF` +采集模式: +- 禁用:不采集任何文件 IO 事件。 +- 调用生命周期:仅采集调用生命周期内的文件 IO 事件。 +- 全部:采集所有的文件 IO 事件。 -在 DPDK 作为 vhost-user 后端的场景中,虚拟机与 DPDK 应用之间通过 virtqueue(vring)进行数据交换。 -eBPF 可以在无需修改 DPDK 或虚拟机的前提下,自动 hook 到 vring 接口,实现对传输数据包的捕获和分析, -无需额外配置即可实现流量可观测。相比之下,若要捕获物理网卡上的数据包,则需要配合 DPDK 的驱动接口进行显式配置。 +说明: +- 为了获取文件的完整路径,需要结合进程的挂载信息进行路径拼接。然而,一些进程在完成任务后会迅速退出, + 此时我们处理其产生的文件读写数据时,可能已无法从 /proc/[pid]/mountinfo 中获取挂载信息,导致路径不 + 完整(缺少挂载点)。我们对于 50ms 以下生存期的进程,文件路径会缺少挂载点信息。对于长期运行的进程, + 则不存在该问题。 -###### DPDK 应用数据包接收 hook 点设置 {#inputs.ebpf.socket.uprobe.dpdk.rx_hooks} +##### 最小耗时 {#inputs.ebpf.file.io_event.minimal_duration} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.uprobe.dpdk.rx_hooks` +`inputs.ebpf.file.io_event.minimal_duration` + +Upgrade from old version: `static_config.ebpf.io-event-minimal-duration` **默认值**: ```yaml inputs: ebpf: - socket: - uprobe: - dpdk: - rx_hooks: [] + file: + io_event: + minimal_duration: 1ms ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['1ns', '1s'] | **详细描述**: -根据实际的网卡驱动填写合适的数据包接收 hook 点,可以利用命令 'lspci -vmmk' 寻找网卡驱动类型例如: -``` -Slot: 04:00.0 -Class: Ethernet controller -Vendor: Intel Corporation -Device: Ethernet Controller XL710 for 40GbE QSFP+ -SVendor: Unknown vendor 1e18 -SDevice: Device 4712 -Rev: 02 -Driver: igb_uio -Module: i40e -``` -上面的 "Driver: igb_uio" 说明是 DPDP 纳管的设备 (除此之外还有 "vfio-pci", "uio_pci_generic" -也被 DPDK 纳管), 真实驱动是 'i40e' (从 'Module: i40e' 得到) - -可以使用 deepflow 提供的可持续剖析功能对 DPDK 应用做函数剖析查看具体接口名字,也可以使用 perf 命令 -在agent所在节点上运行 `perf record -F97 -a -g -p -- sleep 30`, -`perf script | grep -E 'recv|xmit|rx|tx' | grep ` (`drive_name` may be `ixgbe/i40e/mlx5`) -来确认驱动接口。 - -下面列出了不同驱动对应的接口名称,仅供参考: - 1. 
Physical NIC Drivers: - - Intel Drivers: - - ixgbe: Supports Intel 82598/82599/X520/X540/X550 series NICs. - - rx: ixgbe_recv_pkts, ixgbe_recv_pkts_vec - - tx: ixgbe_xmit_pkts, ixgbe_xmit_fixed_burst_vec, ixgbe_xmit_pkts_vec - - i40e: Supports Intel X710, XL710 series NICs. - - rx: i40e_recv_pkts - - tx: i40e_xmit_pkts - - ice: Supports Intel E810 series NICs. - - rx: ice_recv_pkts - - tx: ice_xmit_pkts - - Mellanox Drivers: - - mlx4: Supports Mellanox ConnectX-3 series NICs. - - rx: mlx4_rx_burst - - tx: mlx4_tx_burst - - mlx5: Supports Mellanox ConnectX-4, ConnectX-5, ConnectX-6 series NICs. - - rx: mlx5_rx_burst, mlx5_rx_burst_vec, mlx5_rx_burst_mprq - - tx: Pending confirmation - - Broadcom Drivers: - - bnxt: Supports Broadcom NetXtreme series NICs. - - rx: bnxt_recv_pkts, bnxt_recv_pkts_vec (x86, Vector mode receive) - - tx: bnxt_xmit_pkts, bnxt_xmit_pkts_vec (x86, Vector mode transmit) - 2. Virtual NIC Drivers: - - Virtio Driver: - - virtio: Supports Virtio-based virtual network interfaces. - - rx: virtio_recv_pkts, virtio_recv_mergeable_pkts_packed, virtio_recv_pkts_packed, - virtio_recv_pkts_vec, virtio_recv_pkts_inorder, virtio_recv_mergeable_pkts - - tx: virtio_xmit_pkts_packed, virtio_xmit_pkts, - - VMXNET3 Driver: - - vmxnet3: Supports VMware's VMXNET3 virtual NICs. 
- - rx: vmxnet3_recv_pkts - - tx: vmxnet3_xmit_pkts - -配置样例: `rx_hooks: [ixgbe_recv_pkts, i40e_recv_pkts, virtio_recv_pkts, virtio_recv_mergeable_pkts]` - -注意:在当前 DPDK 驱动接口的突发模式下发送和接收数据包时,旧版 Linux 内核(低于 5.2)的 eBPF 指令数量限制为 4096。 -因此,在 DPDK 捕获数据包期间,最多只能捕获 16 个数据包。对于 Linux 5.2 及以上版本的内核,最多可捕获 32 个数 -据包(这通常是 DPDK 突发模式的默认值)。对于低于 Linux 5.2 的内核,如果突发大小超过 16,可能会发生数据包丢失。 +deepflow-agent 所采集的文件 IO 事件的时延下限阈值,操作系统中时延低于此阈值 +的文件 IO 事件将被忽略。 -###### DPDK 应用数据包发送 hook 点设置 {#inputs.ebpf.socket.uprobe.dpdk.tx_hooks} +##### 启用虚拟文件采集 {#inputs.ebpf.file.io_event.enable_virtual_file_collect} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.uprobe.dpdk.tx_hooks` +`inputs.ebpf.file.io_event.enable_virtual_file_collect` **默认值**: ```yaml inputs: ebpf: - socket: - uprobe: - dpdk: - tx_hooks: [] + file: + io_event: + enable_virtual_file_collect: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | bool | **详细描述**: -根据实际的网卡驱动填写合适的数据包发送 hook 点, 获取驱动方法和发送hook点设置以及注意事项参考 `inputs.ebpf.socket.uprobe.dpdk.rx_hooks` 的说明. 
+当设置为 true 时,deepflow-agent 将采集发生在虚拟文件系统上的文件 +I/O 事件(例如 /proc、/sys、/run 等由内核动态生成的伪文件系统)。 +当设置为 false 时,将不会采集虚拟文件系统上的文件 I/O 事件。 -配置样例: `tx_hooks: [i40e_xmit_pkts, virtio_xmit_pkts_packed, virtio_xmit_pkts]` +### Profile {#inputs.ebpf.profile} -#### Kprobe {#inputs.ebpf.socket.kprobe} +#### 栈回溯 {#inputs.ebpf.profile.unwinding} -##### 禁用 kprobe {#inputs.ebpf.socket.kprobe.disabled} +##### 禁用 DWARF 栈回溯 {#inputs.ebpf.profile.unwinding.dwarf_disabled} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.disabled` +`inputs.ebpf.profile.unwinding.dwarf_disabled` + +Upgrade from old version: `static_config.ebpf.dwarf-disabled` **默认值**: ```yaml inputs: ebpf: - socket: - kprobe: - disabled: false + profile: + unwinding: + dwarf_disabled: true ``` **模式**: @@ -4044,130 +3781,129 @@ inputs: **详细描述**: -当设置为 true 时,kprobe 功能将被禁用。 +默认设置为 `true`,将禁用 DWARF 栈回溯,对所有进程使用基于帧指针的回溯,如果进程不包含帧指针将无法显示正常的栈。 +设置为 `false` 将对所有不包含帧指针的进程启用 DWARF 回溯。采集器使用启发式算法判断待剖析进程是否包含帧指针。 +设置 `dwarf_regex` 后,将强制对匹配的进程使用 DWARF 回溯。 -##### 启用 Unix Socket 追踪 {#inputs.ebpf.socket.kprobe.enable_unix_socket} +##### DWARF 回溯进程匹配正则表达式 {#inputs.ebpf.profile.unwinding.dwarf_regex} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.enable_unix_socket` +`inputs.ebpf.profile.unwinding.dwarf_regex` + +Upgrade from old version: `static_config.ebpf.dwarf-regex` **默认值**: ```yaml inputs: ebpf: - socket: - kprobe: - enable_unix_socket: false + profile: + unwinding: + dwarf_regex: '' ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **详细描述**: -当设置为 true 时,启用 Unix Socket 追踪。 - -##### 黑名单 {#inputs.ebpf.socket.kprobe.blacklist} +如设置为空,采集器将使用启发式算法判断待剖析进程是否包含帧指针,并对不包含帧指针的进程使用 DWARF 栈回溯。 +如设置为合法正则表达式,采集器将不再自行推断进程是否包含帧指针,改用该正则表达式对进程名进行匹配,仅对匹配的进程使用 DWARF 帧回溯。 -###### 端口号 {#inputs.ebpf.socket.kprobe.blacklist.ports} +##### DWARF 回溯进程表容量 {#inputs.ebpf.profile.unwinding.dwarf_process_map_size} **标签**: -agent_restart +`hot_update` **FQCN**: 
-`inputs.ebpf.socket.kprobe.blacklist.ports` +`inputs.ebpf.profile.unwinding.dwarf_process_map_size` -Upgrade from old version: `static_config.ebpf.kprobe-blacklist.port-list` +Upgrade from old version: `static_config.ebpf.dwarf-process-map-size` **默认值**: ```yaml inputs: ebpf: - socket: - kprobe: - blacklist: - ports: '' + profile: + unwinding: + dwarf_process_map_size: 1024 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [1, 131072] | **详细描述**: -TCP 和 UDP 的端口黑名单列表。端口号列入黑名单的 socket 将被 Kprobe 采集忽略。黑名单 -生效优先级高于 kprobe 白名单。 - -配置样例: `ports: 80,1000-2000` - -##### 白名单 {#inputs.ebpf.socket.kprobe.whitelist} +每个需要进行 DWARF 回溯的进程在该表中有一条记录,用于关联进程和回溯记录分片。 +每条记录大约占 8K 内存,默认配置大约需要分配 8M 内核内存。 +由于是哈希表,配置可以比最大进程号低。 +该配置只在 DWARF 功能开启时生效。 -###### 白名单 {#inputs.ebpf.socket.kprobe.whitelist.ports} +##### DWARF 回溯分片表容量 {#inputs.ebpf.profile.unwinding.dwarf_shard_map_size} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.whitelist.ports` +`inputs.ebpf.profile.unwinding.dwarf_shard_map_size` -Upgrade from old version: `static_config.ebpf.kprobe-whitelist.port-list` +Upgrade from old version: `static_config.ebpf.dwarf-shard-map-size` **默认值**: ```yaml inputs: ebpf: - socket: - kprobe: - whitelist: - ports: '' + profile: + unwinding: + dwarf_shard_map_size: 128 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [1, 4096] | **详细描述**: -TCP 和 UDP 的端口白名单列表,白名单生效优先级低于 kprobe 黑名单。 -未列入黑名单、白名单的端口用 kprobe 做采集。 - -配置样例: `ports: 80,1000-2000` - -#### SockOps {#inputs.ebpf.socket.sock_ops} +DWARF 回溯记录分片数量。 +每条记录大约占 1M 内存,默认配置大约需要分配 128M 内核内存。 +该配置只在 DWARF 功能开启时生效。 -##### TCP Option Trace {#inputs.ebpf.socket.sock_ops.tcp_option_trace} +#### On-CPU {#inputs.ebpf.profile.on_cpu} -###### TCP Option 注入 {#inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled} +##### Disabled {#inputs.ebpf.profile.on_cpu.disabled} **标签**: -`hot_update` +agent_restart 
**FQCN**: -`inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled` +`inputs.ebpf.profile.on_cpu.disabled` + +Upgrade from old version: `static_config.ebpf.on-cpu-profile.disabled` **默认值**: ```yaml inputs: ebpf: - socket: - sock_ops: - tcp_option_trace: - enabled: false + profile: + on_cpu: + disabled: false ``` **模式**: @@ -4177,78 +3913,77 @@ inputs: **详细描述**: -是否开启 TCP Option Tracing SockOps 程序,用于在满足条件的 TCP 连接上注入 DeepFlow 元数据(如进程 PID)。 -注意:该功能依赖 cgroup v2(统一层级)和内核版本 > 5.10。在 cgroup v1 主机上 SockOps 绑定会失败. -兼容性:已在 x86 上验证内核 > 5.10;arm 目前仅在 6.8 内核上测试。 -限制:PID 跟踪依赖per-CPU syscall map。CPU 拥堵、软中断可能在不同 CPU 运行时,注入的元数据可能缺失或过期。 +eBPF On-CPU profile 数据的采集开关。 -###### PID 注入窗口 {#inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes} +注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, +即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.profile.on_cpu`。 + +##### 采样频率 {#inputs.ebpf.profile.on_cpu.sampling_frequency} **标签**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes` +`inputs.ebpf.profile.on_cpu.sampling_frequency` + +Upgrade from old version: `static_config.ebpf.on-cpu-profile.frequency` **默认值**: ```yaml inputs: ebpf: - socket: - sock_ops: - tcp_option_trace: - sampling_window_bytes: 16384 + profile: + on_cpu: + sampling_frequency: 99 ``` **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Unit | Bytes | -| Range | [0, 1048576] | +| Range | [1, 1000] | **详细描述**: -控制 PID 注入之间的最小 TCP 负载间隔字节数。缺省为 16KB,与历史行为一致;值越小注入越频繁,值越大越稀疏。 -设置为0关闭采样窗口功能,对所有数据包注入。 - -#### 调优 {#inputs.ebpf.socket.tunning} +eBPF On-CPU profile 数据的采样频率。 -##### 最大采集速率 {#inputs.ebpf.socket.tunning.max_capture_rate} +##### 按 CPU 聚合 {#inputs.ebpf.profile.on_cpu.aggregate_by_cpu} **标签**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.socket.tunning.max_capture_rate` +`inputs.ebpf.profile.on_cpu.aggregate_by_cpu` -Upgrade from old version: `static_config.ebpf.global-ebpf-pps-threshold`
+Upgrade from old version: `static_config.ebpf.on-cpu-profile.cpu` **默认值**: ```yaml inputs: ebpf: - socket: - tunning: - max_capture_rate: 0 + profile: + on_cpu: + aggregate_by_cpu: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | Per Second | -| Range | [0, 64000000] | +| Type | bool | **详细描述**: -eBPF 数据的最大采集速率,设置为 `0` 表示不对 deepflow-agent 的 eBPF 数据采集速率做限制。 +采集 On-CPU 采样数据时,是否获取 CPUID 的开关。 +- `true`: 表示在采集 On-CPU 采样数据时获取 CPUID (On-CPU 剖析时,支持对单个 CPU 的分析)。 +- `false`: 表示在采集 On-CPU 采样数据时不获取 CPUID (On-CPU 剖析时,不支持单个 CPU 的分析)。 -##### 禁用 syscall_trace_id 相关的计算 {#inputs.ebpf.socket.tunning.syscall_trace_id_disabled} +#### 预处理 {#inputs.ebpf.profile.preprocess} + +##### 函数栈压缩 {#inputs.ebpf.profile.preprocess.stack_compression} **标签**: @@ -4256,15 +3991,17 @@ eBPF 数据的最大采集速率,设置为 `0` 表示不对 deepflow-agent 的 **FQCN**: -`inputs.ebpf.socket.tunning.syscall_trace_id_disabled` +`inputs.ebpf.profile.preprocess.stack_compression` + +Upgrade from old version: `static_config.ebpf.preprocess.stack-compression` **默认值**: ```yaml inputs: ebpf: - socket: - tunning: - syscall_trace_id_disabled: false + profile: + preprocess: + stack_compression: true ``` **模式**: @@ -4274,10 +4011,16 @@ inputs: **详细描述**: -当 trace_id 注入所有请求时,所有请求的 syscall_trace_id 计算逻辑可以关闭。这将大大减少 -eBPF hook 进程的 CPU 消耗。 +发送数据之前压缩函数调用栈。压缩能够有效降低 agent 的内存开销、数据传输的带宽消耗、以及 +ingester 的 CPU 开销,但是 Agent 也会因此消耗更多的 CPU。测试表明,将deepflow-agent 自身的 +on-cpu 函数调用栈压缩,可以将带宽消耗降低 `x` 倍,但会使得 agent 额外消耗 `y%` 的 CPU。 -##### 禁用预分配内存 {#inputs.ebpf.socket.tunning.map_prealloc_disabled} +#### 语言特定剖析 {#inputs.ebpf.profile.languages} + +控制对哪些解释型语言进行剖析。禁用不使用的语言可以节省每个语言约 5-6 MB 内存。 +总内存占用:~17-20 MB(全部启用),~6.1 MB(仅 Python),~5.2 MB(仅 PHP),~6.4 MB(仅 Node.js)。 + +##### 禁用 Python 剖析 {#inputs.ebpf.profile.languages.python_disabled} **标签**: @@ -4285,17 +4028,15 @@ eBPF hook 进程的 CPU 消耗。 **FQCN**: -`inputs.ebpf.socket.tunning.map_prealloc_disabled` - -Upgrade from old version: `static_config.ebpf.map-prealloc-disabled` 
+`inputs.ebpf.profile.languages.python_disabled` **默认值**: ```yaml inputs: ebpf: - socket: - tunning: - map_prealloc_disabled: false + profile: + languages: + python_disabled: false ``` **模式**: @@ -4305,11 +4046,10 @@ inputs: **详细描述**: -当完整的map预分配过于昂贵时,将此配置设置为 `true` 可以防止在定义map时进行内存预分配, -但这可能会导致一些性能下降。此配置仅适用于 `BPF_MAP_TYPE_HASH` 类型的 bpf map。 -目前适用于 socket trace 和 uprobe Golang/OpenSSL trace 功能。禁用内存预分配大约会减少45M的内存占用。 +禁用 Python 解释器剖析。禁用后将不采集 Python 进程的函数调用栈, +可节省约 6.1 MB 内核内存(python_tstate_addr_map、python_unwind_info_map、python_offsets_map)。 -##### Socket Hook Syscall 列表 {#inputs.ebpf.socket.tunning.hooked_socket_syscalls} +##### 禁用 PHP 剖析 {#inputs.ebpf.profile.languages.php_disabled} **标签**: @@ -4317,60 +4057,28 @@ inputs: **FQCN**: -`inputs.ebpf.socket.tunning.hooked_socket_syscalls` +`inputs.ebpf.profile.languages.php_disabled` **默认值**: ```yaml inputs: ebpf: - socket: - tunning: - hooked_socket_syscalls: - - read - - readv - - recvfrom - - recvmsg - - recvmmsg - - sendmsg - - sendmmsg - - sendto - - write - - writev + profile: + languages: + php_disabled: false ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| read | | -| readv | | -| recvfrom | | -| recvmsg | | -| recvmmsg | | -| sendmsg | | -| sendmmsg | | -| sendto | | -| write | | -| writev | | - **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | bool | **详细描述**: -控制为哪些受支持的 socket syscall 安装 eBPF hook。 - -该列表只控制是否 hook 某个 syscall,不控制具体使用哪种 backend。每个启用的 -syscall 仍然遵循当前运行模式下既有的 backend 选择逻辑。例如 mixed 模式继续保留 -现有的 hybrid 与 tracepoint-only 分工,pure-kprobe 模式继续保留既有的 kprobe -行为,kfunc 模式继续保留既有的 kfunc 行为,以及 `recvfrom` 和 `recvmmsg` -的 tracepoint fallback。 - -支持的配置值:`read`、`readv`、`recvfrom`、`recvmsg`、`recvmmsg`、`sendmsg`、 -`sendmmsg`、`sendto`、`write`、`writev`。 +禁用 PHP 解释器剖析。禁用后将不采集 PHP 进程的函数调用栈, +可节省约 5.2 MB 内核内存(php_unwind_info_map、php_offsets_map)。 -##### 启用fentry/fexit特性 {#inputs.ebpf.socket.tunning.fentry_enabled} +##### 禁用 Node.js 剖析 
{#inputs.ebpf.profile.languages.nodejs_disabled} **标签**: @@ -4378,15 +4086,15 @@ syscall 仍然遵循当前运行模式下既有的 backend 选择逻辑。例如 **FQCN**: -`inputs.ebpf.socket.tunning.fentry_enabled` +`inputs.ebpf.profile.languages.nodejs_disabled` **默认值**: ```yaml inputs: ebpf: - socket: - tunning: - fentry_enabled: false + profile: + languages: + nodejs_disabled: false ``` **模式**: @@ -4396,169 +4104,147 @@ inputs: **详细描述**: -使用 fentry/fexit 特性说明 -- 相比传统的 kprobes,fentry 和 fexit 程序提供了更高的性能和可用性,可带来约 5%–10% 的性能提升。 -- 部分 Linux 内核对该特性支持不够完善,可能导致内核 BUG 和节点崩溃。已知的 BUG 修复包括: - - TencentOS Linux kernel 5.4.119 的修复 - [https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423](https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423) - - Alibaba Cloud Linux kernel 5.10.23 的修复 - [https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861](https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861) -- 内核建议:若要启用 fentry/fexit 特性,推荐使用 Linux kernel 5.10.28 及以上版本,以确保稳定性和性能。 +禁用 Node.js(V8)解释器剖析。禁用后将不采集 Node.js 进程的函数调用栈, +可节省约 6.4 MB 内核内存(v8_unwind_info_map)。 -#### 预处理 {#inputs.ebpf.socket.preprocess} +### 调优 {#inputs.ebpf.tunning} -##### 乱序重排(OOOR)缓冲区大小 {#inputs.ebpf.socket.preprocess.out_of_order_reassembly_cache_size} +#### 采集队列大小 {#inputs.ebpf.tunning.collector_queue_size} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.out_of_order_reassembly_cache_size` +`inputs.ebpf.tunning.collector_queue_size` -Upgrade from old version: `static_config.ebpf.syscall-out-of-order-cache-size` +Upgrade from old version: `static_config.ebpf-collector-queue-size` **默认值**: ```yaml inputs: ebpf: - socket: - preprocess: - out_of_order_reassembly_cache_size: 256 + tunning: + collector_queue_size: 65535 ``` **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [8, 1024] | +| Range | [4096, 64000000] | **详细描述**: -由于 eBPF socket 事件是以批处理的方式向用户态空间发送数据,同一个应用调用的请求、响应由不同 CPU 处理时,可能 
-会出现请求、响应乱序的情况,开启 Syscall 数据乱序重排特性后,每个 TCP/UDP 流会缓存一定数量的 eBPF socket -事件,以修正乱序数据对应用调用解析的影响。该参数设置了每个 TCP/UDP 流可以缓存的 eBPF socket 事件数量上限(每 -条事件数据占用的字节数上限受 `processors.request_log.tunning.payload_truncation` 控制)。在 Syscall 数据乱序较严重 -导致应用调用采集不全的环境中,可适当调大该参数。 +以下 deepflow-agent 的 eBPF 数据采集队列大小(分别限制): +- 0-ebpf-to-ebpf-collector +- 1-proc-event-to-sender +- 1-profile-to-sender -##### 乱序重排(OOOR)协议列表 {#inputs.ebpf.socket.preprocess.out_of_order_reassembly_protocols} +#### 用户态工作线程数 {#inputs.ebpf.tunning.userspace_worker_threads} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.out_of_order_reassembly_protocols` +`inputs.ebpf.tunning.userspace_worker_threads` -Upgrade from old version: `static_config.ebpf.syscall-out-of-order-reassembly` +Upgrade from old version: `static_config.ebpf.thread-num` **默认值**: ```yaml inputs: ebpf: - socket: - preprocess: - out_of_order_reassembly_protocols: [] + tunning: + userspace_worker_threads: 1 ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| _DYNAMIC_OPTIONS_ | | - **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [1, 1024] | **详细描述**: -配置后 deepflow-agent 将对指定应用协议的处理增加乱序重排过程。注意:(1)开启特性将消耗更多的内存,因此 -需关注 agent 内存用量;(2)配置`HTTP2`或`gRPC`会全部开启这两个协议 +参与用户态数据处理的工作线程数量。实际最大值为主机 CPU 逻辑核心数。 -##### 乱序重排(OOOR)超时时间 {#inputs.ebpf.socket.preprocess.out_of_order_reassembly_timeout} +#### Kick 线程 Nice 值 {#inputs.ebpf.tunning.kick_kern_nice} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.out_of_order_reassembly_timeout` +`inputs.ebpf.tunning.kick_kern_nice` **默认值**: ```yaml inputs: ebpf: - socket: - preprocess: - out_of_order_reassembly_timeout: 100ms + tunning: + kick_kern_nice: 0 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['100ms', '1s'] | +| Type | int | +| Range | [-20, 19] | **详细描述**: -OOOR 缓存的数据时间超时会直接输出, 可以根据采集器指标 
`deepflow_agent_ebpf_collect.metrics.metrics.time_backtrack_max` 调整该参数。 +控制每个 CPU 上 kick 线程使用的 Linux nice 值。 + +这些线程会在周期性定时器到期后唤醒,并通过轻量级 syscall +触发内核侧超时检查,将批量 eBPF 数据从缓冲区中推送出来。 + +当“指标中心”中 `deepflow_tenant -> deepflow_agent_ebpf_collector` +下的 `metrics.period_push_max_delay` 达到 199ms 时,需要关注这个 +配置项。这说明周期性 push 延迟已经触发超限标记,此时可以适当降低 +该配置项的取值,以提高 kick 线程的调度倾向。 -注意:增大该值会消耗更多的内存 +更小的 nice 值意味着更高的调度倾向,更大的 nice 值意味着更低的 +调度倾向。取值范围为 -20 到 19。负值可能需要 CAP_SYS_NICE 或 +足够的 RLIMIT_NICE。该配置仍然可能对其他负载产生影响。 -##### 分段重组(SR)协议列表 {#inputs.ebpf.socket.preprocess.segmentation_reassembly_protocols} +#### Perf Page 数量 {#inputs.ebpf.tunning.perf_pages_count} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.segmentation_reassembly_protocols` +`inputs.ebpf.tunning.perf_pages_count` -Upgrade from old version: `static_config.ebpf.syscall-segmentation-reassembly` +Upgrade from old version: `static_config.ebpf.perf-pages-count` **默认值**: ```yaml inputs: ebpf: - socket: - preprocess: - segmentation_reassembly_protocols: [] + tunning: + perf_pages_count: 128 ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| _DYNAMIC_OPTIONS_ | | - **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [32, 8192] | **详细描述**: -配置后 deepflow-agent 将对指定应用协议的处理增加分片重组过程,将多个 Syscall 的内容分片重组后再进行 -协议解析,以增强应用协议的采集成功率。 - -注意: -1. 该特性的生效的前提条件是`out_of_order_reassembly_protocols`开启并生效; - - 支持协议:[https://www.deepflow.io/docs/zh/features/l7-protocols/overview/](https://www.deepflow.io/docs/zh/features/l7-protocols/overview/) -2. 
配置`HTTP2`或`gRPC`会全部开启这两个协议 - -### File {#inputs.ebpf.file} - -#### IO 事件 {#inputs.ebpf.file.io_event} +内核共享内存占用的页数。值为 `2^n (5 <= n <= 13)`。用于 perf 数据传输。 +如果值在 `2^n` 和 `2^(n+1)` 之间,将自动调整到最小值 `2^n`。 +页的大小为4KB。 -##### 采集模式 {#inputs.ebpf.file.io_event.collect_mode} +#### 内核环形队列大小 {#inputs.ebpf.tunning.kernel_ring_size} **标签**: @@ -4566,45 +4252,30 @@ inputs: **FQCN**: -`inputs.ebpf.file.io_event.collect_mode` +`inputs.ebpf.tunning.kernel_ring_size` -Upgrade from old version: `static_config.ebpf.io-event-collect-mode` +Upgrade from old version: `static_config.ebpf.ring-size` **默认值**: ```yaml inputs: ebpf: - file: - io_event: - collect_mode: 1 + tunning: + kernel_ring_size: 65536 ``` -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| 0 | 禁用 | -| 1 | 调用生命周期 | -| 2 | 全部 | - **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | +| Range | [8192, 131072] | **详细描述**: -采集模式: -- 禁用:不采集任何文件 IO 事件。 -- 调用生命周期:仅采集调用生命周期内的文件 IO 事件。 -- 全部:采集所有的文件 IO 事件。 - -说明: -- 为了获取文件的完整路径,需要结合进程的挂载信息进行路径拼接。然而,一些进程在完成任务后会迅速退出, - 此时我们处理其产生的文件读写数据时,可能已无法从 /proc/[pid]/mountinfo 中获取挂载信息,导致路径不 - 完整(缺少挂载点)。我们对于 50ms 以下生存期的进程,文件路径会缺少挂载点信息。对于长期运行的进程, - 则不存在该问题。 +内核环形队列的大小。值为 `2^n (13 <= n <= 17)`。 +如果值在 `2^n` 和 `2^(n+1)` 之间,将自动调整到最小值 `2^n`。 -##### 最小耗时 {#inputs.ebpf.file.io_event.minimal_duration} +#### 最大 Socket 条目数 {#inputs.ebpf.tunning.max_socket_entries} **标签**: @@ -4612,31 +4283,29 @@ inputs: **FQCN**: -`inputs.ebpf.file.io_event.minimal_duration` +`inputs.ebpf.tunning.max_socket_entries` -Upgrade from old version: `static_config.ebpf.io-event-minimal-duration` +Upgrade from old version: `static_config.ebpf.max-socket-entries` **默认值**: ```yaml inputs: ebpf: - file: - io_event: - minimal_duration: 1ms + tunning: + max_socket_entries: 131072 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['1ns', '1s'] | +| Type | int | +| Range | [10000, 2000000] | **详细描述**: -deepflow-agent 所采集的文件 IO 
事件的时延下限阈值,操作系统中时延低于此阈值 -的文件 IO 事件将被忽略。 +设置 socket tracking 哈希表的最大条目数,根据实际场景中的并发请求数量而定。 -##### 启用虚拟文件采集 {#inputs.ebpf.file.io_event.enable_virtual_file_collect} +#### Socket Map 回收阈值 {#inputs.ebpf.tunning.socket_map_reclaim_threshold} **标签**: @@ -4644,65 +4313,61 @@ deepflow-agent 所采集的文件 IO 事件的时延下限阈值,操作系统 **FQCN**: -`inputs.ebpf.file.io_event.enable_virtual_file_collect` +`inputs.ebpf.tunning.socket_map_reclaim_threshold` + +Upgrade from old version: `static_config.ebpf.socket-map-max-reclaim` **默认值**: ```yaml inputs: ebpf: - file: - io_event: - enable_virtual_file_collect: false + tunning: + socket_map_reclaim_threshold: 120000 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [8000, 2000000] | **详细描述**: -当设置为 true 时,deepflow-agent 将采集发生在虚拟文件系统上的文件 -I/O 事件(例如 /proc、/sys、/run 等由内核动态生成的伪文件系统)。 -当设置为 false 时,将不会采集虚拟文件系统上的文件 I/O 事件。 - -### Profile {#inputs.ebpf.profile} - -#### 栈回溯 {#inputs.ebpf.profile.unwinding} +Socket map 表条目清理阈值。 -##### 禁用 DWARF 栈回溯 {#inputs.ebpf.profile.unwinding.dwarf_disabled} +#### 最大 Trace 条目数 {#inputs.ebpf.tunning.max_trace_entries} **标签**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_disabled` +`inputs.ebpf.tunning.max_trace_entries` -Upgrade from old version: `static_config.ebpf.dwarf-disabled` +Upgrade from old version: `static_config.ebpf.max-trace-entries` **默认值**: ```yaml inputs: ebpf: - profile: - unwinding: - dwarf_disabled: true + tunning: + max_trace_entries: 131072 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [10000, 2000000] | **详细描述**: -默认设置为 `true`,将禁用 DWARF 栈回溯,对所有进程使用基于帧指针的回溯,如果进程不包含帧指针将无法显示正常的栈。 -设置为 `false` 将对所有不包含帧指针的进程启用 DWARF 回溯。采集器使用启发式算法判断待剖析进程是否包含帧指针。 -设置 `dwarf_regex` 后,将强制对匹配的进程使用 DWARF 回溯。 +线程和协程追踪的最大哈希表条目数。 -##### DWARF 回溯进程匹配正则表达式 {#inputs.ebpf.profile.unwinding.dwarf_regex} +## 资源 {#inputs.resources} + +### 推送间隔 {#inputs.resources.push_interval} **标签**: 
@@ -4710,30 +4375,28 @@ inputs: **FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_regex` +`inputs.resources.push_interval` -Upgrade from old version: `static_config.ebpf.dwarf-regex` +Upgrade from old version: `platform_sync_interval` **默认值**: ```yaml inputs: - ebpf: - profile: - unwinding: - dwarf_regex: '' + resources: + push_interval: 10s ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['10s', '3600s'] | **详细描述**: -如设置为空,采集器将使用启发式算法判断待剖析进程是否包含帧指针,并对不包含帧指针的进程使用 DWARF 栈回溯。 -如设置为合法正则表达式,采集器将不再自行推断进程是否包含帧指针,改用该正则表达式对进程名进行匹配,仅对匹配的进程使用 DWARF 帧回溯。 +deepflow-agent 主动向 deepflow-server 上报/同步资源信息的时间间隔。 -##### DWARF 回溯进程表容量 {#inputs.ebpf.profile.unwinding.dwarf_process_map_size} +### 启用云主机资源同步 {#inputs.resources.workload_resource_sync_enabled} **标签**: @@ -4741,33 +4404,28 @@ inputs: **FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_process_map_size` - -Upgrade from old version: `static_config.ebpf.dwarf-process-map-size` +`inputs.resources.workload_resource_sync_enabled` **默认值**: ```yaml inputs: - ebpf: - profile: - unwinding: - dwarf_process_map_size: 1024 + resources: + workload_resource_sync_enabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 131072] | +| Type | bool | **详细描述**: -每个需要进行 DWARF 回溯的进程在该表中有一条记录,用于关联进程和回溯记录分片。 -每条记录大约占 8K 内存,默认配置大约需要分配 8M 内核内存。 -由于是哈希表,配置可以比最大进程号低。 -该配置只在 DWARF 功能开启时生效。 +开启开关后,deepflow-server 基于 deepflow-agent 上报的运行环境信息,生成一个云主机资源。 +用于无法通过云平台 API 同步云主机资源的场景,也可用于同步非云环境中普通物理服务器的资源信息。 -##### DWARF 回溯分片表容量 {#inputs.ebpf.profile.unwinding.dwarf_shard_map_size} +### 采集专有云资源 {#inputs.resources.private_cloud} + +#### 启用云宿主机资源 {#inputs.resources.private_cloud.hypervisor_resource_enabled} **标签**: @@ -4775,98 +4433,104 @@ inputs: **FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_shard_map_size` +`inputs.resources.private_cloud.hypervisor_resource_enabled` -Upgrade from old version: `static_config.ebpf.dwarf-shard-map-size` 
+Upgrade from old version: `platform_enabled` **默认值**: ```yaml inputs: - ebpf: - profile: - unwinding: - dwarf_shard_map_size: 128 + resources: + private_cloud: + hypervisor_resource_enabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 4096] | +| Type | bool | **详细描述**: -DWARF 回溯记录分片数量。 -每条记录大约占 1M 内存,默认配置大约需要分配 128M 内核内存。 -该配置只在 DWARF 功能开启时生效。 - -#### On-CPU {#inputs.ebpf.profile.on_cpu} +开启开关后,deepflow-agent 将采集 KVM 或 Linux 宿主机中的 VM 信息和网络信息,并上报/同步至 deepflow-server。 +采集的信息包括: +- raw_all_vm_xml +- raw_vm_states +- raw_ovs_interfaces +- raw_ovs_ports +- raw_brctl_show +- raw_vlan_config -##### Disabled {#inputs.ebpf.profile.on_cpu.disabled} +#### 虚拟机 MAC 源 {#inputs.resources.private_cloud.vm_mac_source} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.profile.on_cpu.disabled` +`inputs.resources.private_cloud.vm_mac_source` -Upgrade from old version: `static_config.ebpf.on-cpu-profile.disabled` +Upgrade from old version: `if_mac_source` **默认值**: ```yaml inputs: - ebpf: - profile: - on_cpu: - disabled: false + resources: + private_cloud: + vm_mac_source: 0 ``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| 0 | 网卡 MAC 地址 | +| 1 | 网卡名称 | +| 2 | Qemu XML 文件 | + **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | **详细描述**: -eBPF On-CPU profile 数据的采集开关。 - -注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, -即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.profile.on_cpu`。 +配置 deepflow-agent 提取 VM 真实 MAC 地址的方法: +- 网卡 MAC 地址: 从 tap 接口的 MAC 地址中提取 VM 的 MAC 地址 +- 网卡名称: 从 tap 接口的名字中提取 MAC 地址 +- Qemu XML 文件: 从 VM XML 文件中提取 MAC 地址 -##### 采样频率 {#inputs.ebpf.profile.on_cpu.sampling_frequency} +#### 虚拟机 XML 文件夹 {#inputs.resources.private_cloud.vm_xml_directory} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.profile.on_cpu.sampling_frequency` +`inputs.resources.private_cloud.vm_xml_directory` -Upgrade 
from old version: `static_config.ebpf.on-cpu-profile.frequency` +Upgrade from old version: `vm_xml_path` **默认值**: ```yaml inputs: - ebpf: - profile: - on_cpu: - sampling_frequency: 99 + resources: + private_cloud: + vm_xml_directory: /etc/libvirt/qemu/ ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 1000] | +| Type | string | +| Range | [0, 100] | **详细描述**: -eBPF On-CPU profile 数据的采样周期。 +宿主机中存放 VM XML 文件的目录 -##### 按 CPU 聚合 {#inputs.ebpf.profile.on_cpu.aggregate_by_cpu} +#### 虚拟机 MAC 映射脚本 {#inputs.resources.private_cloud.vm_mac_mapping_script} **标签**: @@ -4874,1897 +4538,419 @@ eBPF On-CPU profile 数据的采样周期。 **FQCN**: -`inputs.ebpf.profile.on_cpu.aggregate_by_cpu` +`inputs.resources.private_cloud.vm_mac_mapping_script` -Upgrade from old version: `static_config.ebpf.on-cpu-profile.cpu` +Upgrade from old version: `static_config.tap-mac-script` **默认值**: ```yaml inputs: - ebpf: - profile: - on_cpu: - aggregate_by_cpu: false + resources: + private_cloud: + vm_mac_mapping_script: '' ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | +| Range | [0, 100] | **详细描述**: -采集 On-CPU 采样数据时,是否获取 CPUID 的开关。 -- `true`: 表示在采集 On-CPU 采样数据时获取 CPUID (On-CPU 剖析时,支持对单个 CPU 的分析)。 -- `false`: 表示在采集 On-CPU 采样数据时不获取 CPUID (On-CPU 剖析时,不支持单个 CPU 的分析)。 +复杂环境中,TAP 网卡的 MAC 地址映射关系可以通过编写脚本实现。使用脚本时需要满足以下条件: +1. if_mac_source = 2 +2. tap_mode = 0 +3. TAP 网卡的名称与虚拟机 XML 文件中的名称相同 +4. 
脚本输出格式如下: + - tap2d283dfe,11:22:33:44:55:66 + - tap2d283223,aa:bb:cc:dd:ee:ff -#### Off-CPU {#inputs.ebpf.profile.off_cpu} +### 采集 K8s 资源 {#inputs.resources.kubernetes} -##### Disabled {#inputs.ebpf.profile.off_cpu.disabled} +#### K8s 命名空间 {#inputs.resources.kubernetes.kubernetes_namespace} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.profile.off_cpu.disabled` +`inputs.resources.kubernetes.kubernetes_namespace` -Upgrade from old version: `static_config.ebpf.off-cpu-profile.disabled` +Upgrade from old version: `static_config.kubernetes-namespace` **默认值**: ```yaml inputs: - ebpf: - profile: - off_cpu: - disabled: true + resources: + kubernetes: + kubernetes_namespace: null ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **详细描述**: -eBPF Off-CPU profile 数据的采集开关。 - -注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, -即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.profile.off_cpu`。 +指定采集器获取 K8s 资源时的命名空间 -##### 按 CPU 聚合 {#inputs.ebpf.profile.off_cpu.aggregate_by_cpu} +#### K8s API 资源 {#inputs.resources.kubernetes.api_resources} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.profile.off_cpu.aggregate_by_cpu` +`inputs.resources.kubernetes.api_resources` -Upgrade from old version: `static_config.ebpf.off-cpu-profile.cpu` +Upgrade from old version: `static_config.kubernetes-resources` **默认值**: ```yaml inputs: - ebpf: - profile: - off_cpu: - aggregate_by_cpu: false + resources: + kubernetes: + api_resources: + - name: namespaces + - name: nodes + - name: pods + - name: replicationcontrollers + - name: services + - name: daemonsets + - name: deployments + - name: replicasets + - name: statefulsets + - name: ingresses + - name: configmaps ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | dict | **详细描述**: -采集 Off-CPU 数据时,是否获取 CPUID 的开关。 -- `true`: 表示在采集 Off-CPU 数据时获取 CPUID (Off-CPU 剖析时,支持对单个 CPU 的分析)。 -- `false`: 表示在采集 Off-CPU 
数据时不获取 CPUID (Off-CPU 剖析时,不支持单个 CPU 的分析)。 +指定采集器采集的 K8s 资源。 + +列表中的条目格式如下: +{ + name: string + group: string + version: string + disabled: bool + field_selector: string +} + +默认采集的资源如下: +- namespaces +- nodes +- pods +- replicationcontrollers +- services +- daemonsets +- deployments +- replicasets +- statefulsets +- ingresses +- configmaps + +禁用某个资源,在列表中添加 `disabled: true` 的条目: +```yaml +inputs: + resources: + kubernetes: + api_resources: + - name: services + disabled: true +``` + +启用某个资源,在列表中添加该资源的条目。注意该设置会覆盖默认的资源采集。 +例如,要启用在 group `apps` 和 `apps.kruise.io` 中的 `statefulsets`,需要添加两个条目: +```yaml +inputs: + resources: + kubernetes: + api_resources: + - name: statefulsets + group: apps + - name: statefulsets + group: apps.kruise.io + version: v1beta1 +``` + +要采集 openshift 中的 `routes`,可以使用以下设置: +```yaml +inputs: + resources: + kubernetes: + api_resources: + - name: ingresses + disabled: true + - name: routes +``` -##### 最小阻塞时间 {#inputs.ebpf.profile.off_cpu.min_blocking_time} +##### 名称 {#inputs.resources.kubernetes.api_resources.name} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.profile.off_cpu.min_blocking_time` +`inputs.resources.kubernetes.api_resources.name` -Upgrade from old version: `static_config.ebpf.off-cpu-profile.minblock` +Upgrade from old version: `static_config.kubernetes-resources.name` **默认值**: ```yaml inputs: - ebpf: - profile: - off_cpu: - min_blocking_time: 50us + resources: + kubernetes: + api_resources: + - name: '' ``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| namespaces | | +| nodes | | +| pods | | +| replicationcontrollers | | +| services | | +| daemonsets | | +| deployments | | +| replicasets | | +| statefulsets | | +| ingresses | | +| routes | | +| servicerules | | +| clonesets | | +| ippools | | +| opengaussclusters | | +| configmaps | | + **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['0ns', '1h'] | +| Type | string | **详细描述**: -低于'最小阻塞时间'的 Off-CPU 
数据将被 deepflow-agent 忽略,'最小阻塞时间'设置为 '0ns' 表示 -采集所有的 Off-CPU 数据。由于 CPU 调度事件数量庞大(每秒可能超过一百万次),调小该参数将带来 -明显的资源开销,如果需要跟踪大时延的调度阻塞事件,建议调大该参数,以降低资源开销。另外,deepflow-agent -不采集阻塞超过 1 小时的事件。 - -#### Memory {#inputs.ebpf.profile.memory} +K8s API 资源名 -##### Disabled {#inputs.ebpf.profile.memory.disabled} +##### 组 {#inputs.resources.kubernetes.api_resources.group} **标签**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.ebpf.profile.memory.disabled` +`inputs.resources.kubernetes.api_resources.group` -Upgrade from old version: `static_config.ebpf.memory-profile.disabled` +Upgrade from old version: `static_config.kubernetes-resources.group` **默认值**: ```yaml inputs: - ebpf: - profile: - memory: - disabled: true + resources: + kubernetes: + api_resources: + - group: '' ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **详细描述**: -eBPF memory profile 数据的采集开关。 - -注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, -即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.profile.memory`。 +K8s API 资源组 -##### 内存剖析上报间隔 {#inputs.ebpf.profile.memory.report_interval} +##### 版本 {#inputs.resources.kubernetes.api_resources.version} **标签**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.ebpf.profile.memory.report_interval` +`inputs.resources.kubernetes.api_resources.version` -Upgrade from old version: `static_config.ebpf.memory-profile.report-interval` +Upgrade from old version: `static_config.kubernetes-resources.version` **默认值**: ```yaml inputs: - ebpf: - profile: - memory: - report_interval: 10s + resources: + kubernetes: + api_resources: + - version: '' ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['1s', '60s'] | +| Type | string | **详细描述**: -deepflow-agent 聚合和上报内存剖析数据的间隔。 +K8s API 版本 -##### 进程分配地址 LRU 长度 {#inputs.ebpf.profile.memory.allocated_addresses_lru_len} +##### Disabled {#inputs.resources.kubernetes.api_resources.disabled} **标签**: 
-`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.ebpf.profile.memory.allocated_addresses_lru_len` +`inputs.resources.kubernetes.api_resources.disabled` + +Upgrade from old version: `static_config.kubernetes-resources.disabled` **默认值**: ```yaml inputs: - ebpf: - profile: - memory: - allocated_addresses_lru_len: 131072 + resources: + kubernetes: + api_resources: + - disabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1024, 4194704] | +| Type | bool | **详细描述**: -采集器使用 LRU 缓存记录进程分配的地址,以避免内存使用失控。每个 LRU 条目大约占 32B 内存。 +禁用 K8s API 资源 -##### 排序长度 {#inputs.ebpf.profile.memory.sort_length} +##### Field Selector {#inputs.resources.kubernetes.api_resources.field_selector} **标签**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.ebpf.profile.memory.sort_length` +`inputs.resources.kubernetes.api_resources.field_selector` + +Upgrade from old version: `static_config.kubernetes-resources.field-selector` **默认值**: ```yaml inputs: - ebpf: - profile: - memory: - sort_length: 16384 + resources: + kubernetes: + api_resources: + - field_selector: '' ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [0, 65536] | +| Type | string | **详细描述**: -为了匹配 mallocs 和 frees,内存剖析会在处理前对数据按时间戳排序。 -该参数是排序数组的长度。 -配置该选项时先按说明调整 `sort_interval` 参数,在参考采集器性能统计 `deepflow_agent_ebpf_memory_profiler` 中 -`dequeued_by_length` 和 `dequeued_by_interval` 指标,在保证前者小于后者几倍的前提下适当调小该参数。 +K8s API 资源字段选择器 -##### 排序间隔 {#inputs.ebpf.profile.memory.sort_interval} +#### K8s API List 页大小 {#inputs.resources.kubernetes.api_list_page_size} **标签**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.ebpf.profile.memory.sort_interval` +`inputs.resources.kubernetes.api_list_page_size` + +Upgrade from old version: `static_config.kubernetes-api-list-limit` **默认值**: ```yaml inputs: - ebpf: - profile: - memory: - sort_interval: 1500ms + resources: + kubernetes: + api_list_page_size: 1000 ``` **模式**: | Key | 
Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['1ns', '10s'] | +| Type | int | +| Range | [10, 4294967295] | **详细描述**: -为了匹配 mallocs 和 frees,内存剖析会在处理前对数据按时间戳排序。 -该参数控制排序数组中第一个和最后一个元素之间的时间间隔的最大值。 -配置该选项可以参考采集器性能统计 `deepflow_agent_ebpf_memory_profiler` 中 -`time_backtracked` 指标,增大该参数使之为 0 即可。注意可能需要相应增大 `sort_length` 参数。 +用于指定 K8s 资源获取分页大小。 -##### 队列大小 {#inputs.ebpf.profile.memory.queue_size} +#### K8s API List 最大间隔 {#inputs.resources.kubernetes.api_list_max_interval} **标签**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.profile.memory.queue_size` +`inputs.resources.kubernetes.api_list_max_interval` + +Upgrade from old version: `static_config.kubernetes-api-list-interval` **默认值**: ```yaml inputs: - ebpf: - profile: - memory: - queue_size: 32768 + resources: + kubernetes: + api_list_max_interval: 10m ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [4096, 64000000] | +| Type | duration | +| Range | ['10m', '30d'] | **详细描述**: -内存剖析组件内部的队列大小。 -配置该选项可以参考采集器性能统计 `deepflow_agent_ebpf_memory_profiler` 中 -`overwritten` 和 `pending` 指标,增大该配置使得前者为 0,后者不高于该配置即可。 - -#### 预处理 {#inputs.ebpf.profile.preprocess} +当 watcher 未收到更新时,获取 K8s 资源的间隔时间。 -##### 函数栈压缩 {#inputs.ebpf.profile.preprocess.stack_compression} +#### Ingress Flavour {#inputs.resources.kubernetes.ingress_flavour} **标签**: -agent_restart + +deprecated **FQCN**: -`inputs.ebpf.profile.preprocess.stack_compression` +`inputs.resources.kubernetes.ingress_flavour` -Upgrade from old version: `static_config.ebpf.preprocess.stack-compression` - -**默认值**: -```yaml -inputs: - ebpf: - profile: - preprocess: - stack_compression: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -发送数据之前压缩函数调用栈。压缩能够有效降低 agent 的内存开销、数据传输的带宽消耗、以及 -ingester 的 CPU 开销,但是 Agent 也会因此消耗更多的 CPU。测试表明,将deepflow-agent 自身的 -on-cpu 函数调用栈压缩,可以将带宽消耗降低 `x` 倍,但会使得 agent 额外消耗 `y%` 的 CPU。 - -#### 语言特定剖析 
{#inputs.ebpf.profile.languages} - -控制对哪些解释型语言进行剖析。禁用不使用的语言可以节省每个语言约 5-6 MB 内存。 -总内存占用:~17-20 MB(全部启用),~6.1 MB(仅 Python),~5.2 MB(仅 PHP),~6.4 MB(仅 Node.js)。 - -##### 禁用 Python 剖析 {#inputs.ebpf.profile.languages.python_disabled} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.languages.python_disabled` - -**默认值**: -```yaml -inputs: - ebpf: - profile: - languages: - python_disabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -禁用 Python 解释器剖析。禁用后将不采集 Python 进程的函数调用栈, -可节省约 6.1 MB 内核内存(python_tstate_addr_map、python_unwind_info_map、python_offsets_map)。 - -##### 禁用 PHP 剖析 {#inputs.ebpf.profile.languages.php_disabled} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.languages.php_disabled` - -**默认值**: -```yaml -inputs: - ebpf: - profile: - languages: - php_disabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -禁用 PHP 解释器剖析。禁用后将不采集 PHP 进程的函数调用栈, -可节省约 5.2 MB 内核内存(php_unwind_info_map、php_offsets_map)。 - -##### 禁用 Node.js 剖析 {#inputs.ebpf.profile.languages.nodejs_disabled} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.languages.nodejs_disabled` - -**默认值**: -```yaml -inputs: - ebpf: - profile: - languages: - nodejs_disabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -禁用 Node.js(V8)解释器剖析。禁用后将不采集 Node.js 进程的函数调用栈, -可节省约 6.4 MB 内核内存(v8_unwind_info_map)。 - -### 网络 {#inputs.ebpf.network} - -#### NIC optimization Enabled {#inputs.ebpf.network.nic_opt_enabled} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.network.nic_opt_enabled` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_opt_enabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -是否启用网卡优化功能,用于提升多核环境下的网络包处理能力 -以及突发流量承载能力。 - -开启后将综合进行以下优化: - - RSS 硬件队列数量配置 - - RX ring 描述符数量调优 - - 
硬件中断与 CPU 亲和性绑定(IRQ 绑核) - - 可选的 XDP CPUMAP 软件重定向分发 - -该优化主要解决 RSS 硬件无法基于封装报文内层头部 -(如 GRE、Double VLAN、VXLAN、ERSPAN)进行哈希分摊的问题, -避免流量集中在单个 CPU 上造成过载和丢包。 - -通过调整 RX ring 描述符数量,可提升突发流量场景下的 -接收缓存能力,降低 ring 满导致的丢包风险。 - -在启用 XDP CPU Redirect 时,数据包会在接收后通过 -CPUMAP 在多个 CPU 核心之间进行软件层重分发, -从而突破硬件 RSS 的能力限制,实现更均衡的负载分布。 - -建议在以下场景开启该功能: - 1)使用 tcpdump 抓包发现该接口流量主要为 GRE、 - Double VLAN、VXLAN 等封装报文; - 2)通过 `top`(按 1)观察各 CPU 软中断占用率时, - 发现某一个 CPU 的 softirq 接近 100%, - 而其他 CPU 软中断占用率明显偏低。 - -为获得最佳性能,建议将中断 CPU 和 XDP 重定向 CPU -配置在与物理网卡相同的 NUMA 节点上。 - -#### 网卡优化配置 {#inputs.ebpf.network.nic_optimize} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.network.nic_optimize` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - interface: '' - irq_cpu_list: '' - rss_channel_count: 0 - rx_ring_size: 0 - xdp_cpu_redirect: false - xdp_cpu_redirect_list: '' - xdp_queue_size: 2048 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | dict | - -**详细描述**: - -针对指定网卡接口进行性能优化配置。 - -该功能通过调优 RSS 队列、中断绑核、RX ring 大小、 -以及可选的 XDP CPUMAP 重定向机制,提升多核扩展能力 -和突发流量承载能力。 - -建议在以下场景开启: - - 接口流量主要为 GRE、Double VLAN、VXLAN、ERSPAN 等封装报文; - - 某个 CPU 的 softirq 占用率接近 100%,而其他 CPU 空闲。 - -为了获得更好的性能,程序会自动停用 irqbalance 服务,以防止网卡中断在 CPU 之间迁移。 - -可为多个接口分别配置优化策略。 - -样例: -```yaml -inputs: - ebpf: - network: - nic_opt_enabled: true - nic_optimize: - - interface: eth0 - rx_ring_size: 4096 - rss_channel_count: 2 - irq_cpu_list: 1,2 - xdp_cpu_redirect: true - xdp_queue_size: 2048 - xdp_cpu_redirect_list: 4,5,6,7 - - interface: eth1 - rx_ring_size: 4096 - rss_channel_count: 2 - irq_cpu_list: 1,2 - xdp_cpu_redirect: true - xdp_queue_size: 2048 - xdp_cpu_redirect_list: 4,5,6,7 -``` - -##### 网卡接口 {#inputs.ebpf.network.nic_optimize.interface} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.interface` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - interface: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | 
string | - -**详细描述**: - -需要进行优化的网卡接口名称。 - -##### RX Ring 描述符数量 {#inputs.ebpf.network.nic_optimize.rx_ring_size} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.rx_ring_size` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - rx_ring_size: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**详细描述**: - -网卡接收环(RX ring)的描述符数量。 - -增大该值可提升突发流量场景下的缓存能力, -降低因 ring 满导致的丢包风险。 -具体使用`ethtool -g ` 查看当前配置,根据实际情况调整到合适的值。 - -默认值为 0 表示保持原状忽略此项配置。 - -##### RSS 队列数量 {#inputs.ebpf.network.nic_optimize.rss_channel_count} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.rss_channel_count` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - rss_channel_count: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**详细描述**: - -RSS 硬件队列数量。 -数据包在物理网卡完成哈希后,将分发到指定数量的队列并触发中断。 - -最大一般支持 16,且不要超过逻辑 CPU 核数。 -具体使用`ethtool -l ` 查看当前配置,根据实际情况调整到合适的值。 - -当启用 XDP CPU Redirect 时建议设置为 1。 -默认值为 0 表示保持原状忽略此项配置。 - -##### 硬件中断 CPU 列表 {#inputs.ebpf.network.nic_optimize.irq_cpu_list} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.irq_cpu_list` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - irq_cpu_list: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -用于处理网卡硬件中断的 CPU ID 或 CPU 列表。 - -数量建议与 RSS 队列数量一致。 -若启用 XDP CPU Redirect,仅需指定一个 CPU。 - -可设置为: - - 指定 CPU 列表(如 2,4,6) - - local(自动匹配本地 NUMA 节点 CPU) - -建议所选 CPU 与物理网卡位于同一 NUMA 节点。 - -##### 启用 XDP CPU 重定向 {#inputs.ebpf.network.nic_optimize.xdp_cpu_redirect} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.xdp_cpu_redirect` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - xdp_cpu_redirect: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -是否启用 XDP CPUMAP 重定向分发。 - -用于解决硬件 RSS 无法对封装报文 -(如 Double VLAN、ERSPAN 等)进行均匀分摊, 
-导致单核过载和丢包的问题。 - -##### XDP 队列大小 {#inputs.ebpf.network.nic_optimize.xdp_queue_size} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.xdp_queue_size` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - xdp_queue_size: 2048 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**详细描述**: - -XDP CPUMAP 队列大小。 - -取值范围:[512, 8192],建议配置为 2 的幂。 - -增大可提升突发流量承载能力,但会占用更多内存。 - -##### XDP 重定向 CPU 列表 {#inputs.ebpf.network.nic_optimize.xdp_cpu_redirect_list} - -**标签**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.xdp_cpu_redirect_list` - -**默认值**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - xdp_cpu_redirect_list: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -XDP 重定向后用于处理数据包的 CPU 列表。 - -填写样式如:4,6,8 - -### 调优 {#inputs.ebpf.tunning} - -#### 采集队列大小 {#inputs.ebpf.tunning.collector_queue_size} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.collector_queue_size` - -Upgrade from old version: `static_config.ebpf-collector-queue-size` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - collector_queue_size: 65535 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [4096, 64000000] | - -**详细描述**: - -以下 deepflow-agent 的 eBPF 数据采集队列大小(分别限制): -- 0-ebpf-to-ebpf-collector -- 1-proc-event-to-sender -- 1-profile-to-sender - -#### 用户态工作线程数 {#inputs.ebpf.tunning.userspace_worker_threads} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.userspace_worker_threads` - -Upgrade from old version: `static_config.ebpf.thread-num` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - userspace_worker_threads: 1 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 1024] | - -**详细描述**: - -参与用户态数据处理的工作线程数量。实际最大值为主机 CPU 逻辑核心数。 - -#### Kick 线程 Nice 值 {#inputs.ebpf.tunning.kick_kern_nice} - -**标签**: - -agent_restart - -**FQCN**: - 
-`inputs.ebpf.tunning.kick_kern_nice` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - kick_kern_nice: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [-20, 19] | - -**详细描述**: - -控制每个 CPU 上 kick 线程使用的 Linux nice 值。 - -这些线程会在周期性定时器到期后唤醒,并通过轻量级 syscall -触发内核侧超时检查,将批量 eBPF 数据从缓冲区中推送出来。 - -当“指标中心”中 `deepflow_tenant -> deepflow_agent_ebpf_collector` -下的 `metrics.period_push_max_delay` 达到 199ms 时,需要关注这个 -配置项。这说明周期性 push 延迟已经触发超限标记,此时可以适当降低 -该配置项的取值,以提高 kick 线程的调度倾向。 - -更小的 nice 值意味着更高的调度倾向,更大的 nice 值意味着更低的 -调度倾向。取值范围为 -20 到 19。负值可能需要 CAP_SYS_NICE 或 -足够的 RLIMIT_NICE。该配置仍然可能对其他负载产生影响。 - -#### Perf Page 数量 {#inputs.ebpf.tunning.perf_pages_count} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.perf_pages_count` - -Upgrade from old version: `static_config.ebpf.perf-pages-count` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - perf_pages_count: 128 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [32, 8192] | - -**详细描述**: - -内核共享内存占用的页数。值为 `2^n (5 <= n <= 13)`。用于 perf 数据传输。 -如果值在 `2^n` 和 `2^(n+1)` 之间,将自动调整到最小值 `2^n`。 -页的大小为4KB。 - -#### 内核环形队列大小 {#inputs.ebpf.tunning.kernel_ring_size} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.kernel_ring_size` - -Upgrade from old version: `static_config.ebpf.ring-size` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - kernel_ring_size: 65536 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [8192, 131072] | - -**详细描述**: - -内核环形队列的大小。值为 `2^n (13 <= n <= 17)`。 -如果值在 `2^n` 和 `2^(n+1)` 之间,将自动调整到最小值 `2^n`。 - -#### 最大 Socket 条目数 {#inputs.ebpf.tunning.max_socket_entries} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.max_socket_entries` - -Upgrade from old version: `static_config.ebpf.max-socket-entries` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - max_socket_entries: 131072 -``` - -**模式**: -| Key | Value | -| ---- | 
---------------------------- | -| Type | int | -| Range | [10000, 2000000] | - -**详细描述**: - -设置 socket tracking 哈希表的最大条目数,根据实际场景中的并发请求数量而定。 - -#### Socket Map 回收阈值 {#inputs.ebpf.tunning.socket_map_reclaim_threshold} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.socket_map_reclaim_threshold` - -Upgrade from old version: `static_config.ebpf.socket-map-max-reclaim` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - socket_map_reclaim_threshold: 120000 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [8000, 2000000] | - -**详细描述**: - -Socket map 表条目清理阈值。 - -#### 最大 Trace 条目数 {#inputs.ebpf.tunning.max_trace_entries} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.max_trace_entries` - -Upgrade from old version: `static_config.ebpf.max-trace-entries` - -**默认值**: -```yaml -inputs: - ebpf: - tunning: - max_trace_entries: 131072 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [10000, 2000000] | - -**详细描述**: - -线程和协程追踪的最大哈希表条目数。 - -## 资源 {#inputs.resources} - -### 推送间隔 {#inputs.resources.push_interval} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.push_interval` - -Upgrade from old version: `platform_sync_interval` - -**默认值**: -```yaml -inputs: - resources: - push_interval: 10s -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['10s', '3600s'] | - -**详细描述**: - -deepflow-agent 主动向 deepflow-server 上报/同步资源信息的时间间隔。 - -### 启用云主机资源同步 {#inputs.resources.workload_resource_sync_enabled} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.workload_resource_sync_enabled` - -**默认值**: -```yaml -inputs: - resources: - workload_resource_sync_enabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开启开关后,deepflow-server 基于 deepflow-agent 上报的运行环境信息,生成一个云主机资源。 -用于无法通过云平台 API 同步云主机资源的场景,也可用于同步非云环境中普通物理服务器的资源信息。 - 
-### 采集专有云资源 {#inputs.resources.private_cloud} - -#### 启用云宿主机资源 {#inputs.resources.private_cloud.hypervisor_resource_enabled} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.private_cloud.hypervisor_resource_enabled` - -Upgrade from old version: `platform_enabled` - -**默认值**: -```yaml -inputs: - resources: - private_cloud: - hypervisor_resource_enabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开启开关后,deepflow-agent 将采集 KVM 或 Linux 宿主机中的 VM 信息和网络信息,并上报/同步至 deepflow-server。 -采集的信息包括: -- raw_all_vm_xml -- raw_vm_states -- raw_ovs_interfaces -- raw_ovs_ports -- raw_brctl_show -- raw_vlan_config - -#### 虚拟机 MAC 源 {#inputs.resources.private_cloud.vm_mac_source} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.private_cloud.vm_mac_source` - -Upgrade from old version: `if_mac_source` - -**默认值**: -```yaml -inputs: - resources: - private_cloud: - vm_mac_source: 0 -``` - -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| 0 | 网卡 MAC 地址 | -| 1 | 网卡名称 | -| 2 | Qemu XML 文件 | - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**详细描述**: - -配置 deepflow-agent 提取 VM 真实 MAC 地址的方法: -- 网卡 MAC 地址: 从 tap 接口的 MAC 地址中提取 VM 的 MAC 地址 -- 网卡名称: 从 tap 接口的名字中提取 MAC 地址 -- Qemu XML 文件: 从 VM XML 文件中提取 MAC 地址 - -#### 虚拟机 XML 文件夹 {#inputs.resources.private_cloud.vm_xml_directory} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.private_cloud.vm_xml_directory` - -Upgrade from old version: `vm_xml_path` - -**默认值**: -```yaml -inputs: - resources: - private_cloud: - vm_xml_directory: /etc/libvirt/qemu/ -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | -| Range | [0, 100] | - -**详细描述**: - -宿主机中存放 VM XML 文件的目录 - -#### 虚拟机 MAC 映射脚本 {#inputs.resources.private_cloud.vm_mac_mapping_script} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.private_cloud.vm_mac_mapping_script` - -Upgrade from 
old version: `static_config.tap-mac-script` - -**默认值**: -```yaml -inputs: - resources: - private_cloud: - vm_mac_mapping_script: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | -| Range | [0, 100] | - -**详细描述**: - -复杂环境中,TAP 网卡的 MAC 地址映射关系可以通过编写脚本实现。使用脚本时需要满足以下条件: -1. if_mac_source = 2 -2. tap_mode = 0 -3. TAP 网卡的名称与虚拟机 XML 文件中的名称相同 -4. 脚本输出格式如下: - - tap2d283dfe,11:22:33:44:55:66 - - tap2d283223,aa:bb:cc:dd:ee:ff - -### 采集 K8s 资源 {#inputs.resources.kubernetes} - -#### K8s 命名空间 {#inputs.resources.kubernetes.kubernetes_namespace} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.kubernetes_namespace` - -Upgrade from old version: `static_config.kubernetes-namespace` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - kubernetes_namespace: null -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -指定采集器获取 K8s 资源时的命名空间 - -#### K8s API 资源 {#inputs.resources.kubernetes.api_resources} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources` - -Upgrade from old version: `static_config.kubernetes-resources` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: namespaces - - name: nodes - - name: pods - - name: replicationcontrollers - - name: services - - name: daemonsets - - name: deployments - - name: replicasets - - name: statefulsets - - name: ingresses - - name: configmaps -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | dict | - -**详细描述**: - -指定采集器采集的 K8s 资源。 - -列表中的条目格式如下: -{ - name: string - group: string - version: string - disabled: bool - field_selector: string -} - -默认采集的资源如下: -- namespaces -- nodes -- pods -- replicationcontrollers -- services -- daemonsets -- deployments -- replicasets -- statefulsets -- ingresses -- configmaps - -禁用某个资源,在列表中添加 `disabled: true` 的条目: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - 
name: services - disabled: true -``` - -启用某个资源,在列表中添加该资源的条目。注意该设置会覆盖默认的资源采集。 -例如,要启用在 group `apps` 和 `apps.kruise.io` 中的 `statefulsets`,需要添加两个条目: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: statefulsets - group: apps - - name: statefulsets - group: apps.kruise.io - version: v1beta1 -``` - -要采集 openshift 中的 `routes`,可以使用以下设置: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: ingresses - disabled: true - - name: routes -``` - -##### 名称 {#inputs.resources.kubernetes.api_resources.name} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources.name` - -Upgrade from old version: `static_config.kubernetes-resources.name` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: '' -``` - -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| namespaces | | -| nodes | | -| pods | | -| replicationcontrollers | | -| services | | -| daemonsets | | -| deployments | | -| replicasets | | -| statefulsets | | -| ingresses | | -| routes | | -| servicerules | | -| clonesets | | -| ippools | | -| opengaussclusters | | -| configmaps | | - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -K8s API 资源名 - -##### 组 {#inputs.resources.kubernetes.api_resources.group} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources.group` - -Upgrade from old version: `static_config.kubernetes-resources.group` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - group: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -K8s API 资源组 - -##### 版本 {#inputs.resources.kubernetes.api_resources.version} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources.version` - -Upgrade from old version: `static_config.kubernetes-resources.version` - -**默认值**: -```yaml -inputs: - resources: - 
kubernetes: - api_resources: - - version: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -K8s API 版本 - -##### Disabled {#inputs.resources.kubernetes.api_resources.disabled} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources.disabled` - -Upgrade from old version: `static_config.kubernetes-resources.disabled` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - disabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -禁用 K8s API 资源 - -##### Field Selector {#inputs.resources.kubernetes.api_resources.field_selector} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources.field_selector` - -Upgrade from old version: `static_config.kubernetes-resources.field-selector` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - field_selector: '' -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -K8s API 资源字段选择器 - -#### K8s API List 页大小 {#inputs.resources.kubernetes.api_list_page_size} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_list_page_size` - -Upgrade from old version: `static_config.kubernetes-api-list-limit` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_list_page_size: 1000 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [10, 4294967295] | - -**详细描述**: - -用于指定 K8s 资源获取分页大小。 - -#### K8s API List 最大间隔 {#inputs.resources.kubernetes.api_list_max_interval} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_list_max_interval` - -Upgrade from old version: `static_config.kubernetes-api-list-interval` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - api_list_max_interval: 10m -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- 
| -| Type | duration | -| Range | ['10m', '30d'] | - -**详细描述**: - -当 watcher 未收到更新时,获取 K8s 资源的间隔时间。 - -#### Ingress Flavour {#inputs.resources.kubernetes.ingress_flavour} - -**标签**: - - -deprecated - -**FQCN**: - -`inputs.resources.kubernetes.ingress_flavour` - -Upgrade from old version: `static_config.ingress-flavour` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - ingress_flavour: kubernetes -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -#### Pod MAC 地址采集方法 {#inputs.resources.kubernetes.pod_mac_collection_method} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.pod_mac_collection_method` - -Upgrade from old version: `static_config.kubernetes-poller-type` - -**默认值**: -```yaml -inputs: - resources: - kubernetes: - pod_mac_collection_method: adaptive -``` - -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| adaptive | | -| active | | -| passive | | - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -- passive: deepflow-agent 采集 ARP/ND 数据包 计算其他 POD 的 MAC 和 IP 信息。 -- active: deepflow-agent 通过 setns 进入其他 POD 的 netns 查询 MAC 和 IP 信息(部署 - 时需要 SYS_ADMIN 权限)。 -- adaptive: deepflow-agent 优先使用 active 模式获取其他 POD 的 MAC 和 IP 信息。 - -### 从控制器拉取资源 {#inputs.resources.pull_resource_from_controller} - -DeepFlow-server 从控制器拉取资源的配置。 -DeepFlow-agent 不会读取此部分。 - -#### 云平台过滤器 {#inputs.resources.pull_resource_from_controller.domain_filter} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.pull_resource_from_controller.domain_filter` - -Upgrade from old version: `domains` - -**默认值**: -```yaml -inputs: - resources: - pull_resource_from_controller: - domain_filter: - - '0' -``` - -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| _DYNAMIC_OPTIONS_ | _DYNAMIC_OPTIONS_ | - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -在运行过程中 deepflow-agent 周期性从 
deepflow-server 获取 IP、MAC 列表,用于 -向采集的观测数据注入标签。该参数可以控制向 deepflow-agent 发送的 IP、MAC 数据范围, -以减少下发的数据量。当业务系统中不存在跨云平台的服务访问时,可以配置仅向 deepflow-agent -下发本云平台的数据。参数的默认值为`0`,表示获取所有云平台的数据;也可以设置 lcuuid 列表, -仅获取部分云平台的数据。 - -#### 仅下发本集群中的 K8s Pod IP {#inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster` - -Upgrade from old version: `pod_cluster_internal_ip` +Upgrade from old version: `static_config.ingress-flavour` **默认值**: ```yaml inputs: resources: - pull_resource_from_controller: - only_kubernetes_pod_ip_in_local_cluster: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -运行过程中 deepflow-agent 周期性从 deepflow-server 获取 IP、MAC 列表,用于 -向采集的观测数据注入标签。该参数可以控制向 deepflow-agent 发送的 IP、MAC 数据范围, -减少下发的数据量。当 Kubernetes 内部的 POD IP 不会直接与外部通信时,可以配置仅向 deepflow-agent -下发本集群的 POD IP、MAC 数据。参数默认值为 `false`,表示发送全部。 - -## 集成 {#inputs.integration} - -### Enabled {#inputs.integration.enabled} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.integration.enabled` - -Upgrade from old version: `external_agent_http_proxy_enabled` - -**默认值**: -```yaml -inputs: - integration: - enabled: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开关开启后,deepflow-agent 将开启外部数据的接收服务接口,以集成来自 Prometheus、 -Telegraf、OpenTelemetry 和 Skywalking、Vector 的数据。 - -### 监听端口 {#inputs.integration.listen_port} - -**标签**: - -`hot_update` - -**FQCN**: - -`inputs.integration.listen_port` - -Upgrade from old version: `external_agent_http_proxy_port` - -**默认值**: -```yaml -inputs: - integration: - listen_port: 38086 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 65535] | - -**详细描述**: - -deepflow-agent 外部数据接收服务的监听端口。 - -### 压缩 {#inputs.integration.compression} - -#### Trace 
{#inputs.integration.compression.trace} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.integration.compression.trace` - -Upgrade from old version: `static_config.external-agent-http-proxy-compressed` - -**默认值**: -```yaml -inputs: - integration: - compression: - trace: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开启后,deepflow-agent 将对集成的追踪数据进行压缩处理,压缩比例在 5:1~10:1 之间。注意: -开启此特性将增加 deepflow-agent 的 CPU 消耗。 - -#### Profile {#inputs.integration.compression.profile} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.integration.compression.profile` - -Upgrade from old version: `static_config.external-agent-http-proxy-profile-compressed` - -**默认值**: -```yaml -inputs: - integration: - compression: - profile: true + kubernetes: + ingress_flavour: kubernetes ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开启后,deepflow-agent 将对集成的剖析数据进行压缩处理,压缩比例在 5:1~10:1 之间。注意: -开启此特性将增加 deepflow-agent 的 CPU 消耗。 - -### Prometheus 额外 Label {#inputs.integration.prometheus_extra_labels} - -deepflow-agent 支持从 Prometheus RemoteWrite 的 http header 中获取额外的 label。 +| Type | string | -#### Enabled {#inputs.integration.prometheus_extra_labels.enabled} +#### Pod MAC 地址采集方法 {#inputs.resources.kubernetes.pod_mac_collection_method} **标签**: @@ -6772,46 +4958,24 @@ deepflow-agent 支持从 Prometheus RemoteWrite 的 http header 中获取额外 **FQCN**: -`inputs.integration.prometheus_extra_labels.enabled` +`inputs.resources.kubernetes.pod_mac_collection_method` -Upgrade from old version: `static_config.prometheus-extra-config.enabled` +Upgrade from old version: `static_config.kubernetes-poller-type` **默认值**: ```yaml inputs: - integration: - prometheus_extra_labels: - enabled: false + resources: + kubernetes: + pod_mac_collection_method: adaptive ``` -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -Prometheus 额外 lable 的获取开关。 - -#### 额外 Label 
{#inputs.integration.prometheus_extra_labels.extra_labels} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.integration.prometheus_extra_labels.extra_labels` - -Upgrade from old version: `static_config.prometheus-extra-config.labels` - -**默认值**: -```yaml -inputs: - integration: - prometheus_extra_labels: - extra_labels: [] -``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| adaptive | | +| active | | +| passive | | **模式**: | Key | Value | @@ -6820,115 +4984,73 @@ inputs: **详细描述**: -Prometheus 额外 label 的列表。 - -#### Label 键总长度限制 {#inputs.integration.prometheus_extra_labels.label_length} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.integration.prometheus_extra_labels.label_length` - -Upgrade from old version: `static_config.prometheus-extra-config.labels-limit` - -**默认值**: -```yaml -inputs: - integration: - prometheus_extra_labels: - label_length: 1024 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [1024, 1048576] | - -**详细描述**: - -deepflow-agent 对 Prometheus 额外 label 解析并采集时,key 字段长度总和的上限。 - -#### Label 值总长度限制 {#inputs.integration.prometheus_extra_labels.value_length} - -**标签**: - -agent_restart - -**FQCN**: - -`inputs.integration.prometheus_extra_labels.value_length` - -Upgrade from old version: `static_config.prometheus-extra-config.values-limit` - -**默认值**: -```yaml -inputs: - integration: - prometheus_extra_labels: - value_length: 4096 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [4096, 4194304] | - -**详细描述**: +- passive: deepflow-agent 采集 ARP/ND 数据包 计算其他 POD 的 MAC 和 IP 信息。 +- active: deepflow-agent 通过 setns 进入其他 POD 的 netns 查询 MAC 和 IP 信息(部署 + 时需要 SYS_ADMIN 权限)。 +- adaptive: deepflow-agent 优先使用 active 模式获取其他 POD 的 MAC 和 IP 信息。 -deepflow-agent 对 Prometheus 额外 label 解析并采集时,value 字段长度总和的上限。 +### 从控制器拉取资源 {#inputs.resources.pull_resource_from_controller} -### 特性开关 
{#inputs.integration.feature_control} +DeepFlow-server 从控制器拉取资源的配置。 +DeepFlow-agent 不会读取此部分。 -#### 禁用 Profile 集成 {#inputs.integration.feature_control.profile_integration_disabled} +#### 云平台过滤器 {#inputs.resources.pull_resource_from_controller.domain_filter} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.profile_integration_disabled` +`inputs.resources.pull_resource_from_controller.domain_filter` -Upgrade from old version: `static_config.external-profile-integration-disabled` +Upgrade from old version: `domains` **默认值**: ```yaml inputs: - integration: - feature_control: - profile_integration_disabled: false + resources: + pull_resource_from_controller: + domain_filter: + - '0' ``` +**枚举可选值**: +| Value | Note | +| ----- | ---------------------------- | +| _DYNAMIC_OPTIONS_ | _DYNAMIC_OPTIONS_ | + **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | -#### 禁用 Trace 集成 {#inputs.integration.feature_control.trace_integration_disabled} +**详细描述**: + +在运行过程中 deepflow-agent 周期性从 deepflow-server 获取 IP、MAC 列表,用于 +向采集的观测数据注入标签。该参数可以控制向 deepflow-agent 发送的 IP、MAC 数据范围, +以减少下发的数据量。当业务系统中不存在跨云平台的服务访问时,可以配置仅向 deepflow-agent +下发本云平台的数据。参数的默认值为`0`,表示获取所有云平台的数据;也可以设置 lcuuid 列表, +仅获取部分云平台的数据。 + +#### 仅下发本集群中的 K8s Pod IP {#inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.trace_integration_disabled` +`inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster` -Upgrade from old version: `static_config.external-trace-integration-disabled` +Upgrade from old version: `pod_cluster_internal_ip` **默认值**: ```yaml inputs: - integration: - feature_control: - trace_integration_disabled: false + resources: + pull_resource_from_controller: + only_kubernetes_pod_ip_in_local_cluster: false ``` **模式**: @@ -6936,24 +5058,32 @@ inputs: | ---- | ---------------------------- | | 
Type | bool | -#### 禁用 Metric 集成 {#inputs.integration.feature_control.metric_integration_disabled} +**详细描述**: + +运行过程中 deepflow-agent 周期性从 deepflow-server 获取 IP、MAC 列表,用于 +向采集的观测数据注入标签。该参数可以控制向 deepflow-agent 发送的 IP、MAC 数据范围, +减少下发的数据量。当 Kubernetes 内部的 POD IP 不会直接与外部通信时,可以配置仅向 deepflow-agent +下发本集群的 POD IP、MAC 数据。参数默认值为 `false`,表示发送全部。 + +## 集成 {#inputs.integration} + +### Enabled {#inputs.integration.enabled} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.metric_integration_disabled` +`inputs.integration.enabled` -Upgrade from old version: `static_config.external-metric-integration-disabled` +Upgrade from old version: `external_agent_http_proxy_enabled` **默认值**: ```yaml inputs: integration: - feature_control: - metric_integration_disabled: false + enabled: true ``` **模式**: @@ -6961,49 +5091,60 @@ inputs: | ---- | ---------------------------- | | Type | bool | -#### 禁用 Log 集成 {#inputs.integration.feature_control.log_integration_disabled} +**详细描述**: + +开关开启后,deepflow-agent 将开启外部数据的接收服务接口,以集成来自 Prometheus、 +Telegraf、OpenTelemetry、SkyWalking 和 Vector 的数据。 + +### 监听端口 {#inputs.integration.listen_port} **标签**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.log_integration_disabled` +`inputs.integration.listen_port` -Upgrade from old version: `static_config.external-log-integration-disabled` +Upgrade from old version: `external_agent_http_proxy_port` **默认值**: ```yaml inputs: integration: - feature_control: - log_integration_disabled: false + listen_port: 38086 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [1, 65535] | + +**详细描述**: + +deepflow-agent 外部数据接收服务的监听端口。 -## vector {#inputs.vector} +### 压缩 {#inputs.integration.compression} -### 启用 Vector 组件 {#inputs.vector.enabled} +#### Trace {#inputs.integration.compression.trace} **标签**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.vector.enabled` 
+`inputs.integration.compression.trace` + +Upgrade from old version: `static_config.external-agent-http-proxy-compressed` **默认值**: ```yaml inputs: - vector: - enabled: false + integration: + compression: + trace: true ``` **模式**: @@ -7013,354 +5154,44 @@ inputs: **详细描述**: -对 Vector 组件的开关控制。 +开启后,deepflow-agent 将对集成的追踪数据进行压缩处理,压缩比例在 5:1~10:1 之间。注意: +开启此特性将增加 deepflow-agent 的 CPU 消耗。 -### Vector 组件配置控制 {#inputs.vector.config} +#### Profile {#inputs.integration.compression.profile} **标签**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.vector.config` +`inputs.integration.compression.profile` + +Upgrade from old version: `static_config.external-agent-http-proxy-profile-compressed` **默认值**: ```yaml inputs: - vector: - config: null + integration: + compression: + profile: true ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | dict | +| Type | bool | **详细描述**: -Vector 组件的具体配置,所有可用配置可在此链接中查找:[vector.dev](https://vector.dev/docs/reference/configuration) -以下提供一份抓取 kubernetes 日志、宿主机指标及 kubernetes kubelet 指标的示例,并将这些数据发送到 DeepFlow-Agent。 - -抓取主机指标 -`K8S_NODE_NAME_FOR_DEEPFLOW` 变量仅容器环境必须,非容器环境可以去掉 -```yaml -sources: - host_metrics: - type: host_metrics - scrape_interval_secs: 10 - namespace: node -transforms: - host_process_filter: - type: filter - condition: '!starts_with(string!(.name), "process_")' - inputs: - - host_metrics - host_metrics_relabel: - type: remap - inputs: - - host_process_filter - source: | - .tags.instance = "${K8S_NODE_IP_FOR_DEEPFLOW}" - host_name, _ = get_env_var("K8S_NODE_NAME_FOR_DEEPFLOW") - if !is_empty(host_name) { - .tags.host = host_name - } - metrics_map = { - "boot_time": "boot_time_seconds", - "memory_active_bytes": "memory_Active_bytes", - "memory_available_bytes": "memory_MemAvailable_bytes", - "memory_buffers_bytes": "memory_Buffers_bytes", - "memory_cached_bytes": "memory_Cached_bytes", - "memory_free_bytes": "memory_MemFree_bytes", - "memory_swap_free_bytes": "memory_SwapFree_bytes", - 
"memory_swap_total_bytes": "memory_SwapTotal_bytes", - "memory_swap_used_bytes": "memory_SwapCached_bytes", - "memory_total_bytes": "memory_MemTotal_bytes", - "network_transmit_packets_drop_total": "network_transmit_drop_total", - "uptime": "uname_info", - "filesystem_total_bytes": "filesystem_size_bytes", - } - metric_name = get!(value: metrics_map, path: [.name]) - if !is_null(metric_name) { - .name = metric_name - } - if .tags.collector == "filesystem" { - .tags.fstype = .tags.filesystem - del(.tags.filesystem) - } -sinks: - prometheus_remote_write: - type: prometheus_remote_write - inputs: - - host_metrics_relabel - endpoint: http://127.0.0.1:38086/api/v1/prometheus - healthcheck: - enabled: false - -``` - -抓取 kubernetes 指标 -```yaml -secret: - kube_token: - type: directory - path: /var/run/secrets/kubernetes.io/serviceaccount -sources: - cadvisor_metrics: - type: prometheus_scrape - endpoints: - - https://${K8S_NODE_IP_FOR_DEEPFLOW}:10250/metrics/cadvisor - auth: - strategy: bearer - token: SECRET[kube_token.token] - scrape_interval_secs: 10 - scrape_timeout_secs: 10 - honor_labels: true - instance_tag: instance - endpoint_tag: metrics_endpoint - tls: - verify_certificate: false - kubelet_metrics: - type: prometheus_scrape - endpoints: - - https://${K8S_NODE_IP_FOR_DEEPFLOW}:10250/metrics - auth: - strategy: bearer - token: SECRET[kube_token.token] - scrape_interval_secs: 10 - scrape_timeout_secs: 10 - honor_labels: true - instance_tag: instance - endpoint_tag: metrics_endpoint - tls: - verify_certificate: false - kube_state_metrics: - type: prometheus_scrape - endpoints: - - http://opensource-kube-state-metrics:8080/metrics - scrape_interval_secs: 10 - scrape_timeout_secs: 10 - honor_labels: true - instance_tag: instance - endpoint_tag: metrics_endpoint -transforms: - cadvisor_relabel_filter: - type: filter - inputs: - - cadvisor_metrics - condition: "!match(string!(.name), 
r'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)|container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)|container_memory_(mapped_file|swap)|container_(file_descriptors|tasks_state|threads_max)')" - kubelet_relabel_filter: - type: filter - inputs: - - kubelet_metrics - condition: "match(string!(.name), r'kubelet_cgroup_(manager_duration_seconds_bucket|manager_duration_seconds_count)|kubelet_node_(config_error|node_name)|kubelet_pleg_relist_(duration_seconds_bucket|duration_seconds_count|interval_seconds_bucket)|kubelet_pod_(start_duration_seconds_count|worker_duration_seconds_bucket|worker_duration_seconds_count)|kubelet_running_(container_count|containers|pod_count|pods)|kubelet_runtime_(operations_duration_seconds_bucket|perations_errors_total|operations_total)|kubelet_volume_stats_(available_bytes|capacity_bytes|inodes|inodes_used)|process_(cpu_seconds_total|resident_memory_bytes)|rest_client_(request_duration_seconds_bucket|requests_total)|storage_operation_(duration_seconds_bucket|duration_seconds_count|errors_total)|up|volume_manager_total_volumes')" - kube_state_relabel_filter: - type: filter - inputs: - - kube_state_metrics - condition: "!match(string!(.name), r'kube_endpoint_address_not_ready|kube_endpoint_address_available')" - common_relabel_config: - type: remap - inputs: - - cadvisor_relabel_filter - - kubelet_relabel_filter - - kube_state_relabel_filter - source: |- - if !is_null(.tags) && is_string(.tags.metrics_endpoint) { - .tags.metrics_path = parse_regex!(.tags.metrics_endpoint, r'https?:\/\/[^\/]+(?\/.*)$').path - } -sinks: - prometheus_remote_write: - type: prometheus_remote_write - inputs: - - common_relabel_config - endpoint: http://127.0.0.1:38086/api/v1/prometheus - healthcheck: - enabled: false - -``` - -抓取 kubernetes 日志(以采集 DeepFlow Pod 日志为例,若需要采集其他 Pod 日志可修改 `extra_label_selector` 
并加上具体条件) -```yaml -data_dir: /vector-log-checkpoint -sources: - kubernetes_logs: - self_node_name: ${K8S_NODE_NAME_FOR_DEEPFLOW} - type: kubernetes_logs - namespace_annotation_fields: - namespace_labels: "" - node_annotation_fields: - node_labels: "" - pod_annotation_fields: - pod_annotations: "" - pod_labels: "" - extra_label_selector: "app=deepflow,component!=front-end" - kubernetes_logs_frontend: - self_node_name: ${K8S_NODE_NAME_FOR_DEEPFLOW} - type: kubernetes_logs - namespace_annotation_fields: - namespace_labels: "" - node_annotation_fields: - node_labels: "" - pod_annotation_fields: - pod_annotations: "" - pod_labels: "" - extra_label_selector: "app=deepflow,component=front-end" -transforms: - multiline_kubernetes_logs: - type: reduce - inputs: - - kubernetes_logs - group_by: - - file - - stream - merge_strategies: - message: concat_newline - starts_when: match(string!(.message), r'^(.+=|\[|\[?\u001B\[[0-9;]*m|\[mysql\]\s|\{\".+\"|(::ffff:)?([0-9]{1,3}.){3}[0-9]{1,3}[\s\-]+(\[)?)?\d{4}[-\/\.]?\d{2}[-\/\.]?\d{2}[T\s]?\d{2}:\d{2}:\d{2}') - expire_after_ms: 2000 - flush_period_ms: 500 - flush_kubernetes_logs: - type: remap - inputs: - - multiline_kubernetes_logs - source: |- - .message = replace(string!(.message), r'\u001B\[([0-9]{1,3}(;[0-9]{1,3})*)?m', "") - remap_kubernetes_logs: - type: remap - inputs: - - flush_kubernetes_logs - - kubernetes_logs_frontend - source: |- - if is_string(.message) && is_json(string!(.message)) { - tags = parse_json(.message) ?? {} - ._df_log_type = tags._df_log_type - .org_id = to_int(tags.org_id) ?? 0 - .user_id = to_int(tags.user_id) ?? 
0 - .message = tags.message || tags.msg - del(tags._df_log_type) - del(tags.org_id) - del(tags.user_id) - del(tags.message) - del(tags.msg) - .json = tags - } - if !exists(.level) { - if exists(.json) { - .level = to_string!(.json.level) - del(.json.level) - } else { - level_tags = parse_regex(.message, r'[\[\\<](?(?i)INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\]\\>]') ?? {} - if !exists(level_tags.level) { - level_tags = parse_regex(.message, r'[\s](?INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\s]') ?? {} - } - if exists(level_tags.level) { - level_tags.level = upcase(string!(level_tags.level)) - if level_tags.level == "INFORMATION" || level_tags.level == "INFOMATION" { - level_tags.level = "INFO" - } - if level_tags.level == "WARNING" { - level_tags.level = "WARN" - } - if level_tags.level == "DEBU" { - level_tags.level = "DEBUG" - } - if level_tags.level == "ERRO" { - level_tags.level = "ERROR" - } - if level_tags.level == "CRIT" || level_tags.level == "CRITICAL" { - level_tags.level = "FATAL" - } - .level = level_tags.level - } - } - } - if !exists(._df_log_type) { - ._df_log_type = "system" - } - if !exists(.app_service) { - .app_service = .kubernetes.container_name - } -sinks: - http: - type: http - inputs: [remap_kubernetes_logs] - uri: http://127.0.0.1:38086/api/v1/log - encoding: - codec: json - -``` - -使用 http_client 或者 socket 拨测一个远端服务 -```yaml -sources: - http_client_dial: - type: http_client - endpoint: http://$HOST:$PORT - method: GET - scrape_interval_secs: 10 - scrape_timeout_secs: 5 - internal_metrics: - type: internal_metrics - scrape_interval_secs: 10 - namespace: ${K8S_NAMESPACE_FOR_DEEPFLOW} - socket_dial_input: - type: demo_logs - interval: 10 - format: shuffle - lines: [""] -transforms: - internal_metrics_relabel: - type: remap - inputs: - - internal_metrics - source: |- - .tags.instance = "${K8S_NODE_IP_FOR_DEEPFLOW}" - internal_metrics_dispatch: - type: route - inputs: - - internal_metrics_relabel 
- route: - http_client_dial_metrics: '.tags.component_id == "http_client_dial"' - socket_dial_metrics: '.tags.component_id == "socket_dial"' - http_client_dial_metrics: - type: filter - inputs: - - internal_metrics_dispatch.http_client_dial_metrics - condition: "match(string!(.name),r'http_client_.*')" - socket_dial_metrics: - type: filter - inputs: - - internal_metrics_dispatch.socket_dial_metrics - condition: "match(string!(.name),r'buffer.*')" -sinks: - socket_dial: - type: socket - inputs: - - socket_dial_input - address: $HOST:$PORT - mode: tcp - encoding: - codec: raw_message - prometheus_remote_write: - type: prometheus_remote_write - inputs: - - http_client_dial_metrics - - socket_dial_metrics - endpoint: http://127.0.0.1:38086/api/v1/prometheus - healthcheck: - enabled: false - -``` - -# 处理器 {#processors} +开启后,deepflow-agent 将对集成的剖析数据进行压缩处理,压缩比例在 5:1~10:1 之间。注意: +开启此特性将增加 deepflow-agent 的 CPU 消耗。 -## Packet {#processors.packet} +### Prometheus 额外 Label {#inputs.integration.prometheus_extra_labels} -### Policy {#processors.packet.policy} +deepflow-agent 支持从 Prometheus RemoteWrite 的 http header 中获取额外的 label。 -#### Fast-path 字典大小 {#processors.packet.policy.fast_path_map_size} +#### Enabled {#inputs.integration.prometheus_extra_labels.enabled} **标签**: @@ -7368,30 +5199,28 @@ sinks: **FQCN**: -`processors.packet.policy.fast_path_map_size` +`inputs.integration.prometheus_extra_labels.enabled` -Upgrade from old version: `static_config.fast-path-map-size` +Upgrade from old version: `static_config.prometheus-extra-config.enabled` **默认值**: ```yaml -processors: - packet: - policy: - fast_path_map_size: 0 +inputs: + integration: + prometheus_extra_labels: + enabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [0, 10000000] | +| Type | bool | **详细描述**: -设置为`0`时,deepflow-agent 根据 `global.limits.max_memory` 参数自动调整 Fast-path 字典大小。 -注意:实践中不应配置小于 8000 的值。 +Prometheus 额外 label 的获取开关。 -#### 禁用 Fast-path 
{#processors.packet.policy.fast_path_disabled} +#### 额外 Label {#inputs.integration.prometheus_extra_labels.extra_labels} **标签**: @@ -7399,28 +5228,28 @@ processors: **FQCN**: -`processors.packet.policy.fast_path_disabled` +`inputs.integration.prometheus_extra_labels.extra_labels` -Upgrade from old version: `static_config.fast-path-disabled` +Upgrade from old version: `static_config.prometheus-extra-config.labels` **默认值**: ```yaml -processors: - packet: - policy: - fast_path_disabled: false +inputs: + integration: + prometheus_extra_labels: + extra_labels: [] ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **详细描述**: -设置为 `true` 时,deepflow-agent 不启用 fast path。 +Prometheus 额外 label 的列表。 -#### Forward 表容量 {#processors.packet.policy.forward_table_capacity} +#### Label 键总长度限制 {#inputs.integration.prometheus_extra_labels.label_length} **标签**: @@ -7428,29 +5257,30 @@ processors: **FQCN**: -`processors.packet.policy.forward_table_capacity` +`inputs.integration.prometheus_extra_labels.label_length` -Upgrade from old version: `static_config.forward-capacity` +Upgrade from old version: `static_config.prometheus-extra-config.labels-limit` **默认值**: ```yaml -processors: - packet: - policy: - forward_table_capacity: 16384 +inputs: + integration: + prometheus_extra_labels: + label_length: 1024 ``` **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [16384, 64000000] | +| Unit | byte | +| Range | [1024, 1048576] | **详细描述**: -转发表大小,用来存储 MAC-IP 信息,调大该参数,deepflow-agent 将消耗更多的内存。 +deepflow-agent 对 Prometheus 额外 label 解析并采集时,key 字段长度总和的上限。 -#### 最大 First-path 层级 {#processors.packet.policy.max_first_path_level} +#### Label 值总长度限制 {#inputs.integration.prometheus_extra_labels.value_length} **标签**: @@ -7458,285 +5288,258 @@ processors: **FQCN**: -`processors.packet.policy.max_first_path_level` +`inputs.integration.prometheus_extra_labels.value_length` -Upgrade from old version: 
`static_config.first-path-level` +Upgrade from old version: `static_config.prometheus-extra-config.values-limit` **默认值**: ```yaml -processors: - packet: - policy: - max_first_path_level: 8 +inputs: + integration: + prometheus_extra_labels: + value_length: 4096 ``` **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [1, 16] | +| Unit | byte | +| Range | [4096, 4194304] | **详细描述**: -DDBS 算法等级。 - -该配置越大内存开销越小,但是性能会降低。 +deepflow-agent 对 Prometheus 额外 label 解析并采集时,value 字段长度总和的上限。 -### TCP 包头(时序图) {#processors.packet.tcp_header} +### 特性开关 {#inputs.integration.feature_control} -#### Block 大小 {#processors.packet.tcp_header.block_size} +#### 禁用 Profile 集成 {#inputs.integration.feature_control.profile_integration_disabled} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.tcp_header.block_size` +`inputs.integration.feature_control.profile_integration_disabled` -Upgrade from old version: `static_config.packet-sequence-block-size` +Upgrade from old version: `static_config.external-profile-integration-disabled` **默认值**: ```yaml -processors: - packet: - tcp_header: - block_size: 256 +inputs: + integration: + feature_control: + profile_integration_disabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [16, 8192] | - -**详细描述**: - -压缩和保存多个 TCP 包头的缓冲区大小。 +| Type | bool | -#### Sender 队列大小 {#processors.packet.tcp_header.sender_queue_size} +#### 禁用 Trace 集成 {#inputs.integration.feature_control.trace_integration_disabled} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.tcp_header.sender_queue_size` +`inputs.integration.feature_control.trace_integration_disabled` -Upgrade from old version: `static_config.packet-sequence-queue-size` +Upgrade from old version: `static_config.external-trace-integration-disabled` **默认值**: ```yaml -processors: - packet: - tcp_header: - sender_queue_size: 65536 +inputs: + integration: + feature_control: + trace_integration_disabled: 
false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 64000000] | - -**详细描述**: - -TCP 包时序数据的单个发送队列的大小。 +| Type | bool | -#### 包头字段 Flag {#processors.packet.tcp_header.header_fields_flag} +#### 禁用 Metric 集成 {#inputs.integration.feature_control.metric_integration_disabled} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.tcp_header.header_fields_flag` +`inputs.integration.feature_control.metric_integration_disabled` -Upgrade from old version: `static_config.packet-sequence-flag` +Upgrade from old version: `static_config.external-metric-integration-disabled` **默认值**: ```yaml -processors: - packet: - tcp_header: - header_fields_flag: 0 +inputs: + integration: + feature_control: + metric_integration_disabled: false ``` **模式**: | Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 255] | - -**详细描述**: - -使用一个 8 bit 的 flag 对 deepflow-agent 采集上报的 TCP 报文时序数据内容进行控制,不同 -的 bit 位代表不同 TCP 字段的采集开关: -``` -| FLAG | SEQ | ACK | PAYLOAD_SIZE | WINDOW_SIZE | OPT_MSS | OPT_WS | OPT_SACK | - 7 6 5 4 3 2 1 0 -``` -flag 设置为`0`表示全部关闭,设置为`255`表示全部 - -### PCAP 字节流 {#processors.packet.pcap_stream} +| ---- | ---------------------------- | +| Type | bool | -#### Receiver 队列大小 {#processors.packet.pcap_stream.receiver_queue_size} +#### 禁用 Log 集成 {#inputs.integration.feature_control.log_integration_disabled} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.receiver_queue_size` +`inputs.integration.feature_control.log_integration_disabled` -Upgrade from old version: `static_config.pcap.queue-size` +Upgrade from old version: `static_config.external-log-integration-disabled` **默认值**: ```yaml -processors: - packet: - pcap_stream: - receiver_queue_size: 65536 +inputs: + integration: + feature_control: + log_integration_disabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 64000000] | +| Type | bool | -**详细描述**: 
+# 处理器 {#processors} -设置 deepflow-agent 的 1-mini-meta-packet-to-pcap 队列大小。 +## Packet {#processors.packet} + +### Policy {#processors.packet.policy} -#### Sender 队列大小 {#processors.packet.pcap_stream.sender_queue_size} +#### Fast-path 字典大小 {#processors.packet.policy.fast_path_map_size} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.sender_queue_size` +`processors.packet.policy.fast_path_map_size` + +Upgrade from old version: `static_config.fast-path-map-size` **默认值**: ```yaml processors: packet: - pcap_stream: - sender_queue_size: 8192 + policy: + fast_path_map_size: 0 ``` **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [4096, 64000000] | +| Range | [0, 10000000] | **详细描述**: -设置 deepflow-agent 的 2-pcap-batch-to-sender 队列大小。 +设置为`0`时,deepflow-agent 根据 `global.limits.max_memory` 参数自动调整 Fast-path 字典大小。 +注意:实践中不应配置小于 8000 的值。 -#### 每个 Flow 的缓冲区大小 {#processors.packet.pcap_stream.buffer_size_per_flow} +#### 禁用 Fast-path {#processors.packet.policy.fast_path_disabled} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.buffer_size_per_flow` +`processors.packet.policy.fast_path_disabled` -Upgrade from old version: `static_config.pcap.flow-buffer-size` +Upgrade from old version: `static_config.fast-path-disabled` **默认值**: ```yaml processors: packet: - pcap_stream: - buffer_size_per_flow: 65536 + policy: + fast_path_disabled: false ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [64, 64000000] | +| Type | bool | **详细描述**: -按流的 PCap 缓冲区大小。到达该值时 flush 该条流的 PCap 数据。 +设置为 `true` 时,deepflow-agent 不启用 fast path。 -#### 总体缓冲区大小 {#processors.packet.pcap_stream.total_buffer_size} +#### Forward 表容量 {#processors.packet.policy.forward_table_capacity} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.total_buffer_size` +`processors.packet.policy.forward_table_capacity` -Upgrade from old version: 
`static_config.pcap.buffer-size` +Upgrade from old version: `static_config.forward-capacity` **默认值**: ```yaml processors: packet: - pcap_stream: - total_buffer_size: 88304 + policy: + forward_table_capacity: 16384 ``` **模式**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [65536, 64000000] | +| Range | [16384, 64000000] | **详细描述**: -PCap 总缓冲区大小。到达该值时 flush 所有流的 PCap 数据。 +转发表大小,用来存储 MAC-IP 信息,调大该参数,deepflow-agent 将消耗更多的内存。 -#### Flush 间隔 {#processors.packet.pcap_stream.flush_interval} +#### 最大 First-path 层级 {#processors.packet.policy.max_first_path_level} **标签**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.flush_interval` +`processors.packet.policy.max_first_path_level` -Upgrade from old version: `static_config.pcap.flush-interval` +Upgrade from old version: `static_config.first-path-level` **默认值**: ```yaml processors: packet: - pcap_stream: - flush_interval: 1m + policy: + max_first_path_level: 8 ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['1s', '10m'] | +| Type | int | +| Range | [1, 16] | **详细描述**: -如果一条流的 PCap buffer 超过这个时间没有进行过 flush,强制触发一次 flush。 +DDBS 算法等级。 + +该配置越大内存开销越小,但是性能会降低。 ### TOA (TCP Option Address) {#processors.packet.toa} @@ -8112,316 +5915,6 @@ processors: 在不同的 Oracle 版本中,ID 为 0x04 的响应会有不同的数据结构,如果环境中该响应数据的 `影响行数`前有 1byte 的额外数据,请开启此开关。 -##### ISO8583 {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583} - -###### 数据翻译 {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583.translation_enabled} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.iso8583.translation_enabled` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - iso8583: - translation_enabled: true -``` - -**模式**: -| Key | Value | -| ---- | 
---------------------------- | -| Type | bool | - -**详细描述**: - -是否对解析后的数据进行查表翻译后展示。 - - - 支持翻译的字段列表: - -| 支持翻译的字段 | 示例(翻译前)|示例(翻译后) | 备注 | -|-----------------------|-------------- |----------------------- |--------------| -| 0-报文类型标识符 | 0100 | 0100-授权类请求 | | -| 3-交易处理码 | 300000 | 300000-余额查询 | | -| 32-受理机构标识码 | 6100**** | 6100-中国邮政储蓄银行 | 翻译前4位 | -| 39-应答码 | 00 | 00-承兑或交易成功 | | -| 49-交易货币代码 | 156 | 156-人民币元 | | - -###### 卡号脱敏 {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583.pan_obfuscate} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.iso8583.pan_obfuscate` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - iso8583: - pan_obfuscate: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -是否对卡号脱敏。 - -###### 提取字段 {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583.extract_fields} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.iso8583.extract_fields` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - iso8583: - extract_fields: 2,7,11,32,33 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -提取字段展示在`数据原生标签` - - 配置样例: `extract_fields: 0,2-33` - -字段对照表: - -| 字段号 | 说明 | -|--------|----------| -| 0 | 报文类型标识符 | -| 1 | 位图 | -| 2 | 主账号 | -| 3 | 交易处理码 | -| 4 | 交易金额 | -| 5 | 清算金额 | -| 6 | 持卡人扣账金额 | -| 7 | 交易传输时间 | -| 9 | 清算汇率 | -| 10 | 持卡人扣账汇率 | -| 11 | 系统跟踪号 | -| 12 | 受卡方所在地时间 | -| 13 | 受卡方所在地日期 | -| 14 | 卡有效期 | -| 15 | 清算日期 | -| 16 | 兑换日期 | -| 18 | 商户类型 | -| 19 | 商户国家代码 | -| 22 | 服务点输入方式码 | -| 23 | 卡序列号 | -| 25 | 服务点条件码 | -| 26 | 服务点 PIN 获取码 | -| 28 | 交易费 | -| 32 | 受理机构标识码 | -| 33 | 发送机构标识码 | -| 35 | 
第二磁道数据 | -| 36 | 第三磁道数据 | -| 37 | 检索参考号 | -| 38 | 授权标识应答码 | -| 39 | 应答码 | -| 41 | 受卡机终端标识码 | -| 42 | 受卡方标识码 | -| 43 | 受卡方名称地址 | -| 44 | 附加响应数据 | -| 45 | 第一磁道数据 | -| 48 | 附加数据-私有 | -| 49 | 交易货币代码 | -| 50 | 清算货币代码 | -| 51 | 持卡人账户货币代码 | -| 52 | 个人标识码数据 | -| 53 | 安全控制信息 | -| 54 | 实际余额 | -| 55 | IC 卡数据域 | -| 56 | 附加信息 | -| 57 | 附加交易信息 | -| 59 | 明细查询数据 | -| 60 | 自定义域 | -| 61 | 持卡人身份认证信息 | -| 62 | 交换中心数据 | -| 63 | 金融网络数据 | -| 70 | 网络管理信息码 | -| 90 | 原始数据元 | -| 96 | 报文安全码 | -| 100 | 接收机构标识码 | -| 102 | 账户标识 1 | -| 103 | 账户标识 2 | -| 104 | 附加信息 | -| 113 | 附加信息 | -| 116 | 附加信息 | -| 117 | 附加信息 | -| 121 | CUPS 保留 | -| 122 | 受理方保留 | -| 123 | 发卡方保留 | -| 125 | 附加信息 | -| 126 | 附加信息 | -| 128 | 报文鉴别码 | - -##### WebSphereMQ {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq} - -###### 解析 XML {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.parse_xml_enabled} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.parse_xml_enabled` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - web_sphere_mq: - parse_xml_enabled: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -是否解析 XML 数据内容。 - -###### 解压数据包 {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.decompress_enabled} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.decompress_enabled` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - web_sphere_mq: - decompress_enabled: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -部分 web_sphere_mq 的消息中使用 zlib 压缩,开启此选项后,agent 在解析时会对数据包进行解压。 
- -###### 属性字段过滤器 {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.filter_attributes_enabled} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.filter_attributes_enabled` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - web_sphere_mq: - filter_attributes_enabled: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开启此选项后,agent 在解析时 XML 仅保留如下字段,减少数据存储。 -- Document.ComConf.ConfInf.MT -- Document.ComConf.ConfInf.MsgId -- Document.ComConf.ConfInf.MsgPrcCd -- Document.ComConf.ConfInf.MsgRefId -- Document.ComConf.ConfInf.OrigSndDt -- Document.ComConf.ConfInf.OrigSndr -- Document.ComuCnfm.MsgId -- Document.ComuCnfm.MsgProCd -- Document.ComuCnfm.MsgRefId -- Document.ComuCnfm.MsgTp -- Document.ComuCnfm.OrigSndDt -- Document.ComuCnfm.OrigSndr - -##### NetSign {#processors.request_log.application_protocol_inference.protocol_special_config.net_sign} - -###### 提取 Biz Data {#processors.request_log.application_protocol_inference.protocol_special_config.net_sign.extract_biz_data_enabled} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.net_sign.extract_biz_data_enabled` - -**默认值**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - net_sign: - extract_biz_data_enabled: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -是否将完整 bizData 字段提取到数据属性中。 - ##### MySQL {#processors.request_log.application_protocol_inference.protocol_special_config.mysql} ###### 解压 MySQL 数据包 {#processors.request_log.application_protocol_inference.protocol_special_config.mysql.decompress_payload} @@ -9131,42 +6624,9 @@ Upgrade from old version: 
`http_log_proxy_client` processors: request_log: tag_extraction: - tracing_tag: - http_real_client: - - X_Forwarded_For -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -配置该参数后,deepflow-agent 会尝试从 HTTP header 中匹配特征字段,并将匹配到 -的结果填充到应用调用日志的`http_proxy_client`字段中,作为调用链追踪的特征值。 -如果指定多个值,优先级从前到后降低。插件重写的字段优先级最高。 - -##### X-Request-ID {#processors.request_log.tag_extraction.tracing_tag.x_request_id} - -**标签**: - -`hot_update` - -**FQCN**: - -`processors.request_log.tag_extraction.tracing_tag.x_request_id` - -Upgrade from old version: `http_log_x_request_id` - -**默认值**: -```yaml -processors: - request_log: - tag_extraction: - tracing_tag: - x_request_id: - - X_Request_ID + tracing_tag: + http_real_client: + - X_Forwarded_For ``` **模式**: @@ -9177,19 +6637,20 @@ processors: **详细描述**: 配置该参数后,deepflow-agent 会尝试从 HTTP header 中匹配特征字段,并将匹配到 -的结果填充到应用调用日志的`x_request_id`字段中,作为调用链追踪的特征值。 +的结果填充到应用调用日志的`http_proxy_client`字段中,作为调用链追踪的特征值。 如果指定多个值,优先级从前到后降低。插件重写的字段优先级最高。 -##### 多 TraceID 采集 {#processors.request_log.tag_extraction.tracing_tag.multiple_trace_id_collection} +##### X-Request-ID {#processors.request_log.tag_extraction.tracing_tag.x_request_id} **标签**: `hot_update` -ee_feature **FQCN**: -`processors.request_log.tag_extraction.tracing_tag.multiple_trace_id_collection` +`processors.request_log.tag_extraction.tracing_tag.x_request_id` + +Upgrade from old version: `http_log_x_request_id` **默认值**: ```yaml @@ -9197,18 +6658,20 @@ processors: request_log: tag_extraction: tracing_tag: - multiple_trace_id_collection: true + x_request_id: + - X_Request_ID ``` **模式**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **详细描述**: -- 配置为 `false` 时,根据配置 `APM TraceID` 采集到第一个匹配的 TraceID 就不继续采集。 -- 配置为 `true` 时,采集所有匹配到的 TraceID。 +配置该参数后,deepflow-agent 会尝试从 HTTP header 中匹配特征字段,并将匹配到 +的结果填充到应用调用日志的`x_request_id`字段中,作为调用链追踪的特征值。 +如果指定多个值,优先级从前到后降低。插件重写的字段优先级最高。 ##### APM TraceID 
{#processors.request_log.tag_extraction.tracing_tag.apm_trace_id} @@ -10107,38 +7570,6 @@ processors: 对于虚拟网络流量,流聚合仅匹配 l2end 为 true 的一端的 MAC 地址, 设置为 `true` 流聚合会使用全部MAC地址。 -##### IDC 流量忽略 VLAN {#processors.flow_log.conntrack.flow_generation.idc_traffic_ignore_vlan} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.flow_log.conntrack.flow_generation.idc_traffic_ignore_vlan` - -Upgrade from old version: `static_config.flow.ignore-idc-vlan` - -**默认值**: -```yaml -processors: - flow_log: - conntrack: - flow_generation: - idc_traffic_ignore_vlan: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -当在同一位置采集的双向流量的 VLAN 不对称时,流量无法聚合为同一条流。您可以 -此时设置此值。仅适用于 IDC(非云)流量。 - #### 超时设置 {#processors.flow_log.conntrack.timeouts} ##### Established {#processors.flow_log.conntrack.timeouts.established} @@ -10557,44 +7988,6 @@ outputs: 配置 deepflow-agent 向 deepflow-server 回传数据所用的 Socket 类型。在独立部署 模式下,需配置为 FILE 类型,agent 将 l4_flow_log 和 l7_flow_log 写入本地文件。 -### NPB Socket 类型 {#outputs.socket.npb_socket_type} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.socket.npb_socket_type` - -Upgrade from old version: `npb_socket_type` - -**默认值**: -```yaml -outputs: - socket: - npb_socket_type: RAW_UDP -``` - -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| UDP | | -| RAW_UDP | | -| TCP | | -| ZMQ | | - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**详细描述**: - -设置 NPB 分发时使用的 Socket 类型。RAW_UDP 使用 RawSocket 发送 UDP 数据,有更高的 -分发性能,但是可能存在一些环境不兼容的情况。 - ### RAW_UDP QoS Bypass {#outputs.socket.raw_udp_qos_bypass} **标签**: @@ -11202,259 +8595,6 @@ outputs: 配置如下队列的大小: - 3-doc-to-collector-sender -## NPB (Network Packet Broker) {#outputs.npb} - -### 最大 MTU {#outputs.npb.max_mtu} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.max_mtu` - -Upgrade from old version: `mtu` - -**默认值**: -```yaml -outputs: - npb: - max_mtu: 1500 -``` - -**模式**: 
-| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [500, 10000] | - -**详细描述**: - -NPB 分发时的 UDP 传输的 MTU 值。注意:当 UDP 报文长度接近 1500 字节后,云平台可能会 -修改数据包的尾部数据,因此建议`max_mtu`的值小于 1500。 - -### RAW_UDP 的 VLAN 标签 {#outputs.npb.raw_udp_vlan_tag} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.raw_udp_vlan_tag` - -Upgrade from old version: `output_vlan` - -**默认值**: -```yaml -outputs: - npb: - raw_udp_vlan_tag: 0 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 4095] | - -**详细描述**: - -当使用 RAW_UDP Socket 发送 NPB 数据时,通过该参数设置数据包 VLAN 标签。默认值为`0`,表示 -不使用 VLAN 标签。 - -### 额外的 VLAN 头 {#outputs.npb.extra_vlan_header} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.extra_vlan_header` - -Upgrade from old version: `npb_vlan_mode` - -**默认值**: -```yaml -outputs: - npb: - extra_vlan_header: 0 -``` - -**枚举可选值**: -| Value | Note | -| ----- | ---------------------------- | -| 0 | 无 | -| 1 | 802.1Q | -| 2 | QinQ | - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**详细描述**: - -设置 NPB 分发数据的 VLAN 模式。`无`表示不加 VLAN;`802.1Q`表示添加 802.1Q header; -`QinQ`表示添加 QinQ。 - -### 流量全局去重 {#outputs.npb.traffic_global_dedup} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.traffic_global_dedup` - -Upgrade from old version: `npb_dedup_enabled` - -**默认值**: -```yaml -outputs: - npb: - traffic_global_dedup: true -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -NPB 数据去重开关。开启开关后,将对 NPB 分发做全局去重,避免一份流量在客户端、服务端分发两次。 - -### 目的端口号 {#outputs.npb.target_port} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`outputs.npb.target_port` - -Upgrade from old version: `static_config.npb-port` - -**默认值**: -```yaml -outputs: - npb: - target_port: 4789 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 65535] | - -**详细描述**: - 
-NPB 分发使用的目标端口号。 - -### 自定义 VXLAN Flags {#outputs.npb.custom_vxlan_flags} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`outputs.npb.custom_vxlan_flags` - -Upgrade from old version: `static_config.vxlan-flags` - -**默认值**: -```yaml -outputs: - npb: - custom_vxlan_flags: 255 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 255] | - -**详细描述**: - -使用 VXLAN 分发时设置 VXLAN 内的 Flags 为该值。采集器不会采集分发流量。 - -这个配置默认会或上0b1000_0000,所以不能配置为 0b1000_0000。 - -### Overlay VLAN 头剥离 {#outputs.npb.overlay_vlan_header_trimming} - -**标签**: - -agent_restart -ee_feature - -**FQCN**: - -`outputs.npb.overlay_vlan_header_trimming` - -Upgrade from old version: `static_config.ignore-overlay-vlan` - -**默认值**: -```yaml -outputs: - npb: - overlay_vlan_header_trimming: false -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**详细描述**: - -开启开关后,deepflow-agent 在 NPB 分发时会剥离 overlay 原始数据包中的 VLAN 头。 - -### 最大 Tx 吞吐量 {#outputs.npb.max_tx_throughput} - -**标签**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.max_tx_throughput` - -Upgrade from old version: `max_npb_bps` - -**默认值**: -```yaml -outputs: - npb: - max_tx_throughput: 1000 -``` - -**模式**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | Mbps | -| Range | [1, 100000] | - -**详细描述**: - -设置 deepflow-agent 做 NPB 分发的最大吞吐率。 - ## 压缩 {#outputs.compression} ### Application_Log {#outputs.compression.application_log} diff --git a/server/agent_config/README.md b/server/agent_config/README.md index 801c71fed45..10bc9f8420e 100644 --- a/server/agent_config/README.md +++ b/server/agent_config/README.md @@ -508,78 +508,6 @@ global: The system load circuit breaker mechanism uses this metric, and the agent will check this metric every 10 seconds by default. 
-### Tx Throughput {#global.circuit_breakers.tx_throughput} - -#### Trigger Threshold {#global.circuit_breakers.tx_throughput.trigger_threshold} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`global.circuit_breakers.tx_throughput.trigger_threshold` - -Upgrade from old version: `max_tx_bandwidth` - -**Default value**: -```yaml -global: - circuit_breakers: - tx_throughput: - trigger_threshold: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | Mbps | -| Range | [0, 100000] | - -**Description**: - -When the outbound throughput of the NPB interface reaches or exceeds -the threshold, the broker will be stopped, after that the broker will -be resumed if the throughput is lower than -`(trigger_threshold - outputs.npb.max_tx_throughput)*90%` -within 5 consecutive monitoring intervals. - -Attention: When configuring this value, it must be greater than -`outputs.npb.max_tx_throughput`. Set to 0 will disable this feature. - -#### Throughput Monitoring Interval {#global.circuit_breakers.tx_throughput.throughput_monitoring_interval} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`global.circuit_breakers.tx_throughput.throughput_monitoring_interval` - -Upgrade from old version: `bandwidth_probe_interval` - -**Default value**: -```yaml -global: - circuit_breakers: - tx_throughput: - throughput_monitoring_interval: 10s -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['1s', '60s'] | - -**Description**: - -Monitoring interval for outbound traffic rate of NPB interface. 
- ### Free Disk {#global.circuit_breakers.free_disk} #### Percentage Trigger Threshold {#global.circuit_breakers.free_disk.percentage_trigger_threshold} @@ -2299,97 +2227,6 @@ Also ensure the global configuration parameters for related features are enabled - ebpf.profile.off_cpu (Ensure `inputs.ebpf.profile.off_cpu.disabled` is configured to **false**) - ebpf.profile.memory (Ensure `inputs.ebpf.profile.memory.disabled` is configured to **false**) -### AI Agent {#inputs.proc.ai_agent} - -#### HTTP Endpoints {#inputs.proc.ai_agent.http_endpoints} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.proc.ai_agent.http_endpoints` - -**Default value**: -```yaml -inputs: - proc: - ai_agent: - http_endpoints: - - /v1/chat/completions - - /v1/embeddings - - /v1/responses -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -HTTP endpoints for AI agent recognition. Requests that match any prefix will mark the process as AI Agent. - -#### Max Payload Size {#inputs.proc.ai_agent.max_payload_size} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.proc.ai_agent.max_payload_size` - -**Default value**: -```yaml -inputs: - proc: - ai_agent: - max_payload_size: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [0, 2147483647] | - -**Description**: - -Maximum payload size for AI agent reassembly. 0 means unlimited. - -#### File IO Enabled {#inputs.proc.ai_agent.file_io_enabled} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.proc.ai_agent.file_io_enabled` - -**Default value**: -```yaml -inputs: - proc: - ai_agent: - file_io_enabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to enable AI Agent file IO event collection. 
- ### Symbol Table {#inputs.proc.symbol_table} #### Golang-specific {#inputs.proc.symbol_table.golang_specific} @@ -2765,40 +2602,6 @@ inputs: The slave interfaces of one bond interface. -#### Extra Network Namespace Regex {#inputs.cbpf.af_packet.extra_netns_regex} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.cbpf.af_packet.extra_netns_regex` - -Upgrade from old version: `extra_netns_regex` - -**Default value**: -```yaml -inputs: - cbpf: - af_packet: - extra_netns_regex: '' -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -Packet will be captured in regex matched namespaces besides the default -namespace. NICs captured in extra namespaces are also filtered with -`inputs.cbpf.af_packet.interface_regex`. - -Default value `""` means no extra network namespace (default namespace only). - #### Extra BPF Filter {#inputs.cbpf.af_packet.extra_bpf_filter} **Tags**: @@ -2856,39 +2659,6 @@ inputs: | ---- | ---------------------------- | | Type | string | -#### VLAN PCP in Physical Mirror Traffic {#inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic` - -Upgrade from old version: `static_config.mirror-traffic-pcp` - -**Default value**: -```yaml -inputs: - cbpf: - af_packet: - vlan_pcp_in_physical_mirror_traffic: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 9] | - -**Description**: - -- When this configuration <= 7 calculate TAP value from vlan tag only if vlan pcp matches this value. -- when this configuration is 8 calculate TAP value from outer vlan tag, -- when this configuration is 9 calculate TAP value from inner vlan tag. 
- #### BPF Filter Disabled {#inputs.cbpf.af_packet.bpf_filter_disabled} **Tags**: @@ -3175,16 +2945,15 @@ Note: After the NIC is enabled in promiscuous mode, more traffic will be collect #### DPDK {#inputs.cbpf.special_network.dpdk} -##### Data Source {#inputs.cbpf.special_network.dpdk.source} +##### reorder cache window size {#inputs.cbpf.special_network.dpdk.reorder_cache_window_size} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.special_network.dpdk.source` +`inputs.cbpf.special_network.dpdk.reorder_cache_window_size` **Default value**: ```yaml @@ -3192,28 +2961,22 @@ inputs: cbpf: special_network: dpdk: - source: None + reorder_cache_window_size: 60ms ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| None | | -| eBPF | | -| pdump | | - **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['60ms', '100ms'] | **Description**: -Currently, there are two ways to collect DPDK traffic, including: -- pdump: See details [https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html](https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html) -- eBPF: Use eBPF Uprobe to obtain DPDK traffic, configuration `inputs.ebpf.socket.uprobe.dpdk` is also required. +When `inputs.cbpf.special_network.dpdk.source` is eBPF, the larger the time window will cause the agent to use more memory. 
-##### reorder cache window size {#inputs.cbpf.special_network.dpdk.reorder_cache_window_size} +### Tunning {#inputs.cbpf.tunning} + +#### Dispatcher Queue Enabled {#inputs.cbpf.tunning.dispatcher_queue_enabled} **Tags**: @@ -3221,167 +2984,176 @@ Currently, there are two ways to collect DPDK traffic, including: **FQCN**: -`inputs.cbpf.special_network.dpdk.reorder_cache_window_size` +`inputs.cbpf.tunning.dispatcher_queue_enabled` + +Upgrade from old version: `static_config.dispatcher-queue` **Default value**: ```yaml inputs: cbpf: - special_network: - dpdk: - reorder_cache_window_size: 60ms + tunning: + dispatcher_queue_enabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['60ms', '100ms'] | +| Type | bool | **Description**: -When `inputs.cbpf.special_network.dpdk.source` is eBPF, the larger the time window will cause the agent to use more memory. +The configuration takes effect when `inputs.cbpf.common.capture_mode` is `Local` or `Virtual Mirror`, +dispatcher-queue is always true when `inputs.cbpf.common.capture_mode` is `Physical Mirror`. -#### Libpcap {#inputs.cbpf.special_network.libpcap} +Available for all recv_engines. -##### Enabled {#inputs.cbpf.special_network.libpcap.enabled} +#### Maximum Capture Packet Size {#inputs.cbpf.tunning.max_capture_packet_size} **Tags**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.special_network.libpcap.enabled` +`inputs.cbpf.tunning.max_capture_packet_size` -Upgrade from old version: `static_config.libpcap-enabled` +Upgrade from old version: `capture_packet_size` **Default value**: ```yaml inputs: cbpf: - special_network: - libpcap: - enabled: false + tunning: + max_capture_packet_size: 65535 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Unit | byte | +| Range | [128, 65535] | **Description**: -Supports running on Windows and Linux, Low performance when using multiple interfaces. 
-Default to true in Windows, false in Linux. - -#### vHost User {#inputs.cbpf.special_network.vhost_user} +DPDK environment does not support this configuration. -##### vHost Socket Path {#inputs.cbpf.special_network.vhost_user.vhost_socket_path} +#### Max Capture PPS {#inputs.cbpf.tunning.max_capture_pps} **Tags**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.special_network.vhost_user.vhost_socket_path` +`inputs.cbpf.tunning.max_capture_pps` -Upgrade from old version: `static_config.vhost-socket-path` +Upgrade from old version: `max_collect_pps` **Default value**: ```yaml inputs: cbpf: - special_network: - vhost_user: - vhost_socket_path: '' + tunning: + max_capture_pps: 1048576 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Unit | pps | +| Range | [1, 10000000] | **Description**: -Supports running on Linux with mirror mode. +Maximum packet rate allowed for collection. + +Available for all recv_engines. -#### Physical Switch {#inputs.cbpf.special_network.physical_switch} +### Preprocess {#inputs.cbpf.preprocess} -##### sFlow Receiving Ports {#inputs.cbpf.special_network.physical_switch.sflow_ports} +#### Tunnel Decap Protocols {#inputs.cbpf.preprocess.tunnel_decap_protocols} **Tags**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.special_network.physical_switch.sflow_ports` +`inputs.cbpf.preprocess.tunnel_decap_protocols` -Upgrade from old version: `static_config.xflow-collector.sflow-ports` +Upgrade from old version: `decap_type` **Default value**: ```yaml inputs: cbpf: - special_network: - physical_switch: - sflow_ports: [] + preprocess: + tunnel_decap_protocols: + - 1 + - 2 ``` +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| 1 | VXLAN | +| 2 | IPIP | +| 3 | GRE | +| 4 | Geneve | +| 5 | VXLAN-NSH | + **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [1, 65535] | **Description**: -This 
feature is only supported by the Enterprise Edition of Trident. -In general, sFlow uses port 6343. Default value `[]` means that no sFlow -data will be collected. +Decapsulation tunnel protocols, Only the Enterprise Edition supports decap GRE and VXLAN-NSH. -##### NetFlow Receiving Ports {#inputs.cbpf.special_network.physical_switch.netflow_ports} +#### Tunnel Trim Protocols {#inputs.cbpf.preprocess.tunnel_trim_protocols} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.special_network.physical_switch.netflow_ports` +`inputs.cbpf.preprocess.tunnel_trim_protocols` -Upgrade from old version: `static_config.xflow-collector.netflow-ports` +Upgrade from old version: `static_config.trim-tunnel-types` **Default value**: ```yaml inputs: cbpf: - special_network: - physical_switch: - netflow_ports: [] -``` + preprocess: + tunnel_trim_protocols: [] +``` + +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| ERSPAN | | +| VXLAN | | +| TEB | | **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 65535] | +| Type | string | **Description**: -This feature is only supported by the Enterprise Edition of Trident. -Additionally, only NetFlow v5 is currently supported. In general, NetFlow -uses port 2055. Default value `[]` means that no NetFlow data will be collected. +Whether to remove the tunnel header in mirrored traffic. +Only the Enterprise Edition supports decap ERSPAN and TEB. -### Tunning {#inputs.cbpf.tunning} +## eBPF {#inputs.ebpf} -#### Dispatcher Queue Enabled {#inputs.cbpf.tunning.dispatcher_queue_enabled} +### Disabled {#inputs.ebpf.disabled} **Tags**: @@ -3389,16 +3161,15 @@ uses port 2055. Default value `[]` means that no NetFlow data will be collected. 
**FQCN**: -`inputs.cbpf.tunning.dispatcher_queue_enabled` +`inputs.ebpf.disabled` -Upgrade from old version: `static_config.dispatcher-queue` +Upgrade from old version: `static_config.ebpf.disabled` **Default value**: ```yaml inputs: - cbpf: - tunning: - dispatcher_queue_enabled: false + ebpf: + disabled: false ``` **Schema**: @@ -3408,192 +3179,202 @@ inputs: **Description**: -The configuration takes effect when `inputs.cbpf.common.capture_mode` is `Local` or `Virtual Mirror`, -dispatcher-queue is always true when `inputs.cbpf.common.capture_mode` is `Physical Mirror`. +Whether to enable eBPF features. -Available for all recv_engines. +### Socket {#inputs.ebpf.socket} -#### Maximum Capture Packet Size {#inputs.cbpf.tunning.max_capture_packet_size} +#### Uprobe {#inputs.ebpf.socket.uprobe} + +##### Golang {#inputs.ebpf.socket.uprobe.golang} + +###### Enabled {#inputs.ebpf.socket.uprobe.golang.enabled} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.cbpf.tunning.max_capture_packet_size` +`inputs.ebpf.socket.uprobe.golang.enabled` -Upgrade from old version: `capture_packet_size` +Upgrade from old version: `static_config.ebpf.uprobe-golang-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.golang` **Default value**: ```yaml inputs: - cbpf: - tunning: - max_capture_packet_size: 65535 + ebpf: + socket: + uprobe: + golang: + enabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [128, 65535] | +| Type | bool | **Description**: -DPDK environment does not support this configuration. +Whether golang process enables HTTP2/HTTPS protocol data collection +and auto-tracing. go auto-tracing also dependent go-tracing-timeout. 
-#### Raw Packet Buffer Block Size {#inputs.cbpf.tunning.raw_packet_buffer_block_size} +Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, +i.e., `ebpf.socket.uprobe.golang` must be included in `inputs.proc.process_matcher.[*].enabled_features`. + +###### Tracing Timeout {#inputs.ebpf.socket.uprobe.golang.tracing_timeout} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.tunning.raw_packet_buffer_block_size` +`inputs.ebpf.socket.uprobe.golang.tracing_timeout` -Upgrade from old version: `static_config.analyzer-raw-packet-block-size` +Upgrade from old version: `static_config.ebpf.go-tracing-timeout` **Default value**: ```yaml inputs: - cbpf: - tunning: - raw_packet_buffer_block_size: 65536 + ebpf: + socket: + uprobe: + golang: + tracing_timeout: 120s ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 16000000] | +| Type | duration | +| Range | ['0ns', '1d'] | **Description**: -In certain modes, raw packets will go through a queue before being processed. -To avoid memory allocation for each packet, a memory block of size -raw_packet_buffer_block_size is allocated for multiple packets. -Larger value will reduce memory allocation for raw packet, but will also -delay memory free. -This configuration is effective for the following `inputs.cbpf.common.capture_mode`: -- analyzer mode -- local mode with `inputs.cbpf.af_packet.inner_interface_capture_enabled` = true -- local mode with `inputs.cbpf.tunning.dispatcher_queue_enabled` = true -- mirror mode with `inputs.cbpf.tunning.dispatcher_queue_enabled` = true +The expected maximum time interval between the server receiving the request and returning +the response, If the value is '0ns', this feature is disabled. Tracing only considers the +thread number. 
+ +##### TLS {#inputs.ebpf.socket.uprobe.tls} -#### Raw Packet Queue Size {#inputs.cbpf.tunning.raw_packet_queue_size} +###### Enabled {#inputs.ebpf.socket.uprobe.tls.enabled} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.tunning.raw_packet_queue_size` +`inputs.ebpf.socket.uprobe.tls.enabled` -Upgrade from old version: `static_config.analyzer-queue-size` +Upgrade from old version: `static_config.ebpf.uprobe-openssl-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.openssl` **Default value**: ```yaml inputs: - cbpf: - tunning: - raw_packet_queue_size: 131072 + ebpf: + socket: + uprobe: + tls: + enabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 64000000] | +| Type | bool | **Description**: -The length of the following queues (only for `inputs.cbpf.common.capture_mode` = `Physical Mirror`): -- 0.1-bytes-to-parse -- 0.2-packet-to-flowgenerator -- 0.3-packet-to-pipeline +Whether to enable HTTPS protocol data collection for processes that use the openssl library. -#### Max Capture PPS {#inputs.cbpf.tunning.max_capture_pps} +One can use the following method to determine whether an application process can use +`Uprobe hook openssl library` to access encrypted data: +- Use the command `cat /proc/<PID>/maps | grep "libssl.so"` to check if it contains + information about openssl. If it does, it indicates that this process is using the + openssl library. +- If "libssl.so" is not found above, it may indicate that the program + is statically linked with OpenSSL. In that case, you can verify it by: + running the command `sudo nm /proc/<PID>/exe | grep SSL_write`. + If the output contains symbols such as `0000000000502ac0 T SSL_write`, + it means the process is using a statically linked OpenSSL library. + +Once enabled, deepflow-agent will retrieve process information that +matches the regular expression, hooking the corresponding encryption/decryption +interfaces of the openssl library. 
In the logs, you will encounter a message similar +to the following: +``` +[eBPF] INFO openssl uprobe, pid:1005, path:/proc/1005/root/usr/lib64/libssl.so.1.0.2k +OR +[eBPF] INFO openssl uprobe, pid:28890, path:/proc/28890/root/usr/sbin/nginx +``` + +Note: When this feature is enabled, Envoy mTLS traffic can be automatically traced. +For non-Envoy traffic, the specific process list must also be specified in `inputs.proc.process_matcher`, +i.e., `ebpf.socket.uprobe.tls` must be included in `inputs.proc.process_matcher.[*].enabled_features`. + +#### Kprobe {#inputs.ebpf.socket.kprobe} + +##### kprobe disabled {#inputs.ebpf.socket.kprobe.disabled} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.cbpf.tunning.max_capture_pps` - -Upgrade from old version: `max_collect_pps` +`inputs.ebpf.socket.kprobe.disabled` **Default value**: ```yaml inputs: - cbpf: - tunning: - max_capture_pps: 1048576 + ebpf: + socket: + kprobe: + disabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | pps | -| Range | [1, 10000000] | +| Type | bool | **Description**: -Maximum packet rate allowed for collection. - -Available for all recv_engines. - -### Preprocess {#inputs.cbpf.preprocess} +When set to true, kprobe will be disabled. 
-#### Tunnel Decap Protocols {#inputs.cbpf.preprocess.tunnel_decap_protocols} +##### Unix Socket Enabled {#inputs.ebpf.socket.kprobe.enable_unix_socket} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.cbpf.preprocess.tunnel_decap_protocols` - -Upgrade from old version: `decap_type` +`inputs.ebpf.socket.kprobe.enable_unix_socket` **Default value**: ```yaml inputs: - cbpf: - preprocess: - tunnel_decap_protocols: - - 1 - - 2 + ebpf: + socket: + kprobe: + enable_unix_socket: false ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| 1 | VXLAN | -| 2 | IPIP | -| 3 | GRE | -| 4 | Geneve | -| 5 | VXLAN-NSH | - **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | +| Type | bool | **Description**: -Decapsulation tunnel protocols, Only the Enterprise Edition supports decap GRE and VXLAN-NSH. +When set to true, enable tracing of Unix domain sockets. -#### Tunnel Trim Protocols {#inputs.cbpf.preprocess.tunnel_trim_protocols} +##### Blacklist {#inputs.ebpf.socket.kprobe.blacklist} + +###### Port Numbers {#inputs.ebpf.socket.kprobe.blacklist.ports} **Tags**: @@ -3601,25 +3382,20 @@ Decapsulation tunnel protocols, Only the Enterprise Edition supports decap GRE a **FQCN**: -`inputs.cbpf.preprocess.tunnel_trim_protocols` +`inputs.ebpf.socket.kprobe.blacklist.ports` -Upgrade from old version: `static_config.trim-tunnel-types` +Upgrade from old version: `static_config.ebpf.kprobe-blacklist.port-list` **Default value**: ```yaml inputs: - cbpf: - preprocess: - tunnel_trim_protocols: [] + ebpf: + socket: + kprobe: + blacklist: + ports: '' ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| ERSPAN | | -| VXLAN | | -| TEB | | - **Schema**: | Key | Value | | ---- | ---------------------------- | @@ -3627,28 +3403,32 @@ inputs: **Description**: -Whether to remove the tunnel header in mirrored traffic. -Only the Enterprise Edition supports decap ERSPAN and TEB. 
+TCP&UDP Port Blacklist, Priority higher than kprobe-whitelist. + +Example: `ports: 80,1000-2000` + +##### Whitelist {#inputs.ebpf.socket.kprobe.whitelist} -#### Packet Segmentation Reassembly Ports {#inputs.cbpf.preprocess.packet_segmentation_reassembly} +###### Port Numbers {#inputs.ebpf.socket.kprobe.whitelist.ports} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.cbpf.preprocess.packet_segmentation_reassembly` +`inputs.ebpf.socket.kprobe.whitelist.ports` -Upgrade from old version: `static_config.packet-segmentation-reassembly` +Upgrade from old version: `static_config.ebpf.kprobe-whitelist.port-list` **Default value**: ```yaml inputs: - cbpf: - preprocess: - packet_segmentation_reassembly: [] + ebpf: + socket: + kprobe: + whitelist: + ports: '' ``` **Schema**: @@ -3658,120 +3438,120 @@ inputs: **Description**: -For the specified ports, consecutive TCP packets will be aggregated together for application log parsing. +TCP&UDP Port Whitelist, Priority lower than kprobe-blacklist. +Use kprobe to collect data on ports that are not in the blacklist or whitelist. 
-Example: +Example: `ports: 80,1000-2000` -packet_segmentation_reassembly: -- 1000 -- 2000-2010 -- 5000 +#### SockOps {#inputs.ebpf.socket.sock_ops} -### Physical Mirror Traffic {#inputs.cbpf.physical_mirror} +##### TCP Option Trace {#inputs.ebpf.socket.sock_ops.tcp_option_trace} -#### Default Capture Network Type {#inputs.cbpf.physical_mirror.default_capture_network_type} +###### TCP Option Tracing {#inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled} **Tags**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.physical_mirror.default_capture_network_type` - -Upgrade from old version: `static_config.default-tap-type` +`inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled` **Default value**: ```yaml inputs: - cbpf: - physical_mirror: - default_capture_network_type: 3 + ebpf: + socket: + sock_ops: + tcp_option_trace: + enabled: false ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| 3 | Cloud Network | -| _DYNAMIC_OPTIONS_ | _DYNAMIC_OPTIONS_ | - **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | +| Type | bool | **Description**: -deepflow-agent will mark the TAP (Traffic Access Point) location -according to the outer vlan tag in the mirrored traffic of the physical -switch. When the vlan tag has no corresponding TAP value, or the vlan -pcp does not match the `inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic`, it will assign the TAP value. -This configuration item. Default value `3` means Cloud Network. +Whether to enable the tcp-option tracing SockOps program, which injects DeepFlow metadata +(for example, process PID) into a custom TCP option for eligible connections. +Note: This feature requires cgroup v2 (unified hierarchy) and kernel > 5.10. On hosts +using cgroup v1 the SockOps program will fail to attach and the agent will log a warning. +Compatibility: validated on x86 with kernel > 5.10; on arm we have only tested with +kernel 6.8 so far. 
+Limitation: PID tracking relies on the per-CPU syscall map. Under CPU congestion, +softirqs handling TCP may run on a different CPU than the userspace thread, so the +injected metadata can be missing or stale. -#### Packet Dedup Disabled {#inputs.cbpf.physical_mirror.packet_dedup_disabled} +###### PID Injection Window {#inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes} **Tags**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.physical_mirror.packet_dedup_disabled` - -Upgrade from old version: `static_config.analyzer-dedup-disabled` +`inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes` **Default value**: ```yaml inputs: - cbpf: - physical_mirror: - packet_dedup_disabled: false -``` + ebpf: + socket: + sock_ops: + tcp_option_trace: + sampling_window_bytes: 16384 +``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Unit | Bytes | +| Range | [0, 1048576] | **Description**: -Whether to enable mirror traffic deduplication when `inputs.cbpf.common.capture_mode` = `Physical Mirror`. +Minimum number of TCP payload bytes between PID injections. Default 16KB matches the +legacy behavior; smaller windows increase frequency, larger windows decrease it. Set to +0 to disable sampling and inject on every eligible packet. 
+ +#### Tunning {#inputs.ebpf.socket.tunning} -#### Gateway Traffic of Private Cloud {#inputs.cbpf.physical_mirror.private_cloud_gateway_traffic} +##### Max Capture Rate {#inputs.ebpf.socket.tunning.max_capture_rate} **Tags**: -agent_restart -ee_feature +`hot_update` **FQCN**: -`inputs.cbpf.physical_mirror.private_cloud_gateway_traffic` +`inputs.ebpf.socket.tunning.max_capture_rate` -Upgrade from old version: `static_config.cloud-gateway-traffic` +Upgrade from old version: `static_config.ebpf.global-ebpf-pps-threshold` **Default value**: ```yaml inputs: - cbpf: - physical_mirror: - private_cloud_gateway_traffic: false + ebpf: + socket: + tunning: + max_capture_rate: 0 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Unit | Per Second | +| Range | [0, 64000000] | **Description**: -Whether it is the mirrored traffic of NFVGW (cloud gateway) when `inputs.cbpf.common.capture_mode` = `Physical Mirror`. - -## eBPF {#inputs.ebpf} +Default value `0` means no limitation. -### Disabled {#inputs.ebpf.disabled} +##### Syscall_trace_id Disabled {#inputs.ebpf.socket.tunning.syscall_trace_id_disabled} **Tags**: @@ -3779,15 +3559,15 @@ Whether it is the mirrored traffic of NFVGW (cloud gateway) when `inputs.cbpf.co **FQCN**: -`inputs.ebpf.disabled` - -Upgrade from old version: `static_config.ebpf.disabled` +`inputs.ebpf.socket.tunning.syscall_trace_id_disabled` **Default value**: ```yaml inputs: ebpf: - disabled: false + socket: + tunning: + syscall_trace_id_disabled: false ``` **Schema**: @@ -3797,15 +3577,11 @@ inputs: **Description**: -Whether to enable eBPF features. - -### Socket {#inputs.ebpf.socket} - -#### Uprobe {#inputs.ebpf.socket.uprobe} - -##### Golang {#inputs.ebpf.socket.uprobe.golang} +When the trace_id is injected into all requests, the computation logic for all +syscall_trace_id can be turned off. 
This will significantly reduce the impact of the +eBPF hook on the CPU consumption of the application process. -###### Enabled {#inputs.ebpf.socket.uprobe.golang.enabled} +##### Disable Pre-allocating Memory {#inputs.ebpf.socket.tunning.map_prealloc_disabled} **Tags**: @@ -3813,18 +3589,17 @@ Whether to enable eBPF features. **FQCN**: -`inputs.ebpf.socket.uprobe.golang.enabled` +`inputs.ebpf.socket.tunning.map_prealloc_disabled` -Upgrade from old version: `static_config.ebpf.uprobe-golang-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.golang` +Upgrade from old version: `static_config.ebpf.map-prealloc-disabled` **Default value**: ```yaml inputs: ebpf: socket: - uprobe: - golang: - enabled: false + tunning: + map_prealloc_disabled: false ``` **Schema**: @@ -3834,13 +3609,13 @@ inputs: **Description**: -Whether golang process enables HTTP2/HTTPS protocol data collection -and auto-tracing. go auto-tracing also dependent go-tracing-timeout. - -Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, -i.e., `ebpf.socket.uprobe.golang` must be included in `inputs.proc.process_matcher.[*].enabled_features`. +When full map preallocation is too expensive, setting this configuration to `true` will +prevent memory pre-allocation during map definition, but it may result in some performance +degradation. This configuration only applies to maps of type 'BPF_MAP_TYPE_HASH'. +Currently applicable to socket trace and uprobe Golang/OpenSSL trace functionalities. +Disabling memory preallocation reduces memory usage by approximately 45MB. 
-###### Tracing Timeout {#inputs.ebpf.socket.uprobe.golang.tracing_timeout} +##### Hooked Socket Syscalls {#inputs.ebpf.socket.tunning.hooked_socket_syscalls} **Tags**: @@ -3848,35 +3623,60 @@ i.e., `ebpf.socket.uprobe.golang` must be included in `inputs.proc.process_match **FQCN**: -`inputs.ebpf.socket.uprobe.golang.tracing_timeout` - -Upgrade from old version: `static_config.ebpf.go-tracing-timeout` +`inputs.ebpf.socket.tunning.hooked_socket_syscalls` **Default value**: ```yaml inputs: ebpf: socket: - uprobe: - golang: - tracing_timeout: 120s + tunning: + hooked_socket_syscalls: + - read + - readv + - recvfrom + - recvmsg + - recvmmsg + - sendmsg + - sendmmsg + - sendto + - write + - writev ``` +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| read | | +| readv | | +| recvfrom | | +| recvmsg | | +| recvmmsg | | +| sendmsg | | +| sendmmsg | | +| sendto | | +| write | | +| writev | | + **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['0ns', '1d'] | +| Type | string | **Description**: -The expected maximum time interval between the server receiving the request and returning -the response, If the value is '0ns', this feature is disabled. Tracing only considers the -thread number. +Controls which supported socket syscalls will have eBPF hooks installed. -##### TLS {#inputs.ebpf.socket.uprobe.tls} +This list only controls whether a syscall is hooked. The backend type used for each +enabled syscall still follows the current runtime mode selection logic. For example, +the mixed mode keeps its existing hybrid and tracepoint-only split, the pure-kprobe +mode keeps its existing kprobe behavior, and the kfunc mode keeps its existing kfunc +behavior plus the tracepoint fallback for `recvfrom` and `recvmmsg`. -###### Enabled {#inputs.ebpf.socket.uprobe.tls.enabled} +Supported values: `read`, `readv`, `recvfrom`, `recvmsg`, `recvmmsg`, `sendmsg`, +`sendmmsg`, `sendto`, `write`, `writev`. 
+ +##### Enable the fentry/fexit feature {#inputs.ebpf.socket.tunning.fentry_enabled} **Tags**: @@ -3884,18 +3684,15 @@ thread number. **FQCN**: -`inputs.ebpf.socket.uprobe.tls.enabled` - -Upgrade from old version: `static_config.ebpf.uprobe-openssl-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.openssl` +`inputs.ebpf.socket.tunning.fentry_enabled` **Default value**: ```yaml inputs: ebpf: socket: - uprobe: - tls: - enabled: false + tunning: + fentry_enabled: false ``` **Schema**: @@ -3905,222 +3702,156 @@ inputs: **Description**: -Whether the process that uses the openssl library to enable HTTPS protocol data collection. - -One can use the following method to determine whether an application process can use -`Uprobe hook openssl library` to access encrypted data: -- Use the command `cat /proc//maps | grep "libssl.so"` to check if it contains - information about openssl. If it does, it indicates that this process is using the - openssl library. -- If "libssl.so" is not found above, it may indicate that the program - is statically linked with OpenSSL. In that case, you can verify it by: - running the command `sudo nm /proc//exe | grep SSL_write`. - If the output contains symbols such as `0000000000502ac0 T SSL_write`, - it means the process is using a statically linked OpenSSL library. - -After enabled, deepflow-agent will retrieve process information that -matches the regular expression, hooking the corresponding encryption/decryption -interfaces of the openssl library. In the logs, you will encounter a message similar -to the following: -``` -[eBPF] INFO openssl uprobe, pid:1005, path:/proc/1005/root/usr/lib64/libssl.so.1.0.2k -OR -[eBPF] INFO openssl uprobe, pid:28890, path:/proc/28890/root/usr/sbin/nginx -``` +Explanation of Using fentry/fexit Features +- Compared to traditional kprobes, fentry and fexit programs offer higher performance and + availability, providing approximately 5%-10% performance improvement. 
+- Some Linux kernels do not fully support this feature, which may lead to kernel bugs and + node crashes. Known bug fixes include: + - Bug fix for TencentOS Linux kernel 5.4.119 + [https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423](https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423) + - Bug fix for Alibaba Cloud Linux kernel 5.10.23 + [https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861](https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861) +- Kernel recommendation: To enable the fentry/fexit feature, it is recommended to use Linux + kernel 5.10.28 or later to ensure stability and performance. -Note: When this feature is enabled, Envoy mTLS traffic can be automatically traced. -For non-Envoy traffic, the specific process list must also be specified in `inputs.proc.process_matcher`, -i.e., `ebpf.socket.uprobe.tls` must be included in `inputs.proc.process_matcher.[*].enabled_features`. +### File {#inputs.ebpf.file} -##### DPDK {#inputs.ebpf.socket.uprobe.dpdk} +#### IO Event {#inputs.ebpf.file.io_event} -###### DPDK Application Command Name {#inputs.ebpf.socket.uprobe.dpdk.command} +##### Collect Mode {#inputs.ebpf.file.io_event.collect_mode} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.uprobe.dpdk.command` +`inputs.ebpf.file.io_event.collect_mode` + +Upgrade from old version: `static_config.ebpf.io-event-collect-mode` **Default value**: ```yaml inputs: ebpf: - socket: - uprobe: - dpdk: - command: '' + file: + io_event: + collect_mode: 1 ``` +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| 0 | Disabled | +| 1 | Request Life Cycle | +| 2 | All | + **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | **Description**: -Set the command name of the DPDK application, eBPF will automatically -locate and trace packets for data collection. 
- -Example: In the command line `/usr/bin/mydpdk`, it can be set as `command: mydpdk`, and set `inputs.cbpf.special_network.dpdk.source = eBPF` +Collection modes: +- Disabled: Indicates that no IO events are collected. +- Request Life Cycle: Indicates that only IO events within the request life cycle are collected. +- All: Indicates that all IO events are collected. -In scenarios where DPDK acts as the vhost-user backend, data exchange between the virtual machine and the DPDK -application occurs through virtqueues (vrings). eBPF can automatically hook into the vring interface without -requiring any modifications to DPDK or the virtual machine, enabling packet capture and traffic observability -with zero additional configuration. In contrast, capturing packets on physical NICs requires explicit configuration -of the corresponding DPDK driver interfaces. +Note: +- To obtain the full file path, we need to combine it with the process's mount information. However, + some processes exit quickly after completing their tasks. When we attempt to process the file I/O + data generated by such processes, the corresponding /proc/[pid]/mountinfo entry may no longer be + available, resulting in incomplete paths (missing mount points). For processes with a lifetime + shorter than 50 ms, the file path may lack mount point information. This issue does not occur with + long-running processes. 
-###### DPDK Application RX Hooks Configuration {#inputs.ebpf.socket.uprobe.dpdk.rx_hooks} +##### Minimal Duration {#inputs.ebpf.file.io_event.minimal_duration} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.uprobe.dpdk.rx_hooks` +`inputs.ebpf.file.io_event.minimal_duration` + +Upgrade from old version: `static_config.ebpf.io-event-minimal-duration` **Default value**: ```yaml inputs: ebpf: - socket: - uprobe: - dpdk: - rx_hooks: [] + file: + io_event: + minimal_duration: 1ms ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['1ns', '1s'] | **Description**: -Fill in the appropriate packet reception hook point according to the actual network card driver. -You can use the command 'lspci -vmmk' to find the network card driver type. For example: -``` -Slot: 04:00.0 -Class: Ethernet controller -Vendor: Intel Corporation -Device: Ethernet Controller XL710 for 40GbE QSFP+ -SVendor: Unknown vendor 1e18 -SDevice: Device 4712 -Rev: 02 -Driver: igb_uio -Module: i40e -``` -In the example above, "Driver: igb_uio" indicates a DPDK-managed device (other options include -"vfio-pci" and "uio_pci_generic", which are also managed by DPDK). The actual driver is 'i40e' -(derived from 'Module: i40e'). - -You can use the sustainable profiling feature provided by DeepFlow to perform function profiling -on the DPDK application and check the specific interface names. Alternatively, you can run the -`perf` command on the node where the agent is located: -`perf record -F97 -a -g -p -- sleep 30` -and then use -`perf script | grep -E 'recv|xmit|rx|tx' | grep ` (`drive_name` may be `ixgbe/i40e/mlx5`) -to confirm the driver interfaces. - -Below are some common interface names for different drivers, for reference only: - 1. Physical NIC Drivers: - - Intel Drivers: - - ixgbe: Supports Intel 82598/82599/X520/X540/X550 series NICs. 
- - rx: ixgbe_recv_pkts, ixgbe_recv_pkts_vec - - tx: ixgbe_xmit_pkts, ixgbe_xmit_fixed_burst_vec, ixgbe_xmit_pkts_vec - - i40e: Supports Intel X710, XL710 series NICs. - - rx: i40e_recv_pkts - - tx: i40e_xmit_pkts - - ice: Supports Intel E810 series NICs. - - rx: ice_recv_pkts - - tx: ice_xmit_pkts - - Mellanox Drivers: - - mlx4: Supports Mellanox ConnectX-3 series NICs. - - rx: mlx4_rx_burst - - tx: mlx4_tx_burst - - mlx5: Supports Mellanox ConnectX-4, ConnectX-5, ConnectX-6 series NICs. - - rx: mlx5_rx_burst, mlx5_rx_burst_vec, mlx5_rx_burst_mprq - - tx: Pending confirmation - - Broadcom Drivers: - - bnxt: Supports Broadcom NetXtreme series NICs. - - rx: bnxt_recv_pkts, bnxt_recv_pkts_vec (x86, Vector mode receive) - - tx: bnxt_xmit_pkts, bnxt_xmit_pkts_vec (x86, Vector mode transmit) - 2. Virtual NIC Drivers: - - Virtio Driver: - - virtio: Supports Virtio-based virtual network interfaces. - - rx: virtio_recv_pkts, virtio_recv_mergeable_pkts_packed, virtio_recv_pkts_packed, - virtio_recv_pkts_vec, virtio_recv_pkts_inorder, virtio_recv_mergeable_pkts - - tx: virtio_xmit_pkts_packed, virtio_xmit_pkts - - VMXNET3 Driver: - - vmxnet3: Supports VMware's VMXNET3 virtual NICs. - - rx: vmxnet3_recv_pkts - - tx: vmxnet3_xmit_pkts - -Example: `rx_hooks: [ixgbe_recv_pkts, i40e_recv_pkts, virtio_recv_pkts, virtio_recv_mergeable_pkts]` - -Note: When using the burst mode of the current DPDK driver interface to send and receive packets, -the number of eBPF instructions is limited to 4096 in older Linux kernels (below Linux 5.2). As a -result, during DPDK packet capture, only a maximum of 16 packets can be captured. For Linux kernels -5.2 and above, up to 32 packets can be captured (this is typically the default value for DPDK -burst mode). For kernels older than Linux 5.2, packet loss may occur (if the burst size exceeds 16). +Only collect IO events with delay exceeding this threshold. 
-###### DPDK Application TX Hooks Configuration {#inputs.ebpf.socket.uprobe.dpdk.tx_hooks} +##### Virtual File Collection Enabled {#inputs.ebpf.file.io_event.enable_virtual_file_collect} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.uprobe.dpdk.tx_hooks` +`inputs.ebpf.file.io_event.enable_virtual_file_collect` **Default value**: ```yaml inputs: ebpf: - socket: - uprobe: - dpdk: - tx_hooks: [] + file: + io_event: + enable_virtual_file_collect: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | bool | **Description**: -Specify the appropriate packet transmission hook point according to the actual network card driver. -To obtain the driver method and configure the transmission hook point, as well as precautions,refer -to the description of `inputs.ebpf.socket.uprobe.dpdk.rx_hooks`. +When set to true, the agent will collect file I/O events generated on +virtual file systems (such as /proc, /sys, /run, and other kernel +pseudo file systems). +When set to false, the agent will not collect file I/O events from +virtual file systems. -Example: `tx_hooks: [i40e_xmit_pkts, virtio_xmit_pkts_packed, virtio_xmit_pkts]` +### Profile {#inputs.ebpf.profile} -#### Kprobe {#inputs.ebpf.socket.kprobe} +#### Unwinding {#inputs.ebpf.profile.unwinding} -##### kprobe disabled {#inputs.ebpf.socket.kprobe.disabled} +##### DWARF unwinding disabled {#inputs.ebpf.profile.unwinding.dwarf_disabled} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.disabled` +`inputs.ebpf.profile.unwinding.dwarf_disabled` + +Upgrade from old version: `static_config.ebpf.dwarf-disabled` **Default value**: ```yaml inputs: ebpf: - socket: - kprobe: - disabled: false + profile: + unwinding: + dwarf_disabled: true ``` **Schema**: @@ -4130,129 +3861,138 @@ inputs: **Description**: -When set to true, kprobe will be disabled. 
- -##### Unix Socket Enabled {#inputs.ebpf.socket.kprobe.enable_unix_socket} +The default setting is `true`, agent will use frame pointer based unwinding for +all processes. If a process does not contain frame pointers, the stack cannot be +displayed correctly. +Setting it to `false` will enable DWARF based stack unwinding for all processes that +do not contain frame pointers. Agent uses a heuristic algorithm to determine whether +the process being analyzed contains frame pointers. +Additionally, set `dwarf_regex` to force DWARF based stack unwinding for certain +processes. + +##### DWARF unwinding process matching regular expression {#inputs.ebpf.profile.unwinding.dwarf_regex} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.enable_unix_socket` +`inputs.ebpf.profile.unwinding.dwarf_regex` + +Upgrade from old version: `static_config.ebpf.dwarf-regex` **Default value**: ```yaml inputs: ebpf: - socket: - kprobe: - enable_unix_socket: false + profile: + unwinding: + dwarf_regex: '' ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **Description**: -When set to true, enable tracing of Unix domain sockets. - -##### Blacklist {#inputs.ebpf.socket.kprobe.blacklist} +If set to empty, agent will use a heuristic algorithm to determine whether the process +being analyzed contains frame pointers, and will use DWARF based stack unwinding for +processes that do not contain frame pointers. +If set to a valid regular expression, agent will no longer infer whether a process contains +frame pointers but will instead use the provided regular expression to match process names, +applying DWARF based stack unwinding only to the matching processes. 
-###### Port Numbers {#inputs.ebpf.socket.kprobe.blacklist.ports} +##### DWARF unwinding process map size {#inputs.ebpf.profile.unwinding.dwarf_process_map_size} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.blacklist.ports` +`inputs.ebpf.profile.unwinding.dwarf_process_map_size` -Upgrade from old version: `static_config.ebpf.kprobe-blacklist.port-list` +Upgrade from old version: `static_config.ebpf.dwarf-process-map-size` **Default value**: ```yaml inputs: ebpf: - socket: - kprobe: - blacklist: - ports: '' + profile: + unwinding: + dwarf_process_map_size: 1024 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [1, 131072] | **Description**: -TCP&UDP Port Blacklist, Priority higher than kprobe-whitelist. - -Example: `ports: 80,1000-2000` - -##### Whitelist {#inputs.ebpf.socket.kprobe.whitelist} +Each process using DWARF unwind has an entry in this map, relating process id to DWARF unwind entries. +The size of each one of these entries is around 8K; the default setting will allocate around 8M kernel memory. +This is a hash map, so size can be lower than max process id. +The configuration is only effective if DWARF is enabled. 
-###### Port Numbers {#inputs.ebpf.socket.kprobe.whitelist.ports} +##### DWARF unwinding shard map size {#inputs.ebpf.profile.unwinding.dwarf_shard_map_size} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.ebpf.socket.kprobe.whitelist.ports` +`inputs.ebpf.profile.unwinding.dwarf_shard_map_size` -Upgrade from old version: `static_config.ebpf.kprobe-whitelist.port-list` +Upgrade from old version: `static_config.ebpf.dwarf-shard-map-size` **Default value**: ```yaml inputs: ebpf: - socket: - kprobe: - whitelist: - ports: '' + profile: + unwinding: + dwarf_shard_map_size: 128 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [1, 4096] | **Description**: -TCP&UDP Port Whitelist, Priority lower than kprobe-blacklist. -Use kprobe to collect data on ports that are not in the blacklist or whitelist. - -Example: `ports: 80,1000-2000` - -#### SockOps {#inputs.ebpf.socket.sock_ops} +The number of unwind entry shards for DWARF unwinding. +The size of each one of these entries is 1M, the default setting will allocate around 128M kernel memory. +The configuration is only effective if DWARF is enabled. 
-##### TCP Option Trace {#inputs.ebpf.socket.sock_ops.tcp_option_trace} +#### On-CPU {#inputs.ebpf.profile.on_cpu} -###### TCP Option Tracing {#inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled} +##### Disabled {#inputs.ebpf.profile.on_cpu.disabled} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.socket.sock_ops.tcp_option_trace.enabled` +`inputs.ebpf.profile.on_cpu.disabled` + +Upgrade from old version: `static_config.ebpf.on-cpu-profile.disabled` **Default value**: ```yaml inputs: ebpf: - socket: - sock_ops: - tcp_option_trace: - enabled: false + profile: + on_cpu: + disabled: false ``` **Schema**: @@ -4262,84 +4002,80 @@ inputs: **Description**: -Whether to enable the tcp-option tracing SockOps program, which injects DeepFlow metadata -(for example, process PID) into a custom TCP option for eligible connections. -Note: This feature requires cgroup v2 (unified hierarchy) and kernel > 5.10. On hosts -using cgroup v1 the SockOps program will fail to attach and the agent will log a warning. -Compatibility: validated on x86 with kernel > 5.10; on arm we have only tested with -kernel 6.8 so far. -Limitation: PID tracking relies on the per-CPU syscall map in. Under CPU congestion, -softirqs handling TCP may run on a different CPU than the userspace thread, so the -injected metadata can be missing or stale. +eBPF On-CPU profile switch. -###### PID Injection Window {#inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes} +Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, +i.e., `ebpf.profile.on_cpu` must be included in `inputs.proc.process_matcher.[*].enabled_features`. 
+ +##### Sampling Frequency {#inputs.ebpf.profile.on_cpu.sampling_frequency} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.socket.sock_ops.tcp_option_trace.sampling_window_bytes` +`inputs.ebpf.profile.on_cpu.sampling_frequency` + +Upgrade from old version: `static_config.ebpf.on-cpu-profile.frequency` **Default value**: ```yaml inputs: ebpf: - socket: - sock_ops: - tcp_option_trace: - sampling_window_bytes: 16384 + profile: + on_cpu: + sampling_frequency: 99 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Unit | Bytes | -| Range | [0, 1048576] | +| Range | [1, 1000] | **Description**: -Minimum number of TCP payload bytes between PID injections. Default 16KB matches the -legacy behavior; smaller windows increase frequency, larger windows decrease it. Set to -0 to disable sampling and inject on every eligible packet. - -#### Tunning {#inputs.ebpf.socket.tunning} +eBPF On-CPU profile sampling frequency. -##### Max Capture Rate {#inputs.ebpf.socket.tunning.max_capture_rate} +##### Aggregate by CPU {#inputs.ebpf.profile.on_cpu.aggregate_by_cpu} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.socket.tunning.max_capture_rate` +`inputs.ebpf.profile.on_cpu.aggregate_by_cpu` -Upgrade from old version: `static_config.ebpf.global-ebpf-pps-threshold` +Upgrade from old version: `static_config.ebpf.on-cpu-profile.cpu` **Default value**: ```yaml inputs: ebpf: - socket: - tunning: - max_capture_rate: 0 + profile: + on_cpu: + aggregate_by_cpu: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | Per Second | -| Range | [0, 64000000] | +| Type | bool | **Description**: -Default value `0` means no limitation. +Whether to obtain the value of CPUID and decide whether to participate in aggregation. +- `true`: Obtain the value of CPUID and will be included in the aggregation of stack + trace data. +- `false`: It will not be included in the aggregation. 
Any other value is considered + invalid, the CPU value for stack trace data reporting is a special value + `CPU_INVALID: 0xfff` used to indicate that it is an invalid value. -##### Syscall_trace_id Disabled {#inputs.ebpf.socket.tunning.syscall_trace_id_disabled} +#### Preprocess {#inputs.ebpf.profile.preprocess} + +##### Stack Compression {#inputs.ebpf.profile.preprocess.stack_compression} **Tags**: @@ -4347,15 +4083,17 @@ Default value `0` means no limitation. **FQCN**: -`inputs.ebpf.socket.tunning.syscall_trace_id_disabled` +`inputs.ebpf.profile.preprocess.stack_compression` + +Upgrade from old version: `static_config.ebpf.preprocess.stack-compression` **Default value**: ```yaml inputs: ebpf: - socket: - tunning: - syscall_trace_id_disabled: false + profile: + preprocess: + stack_compression: true ``` **Schema**: @@ -4365,11 +4103,18 @@ inputs: **Description**: -When the trace_id is injected into all requests, the computation logic for all -syscall_trace_id can be turned off. This will significantly reduce the impact of the -eBPF hook on the CPU consumption of the application process. +Compress the call stack before sending data. Compression can effectively reduce the agent's +memory usage, data transmission bandwidth consumption, and ingester's CPU overhead. However, +it also increases the CPU usage of the agent. Tests have shown that compressing the on-cpu +function call stack of the deepflow-agent can reduce bandwidth consumption by `x` times, but +it will result in an additional `y%` CPU usage for the agent. -##### Disable Pre-allocating Memory {#inputs.ebpf.socket.tunning.map_prealloc_disabled} +#### Language-specific Profiling {#inputs.ebpf.profile.languages} + +Control which interpreter languages to profile. Disabling unused languages can save ~5-6 MB memory per language. +Total memory: ~17-20 MB (all enabled), ~6.1 MB (Python only), ~5.2 MB (PHP only), ~6.4 MB (Node.js only). 
+ +##### Python profiling disabled {#inputs.ebpf.profile.languages.python_disabled} **Tags**: @@ -4377,17 +4122,15 @@ eBPF hook on the CPU consumption of the application process. **FQCN**: -`inputs.ebpf.socket.tunning.map_prealloc_disabled` - -Upgrade from old version: `static_config.ebpf.map-prealloc-disabled` +`inputs.ebpf.profile.languages.python_disabled` **Default value**: ```yaml inputs: ebpf: - socket: - tunning: - map_prealloc_disabled: false + profile: + languages: + python_disabled: false ``` **Schema**: @@ -4397,13 +4140,10 @@ inputs: **Description**: -When full map preallocation is too expensive, set this configuration to `true` will -prevent memory pre-allocation during map definition, but it may result in some performance -degradation. This configuration only applies to maps of type 'BPF_MAP_TYPE_HASH'. -Currently applicable to socket trace and uprobe Golang/OpenSSL trace functionalities. -Disabling memory preallocation will approximately reduce memory usage by 45MB. +Disable Python interpreter profiling. When disabled, Python process stack traces will not be collected, +saving approximately 6.1 MB of kernel memory (python_tstate_addr_map, python_unwind_info_map, python_offsets_map). -##### Hooked Socket Syscalls {#inputs.ebpf.socket.tunning.hooked_socket_syscalls} +##### PHP profiling disabled {#inputs.ebpf.profile.languages.php_disabled} **Tags**: @@ -4411,60 +4151,28 @@ Disabling memory preallocation will approximately reduce memory usage by 45MB. 
**FQCN**: -`inputs.ebpf.socket.tunning.hooked_socket_syscalls` +`inputs.ebpf.profile.languages.php_disabled` **Default value**: ```yaml inputs: ebpf: - socket: - tunning: - hooked_socket_syscalls: - - read - - readv - - recvfrom - - recvmsg - - recvmmsg - - sendmsg - - sendmmsg - - sendto - - write - - writev + profile: + languages: + php_disabled: false ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| read | | -| readv | | -| recvfrom | | -| recvmsg | | -| recvmmsg | | -| sendmsg | | -| sendmmsg | | -| sendto | | -| write | | -| writev | | - **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | bool | **Description**: -Controls which supported socket syscalls will have eBPF hooks installed. +Disable PHP interpreter profiling. When disabled, PHP process stack traces will not be collected, +saving approximately 5.2 MB of kernel memory (php_unwind_info_map, php_offsets_map). -This list only controls whether a syscall is hooked. The backend type used for each -enabled syscall still follows the current runtime mode selection logic. For example, -the mixed mode keeps its existing hybrid and tracepoint-only split, the pure-kprobe -mode keeps its existing kprobe behavior, and the kfunc mode keeps its existing kfunc -behavior plus the tracepoint fallback for `recvfrom` and `recvmmsg`. - -Supported values: `read`, `readv`, `recvfrom`, `recvmsg`, `recvmmsg`, `sendmsg`, -`sendmmsg`, `sendto`, `write`, `writev`. 
- -##### Enable the fentry/fexit feature {#inputs.ebpf.socket.tunning.fentry_enabled} +##### Node.js profiling disabled {#inputs.ebpf.profile.languages.nodejs_disabled} **Tags**: @@ -4472,15 +4180,15 @@ Supported values: `read`, `readv`, `recvfrom`, `recvmsg`, `recvmmsg`, `sendmsg`, **FQCN**: -`inputs.ebpf.socket.tunning.fentry_enabled` +`inputs.ebpf.profile.languages.nodejs_disabled` **Default value**: ```yaml inputs: ebpf: - socket: - tunning: - fentry_enabled: false + profile: + languages: + nodejs_disabled: false ``` **Schema**: @@ -4490,186 +4198,154 @@ inputs: **Description**: -Explanation of Using fentry/fexit Features -- Compared to traditional kprobes, fentry and fexit programs offer higher performance and - availability, providing approximately 5%-10% performance improvement. -- Some Linux kernels do not fully support this feature, which may lead to kernel bugs and - node crashes. Known bug fixes include: - - Bug fix for TencentOS Linux kernel 5.4.119 - [https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423](https://github.com/torvalds/linux/commit/c3d6324f841bab2403be6419986e2b1d1068d423) - - Bug fix for Alibaba Cloud Linux kernel 5.10.23 - [https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861](https://github.com/gregkh/linux/commit/e21d2b92354b3cd25dd774ebb0f0e52ff04a7861) -- Kernel recommendation: To enable the fentry/fexit feature, it is recommended to use Linux - kernel 5.10.28 or later to ensure stability and performance. +Disable Node.js (V8) interpreter profiling. When disabled, Node.js process stack traces will not be collected, +saving approximately 6.4 MB of kernel memory (v8_unwind_info_map). 
-#### Preprocess {#inputs.ebpf.socket.preprocess} +### Tunning {#inputs.ebpf.tunning} -##### OOOR Cache Size {#inputs.ebpf.socket.preprocess.out_of_order_reassembly_cache_size} +#### Collector Queue Size {#inputs.ebpf.tunning.collector_queue_size} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.out_of_order_reassembly_cache_size` +`inputs.ebpf.tunning.collector_queue_size` -Upgrade from old version: `static_config.ebpf.syscall-out-of-order-cache-size` +Upgrade from old version: `static_config.ebpf-collector-queue-size` **Default value**: ```yaml inputs: ebpf: - socket: - preprocess: - out_of_order_reassembly_cache_size: 256 + tunning: + collector_queue_size: 65535 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [8, 1024] | +| Range | [4096, 64000000] | **Description**: -OOOR: Out Of Order Reassembly - -When `out_of_order_reassembly_protocols` is enabled, up to `out_of_order_reassembly_cache_size` -eBPF socket events (each event consuming up to `processors.request_log.tunning.payload_truncation` bytes) will be cached -in each TCP/UDP flow to prevent out-of-order events from impacting application protocol -parsing. Since eBPF socket events are sent to user space in batches, out-of-order scenarios -mainly occur when requests and responses within a single session are processed by different -CPUs, causing the response to reach user space before the request. 
+The length of the following queues: +- 0-ebpf-to-ebpf-collector +- 1-proc-event-to-sender +- 1-profile-to-sender -##### OOOR Protocols {#inputs.ebpf.socket.preprocess.out_of_order_reassembly_protocols} +#### Userspace Worker Threads {#inputs.ebpf.tunning.userspace_worker_threads} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.out_of_order_reassembly_protocols` +`inputs.ebpf.tunning.userspace_worker_threads` -Upgrade from old version: `static_config.ebpf.syscall-out-of-order-reassembly` +Upgrade from old version: `static_config.ebpf.thread-num` **Default value**: ```yaml inputs: ebpf: - socket: - preprocess: - out_of_order_reassembly_protocols: [] + tunning: + userspace_worker_threads: 1 ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| _DYNAMIC_OPTIONS_ | | - **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [1, 1024] | **Description**: -OOOR: Out Of Order Reassembly - -When this capability is enabled for a specific application protocol, the agent will add -out-of-order-reassembly processing for it. Note that the agent will consume more memory -in this case, so please adjust the syscall-out-of-order-cache-size accordingly and monitor -the agent's memory usage. - -Supported protocols: [https://www.deepflow.io/docs/features/l7-protocols/overview/](https://www.deepflow.io/docs/features/l7-protocols/overview/) - -Attention: configuring `HTTP2` or `gRPC` will enable both protocols. +The number of worker threads refers to how many threads participate +in data processing in user-space. The actual maximal value is the number +of CPU logical cores on the host. 
-##### OOOR Timeout {#inputs.ebpf.socket.preprocess.out_of_order_reassembly_timeout} +#### Kick Thread Nice Value {#inputs.ebpf.tunning.kick_kern_nice} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.out_of_order_reassembly_timeout` +`inputs.ebpf.tunning.kick_kern_nice` **Default value**: ```yaml inputs: ebpf: - socket: - preprocess: - out_of_order_reassembly_timeout: 100ms + tunning: + kick_kern_nice: 0 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['100ms', '1s'] | +| Type | int | +| Range | [-20, 19] | **Description**: -When the OOOR cache data times out, it will be output directly. This parameter can be adjusted according to metric -`deepflow_agent_ebpf_collect.metrics.time_backtrack_max`. +Controls the Linux nice value of per-CPU kick threads. + +These threads wake up after the periodic timer expires and issue a +lightweight syscall to trigger kernel-side timeout checks that flush +batched eBPF data. + +Pay attention to this option when `metrics.period_push_max_delay` +under `deepflow_tenant -> deepflow_agent_ebpf_collector` in Metrics +Center reaches 199 ms. This means the periodic push delay has hit +the exceeded marker, and the value can be decreased appropriately to +give the kick threads more scheduling preference. -Note: Increasing this value will consume more memory +Smaller nice values mean higher scheduling preference. Larger nice +values mean lower scheduling preference. Valid values range from +-20 to 19. A negative value may require CAP_SYS_NICE or a sufficient +RLIMIT_NICE. This can still affect other workloads. 
-##### SR Protocols {#inputs.ebpf.socket.preprocess.segmentation_reassembly_protocols} +#### Perf Pages Count {#inputs.ebpf.tunning.perf_pages_count} **Tags**: agent_restart -ee_feature **FQCN**: -`inputs.ebpf.socket.preprocess.segmentation_reassembly_protocols` +`inputs.ebpf.tunning.perf_pages_count` -Upgrade from old version: `static_config.ebpf.syscall-segmentation-reassembly` +Upgrade from old version: `static_config.ebpf.perf-pages-count` **Default value**: ```yaml inputs: ebpf: - socket: - preprocess: - segmentation_reassembly_protocols: [] + tunning: + perf_pages_count: 128 ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| _DYNAMIC_OPTIONS_ | | - **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | +| Range | [32, 8192] | **Description**: -SR: Segmentation Reassembly - -When this capability is enabled for a specific application protocol, the agent will add -segmentation-reassembly processing to merge application protocol content spread across -multiple syscalls before parsing it. This enhances the success rate of application -protocol parsing. Note that `out_of_order_reassembly_protocols` must also be enabled for -this feature to be effective. -Supported protocols: [https://www.deepflow.io/docs/features/l7-protocols/overview/](https://www.deepflow.io/docs/features/l7-protocols/overview/) -Attention: configuring `HTTP2` or `gRPC` will enable both protocols. - -### File {#inputs.ebpf.file} - -#### IO Event {#inputs.ebpf.file.io_event} +The number of pages occupied by the shared memory of the kernel. The +value is `2^n (5 <= n <= 13)`. Used for perf data transfer. If the +value is between `2^n` and `2^(n+1)`, it will be automatically adjusted +by the ebpf configurator to the minimum value `2^n`. +The page size is 4 KB. 
-##### Collect Mode {#inputs.ebpf.file.io_event.collect_mode} +#### Kernel Ring Size {#inputs.ebpf.tunning.kernel_ring_size} **Tags**: @@ -4677,47 +4353,31 @@ Attention: configuring `HTTP2` or `gRPC` will enable both protocols. **FQCN**: -`inputs.ebpf.file.io_event.collect_mode` +`inputs.ebpf.tunning.kernel_ring_size` -Upgrade from old version: `static_config.ebpf.io-event-collect-mode` +Upgrade from old version: `static_config.ebpf.ring-size` **Default value**: ```yaml inputs: ebpf: - file: - io_event: - collect_mode: 1 + tunning: + kernel_ring_size: 65536 ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| 0 | Disabled | -| 1 | Request Life Cycle | -| 2 | All | - **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | +| Range | [8192, 131072] | **Description**: -Collection modes: -- Disabled: Indicates that no IO events are collected. -- Request Life Cycle: Indicates that only IO events within the request life cycle are collected. -- All: Indicates that all IO events are collected. - -Note: -- To obtain the full file path, we need to combine it with the process's mount information. However, - some processes exit quickly after completing their tasks. When we attempt to process the file I/O - data generated by such processes, the corresponding /proc/[pid]/mountinfo entry may no longer be - available, resulting in incomplete paths (missing mount points). For processes with a lifetime - shorter than 50 ms, the file path may lack mount point information. This issue does not occur with - long-running processes. +The size of the ring cache queue. The value is `2^n (13 <= n <= 17)`. +If the value is between `2^n` and `2^(n+1)`, it will be automatically +adjusted by the ebpf configurator to the minimum value `2^n`. 
-##### Minimal Duration {#inputs.ebpf.file.io_event.minimal_duration} +#### Maximum Socket Entries {#inputs.ebpf.tunning.max_socket_entries} **Tags**: @@ -4725,30 +4385,30 @@ Note: **FQCN**: -`inputs.ebpf.file.io_event.minimal_duration` +`inputs.ebpf.tunning.max_socket_entries` -Upgrade from old version: `static_config.ebpf.io-event-minimal-duration` +Upgrade from old version: `static_config.ebpf.max-socket-entries` **Default value**: ```yaml inputs: ebpf: - file: - io_event: - minimal_duration: 1ms + tunning: + max_socket_entries: 131072 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['1ns', '1s'] | +| Type | int | +| Range | [10000, 2000000] | **Description**: -Only collect IO events with delay exceeding this threshold. +Set the maximum value of hash table entries for socket tracking, depending +on the number of concurrent requests in the actual scenario. -##### Virtual File Collection Enabled {#inputs.ebpf.file.io_event.enable_virtual_file_collect} +#### Socket Map Reclaim Threshold {#inputs.ebpf.tunning.socket_map_reclaim_threshold} **Tags**: @@ -4756,72 +4416,61 @@ Only collect IO events with delay exceeding this threshold. **FQCN**: -`inputs.ebpf.file.io_event.enable_virtual_file_collect` +`inputs.ebpf.tunning.socket_map_reclaim_threshold` + +Upgrade from old version: `static_config.ebpf.socket-map-max-reclaim` **Default value**: ```yaml inputs: ebpf: - file: - io_event: - enable_virtual_file_collect: false + tunning: + socket_map_reclaim_threshold: 120000 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [8000, 2000000] | **Description**: -When set to true, the agent will collect file I/O events generated on -virtual file systems (such as /proc, /sys, /run, and other kernel -pseudo file systems). -When set to false, the agent will not collect file I/O events from -virtual file systems. 
- -### Profile {#inputs.ebpf.profile} - -#### Unwinding {#inputs.ebpf.profile.unwinding} +The threshold for cleaning socket map table entries. -##### DWARF unwinding disabled {#inputs.ebpf.profile.unwinding.dwarf_disabled} +#### Maximum Trace Entries {#inputs.ebpf.tunning.max_trace_entries} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_disabled` +`inputs.ebpf.tunning.max_trace_entries` -Upgrade from old version: `static_config.ebpf.dwarf-disabled` +Upgrade from old version: `static_config.ebpf.max-trace-entries` **Default value**: ```yaml inputs: ebpf: - profile: - unwinding: - dwarf_disabled: true + tunning: + max_trace_entries: 131072 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [10000, 2000000] | **Description**: -The default setting is `true`, agent will use frame pointer based unwinding for -all processes. If a process does not contain frame pointers, the stack cannot be -displayed correctly. -Setting it to `false` will enable DWARF based stack unwinding for all processes that -do not contain frame pointers. Agent uses a heuristic algorithm to determine whether -the process being analyzed contains frame pointers. -Additionally, setting `dwarf_regex` to force DWARF based stack unwinding for certain -processes. +Set the maximum value of hash table entries for thread/coroutine tracking sessions. -##### DWARF unwinding process matching regular expression {#inputs.ebpf.profile.unwinding.dwarf_regex} +## Resources {#inputs.resources} + +### Push Interval {#inputs.resources.push_interval} **Tags**: @@ -4829,34 +4478,29 @@ processes. 
**FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_regex` +`inputs.resources.push_interval` -Upgrade from old version: `static_config.ebpf.dwarf-regex` +Upgrade from old version: `platform_sync_interval` **Default value**: ```yaml inputs: - ebpf: - profile: - unwinding: - dwarf_regex: '' + resources: + push_interval: 10s ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['10s', '3600s'] | **Description**: -If set to empty, agennt will use a heuristic algorithm to determine whether the process -being analyzed contains frame pointers, and will use DWARF based stack unwinding for -processes that do not contain frame pointers. -If set to a valid regular expression, agent will no longer infer whether a process contains -frame pointers but will instead use the provided regular expression to match process names, -applying DWARF based stack unwinding only to the matching processes. +The interval at which deepflow-agent actively reports resource information +to deepflow-server. -##### DWARF unwinding process map size {#inputs.ebpf.profile.unwinding.dwarf_process_map_size} +### Workload Resource Sync Enabled {#inputs.resources.workload_resource_sync_enabled} **Tags**: @@ -4864,1717 +4508,137 @@ applying DWARF based stack unwinding only to the matching processes. **FQCN**: -`inputs.ebpf.profile.unwinding.dwarf_process_map_size` - -Upgrade from old version: `static_config.ebpf.dwarf-process-map-size` +`inputs.resources.workload_resource_sync_enabled` **Default value**: ```yaml inputs: - ebpf: - profile: - unwinding: - dwarf_process_map_size: 1024 + resources: + workload_resource_sync_enabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 131072] | +| Type | bool | **Description**: -Each process using DWARF unwind has an entry in this map, relating process id to DWARF unwind entries. 
-The size of each one of these entries is arount 8K, the default setting will allocate around 8M kernel memory. -This is a hash map, so size can be lower than max process id. -The configuration is only effective if DWARF is enabled. - -##### DWARF unwinding shard map size {#inputs.ebpf.profile.unwinding.dwarf_shard_map_size} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.ebpf.profile.unwinding.dwarf_shard_map_size` - -Upgrade from old version: `static_config.ebpf.dwarf-shard-map-size` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - unwinding: - dwarf_shard_map_size: 128 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 4096] | - -**Description**: - -The number of unwind entry shards for DWARF unwinding. -The size of each one of these entries is 1M, the default setting will allocate around 128M kernel memory. -The configuration is only effective if DWARF is enabled. - -#### On-CPU {#inputs.ebpf.profile.on_cpu} - -##### Disabled {#inputs.ebpf.profile.on_cpu.disabled} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.on_cpu.disabled` - -Upgrade from old version: `static_config.ebpf.on-cpu-profile.disabled` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - on_cpu: - disabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -eBPF On-CPU profile switch. - -Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, -i.e., `ebpf.profile.on_cpu` must be included in `inputs.proc.process_matcher.[*].enabled_features`. 
- -##### Sampling Frequency {#inputs.ebpf.profile.on_cpu.sampling_frequency} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.on_cpu.sampling_frequency` - -Upgrade from old version: `static_config.ebpf.on-cpu-profile.frequency` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - on_cpu: - sampling_frequency: 99 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 1000] | - -**Description**: - -eBPF On-CPU profile sampling frequency. - -##### Aggregate by CPU {#inputs.ebpf.profile.on_cpu.aggregate_by_cpu} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.on_cpu.aggregate_by_cpu` - -Upgrade from old version: `static_config.ebpf.on-cpu-profile.cpu` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - on_cpu: - aggregate_by_cpu: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to obtain the value of CPUID and decide whether to participate in aggregation. -- `true`: Obtain the value of CPUID and will be included in the aggregation of stack - trace data. -- `false`: It will not be included in the aggregation. Any other value is considered - invalid, the CPU value for stack trace data reporting is a special value - `CPU_INVALID: 0xfff` used to indicate that it is an invalid value. - -#### Off-CPU {#inputs.ebpf.profile.off_cpu} - -##### Disabled {#inputs.ebpf.profile.off_cpu.disabled} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.off_cpu.disabled` - -Upgrade from old version: `static_config.ebpf.off-cpu-profile.disabled` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - off_cpu: - disabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -eBPF Off-CPU profile switch. 
- -Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, -i.e., `ebpf.profile.off_cpu` must be included in `inputs.proc.process_matcher.[*].enabled_features`. - -##### Aggregate by CPU {#inputs.ebpf.profile.off_cpu.aggregate_by_cpu} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.off_cpu.aggregate_by_cpu` - -Upgrade from old version: `static_config.ebpf.off-cpu-profile.cpu` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - off_cpu: - aggregate_by_cpu: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to obtain the value of CPUID and decide whether to participate in aggregation. -- `true`: Obtain the value of CPUID and will be included in the aggregation of stack - trace data. -- `false`: It will not be included in the aggregation. Any other value is considered - invalid, the CPU value for stack trace data reporting is a special value - `CPU_INVALID: 0xfff` used to indicate that it is an invalid value. - -##### Minimum Blocking Time {#inputs.ebpf.profile.off_cpu.min_blocking_time} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.off_cpu.min_blocking_time` - -Upgrade from old version: `static_config.ebpf.off-cpu-profile.minblock` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - off_cpu: - min_blocking_time: 50us -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['0ns', '1h'] | - -**Description**: - -If set to '0ns', there will be no minimum value limitation. Scheduler events are still -high-frequency events, as their rate may exceed 1 million events per second, so -caution should still be exercised. - -If overhead remains an issue, you can configure the 'minblock' tunable parameter here. -If the off-CPU time is less than the value configured in this item, the data will be -discarded. 
If your goal is to trace longer blocking events, increasing this parameter -can filter out shorter blocking events, further reducing overhead. Additionally, we -will not collect events with a blocking time exceeding 1 hour. - -#### Memory {#inputs.ebpf.profile.memory} - -##### Disabled {#inputs.ebpf.profile.memory.disabled} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.memory.disabled` - -Upgrade from old version: `static_config.ebpf.memory-profile.disabled` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - memory: - disabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -eBPF memory profile switch. - -Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, -i.e., `ebpf.profile.memory` must be included in `inputs.proc.process_matcher.[*].enabled_features`. - -##### Memory profile report interval {#inputs.ebpf.profile.memory.report_interval} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.memory.report_interval` - -Upgrade from old version: `static_config.ebpf.memory-profile.report-interval` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - memory: - report_interval: 10s -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['1s', '60s'] | - -**Description**: - -The interval at which deepflow-agent aggregates and reports memory profile data. 
- -##### LRU length for process allocated addresses {#inputs.ebpf.profile.memory.allocated_addresses_lru_len} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.memory.allocated_addresses_lru_len` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - memory: - allocated_addresses_lru_len: 131072 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1024, 4194704] | - -**Description**: - -Agent uses LRU cache to record process allocated addresses to avoid uncontrolled -memory usage. Each record in this LRU is about 80B. - -##### Sort length {#inputs.ebpf.profile.memory.sort_length} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.memory.sort_length` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - memory: - sort_length: 16384 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 65536] | - -**Description**: - -In order to match mallocs and frees, memory profiler will sort data by timestamp before processing. -This parameter is the length of the sorted array. -When configuring this option, first adjust the `sort_interval` parameter according to the instructions, -and then refer to the agent performance statistics in `deepflow_agent_ebpf_memory_profiler` -`dequeued_by_length` and `dequeued_by_interval` metrics, appropriately reduce this parameter -while ensuring that the former is several times smaller than the latter. 
- -##### Sort interval {#inputs.ebpf.profile.memory.sort_interval} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.memory.sort_interval` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - memory: - sort_interval: 1500ms -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['1ns', '10s'] | - -**Description**: - -In order to match mallocs and frees, memory profiler will sort data by timestamp before processing. -This parameter controls the max span of interval between the first and last item in the sorted array. -Refer to agent performance statistics in `deepflow_agent_ebpf_memory_profiler`, -making `time_backtracked` to 0. Configurion `sort_length` may also need to be increased. - -##### Queue Size {#inputs.ebpf.profile.memory.queue_size} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`inputs.ebpf.profile.memory.queue_size` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - memory: - queue_size: 32768 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [4096, 64000000] | - -**Description**: - -Memory profiler inner queue size. -Refer to agent performance statistics in `deepflow_agent_ebpf_memory_profiler`, -making `overwritten` to 0 and `pending` not exceeding this configuration. - -#### Preprocess {#inputs.ebpf.profile.preprocess} - -##### Stack Compression {#inputs.ebpf.profile.preprocess.stack_compression} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.preprocess.stack_compression` - -Upgrade from old version: `static_config.ebpf.preprocess.stack-compression` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - preprocess: - stack_compression: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Compress the call stack before sending data. 
Compression can effectively reduce the agent's -memory usage, data transmission bandwidth consumption, and ingester's CPU overhead. However, -it also increases the CPU usage of the agent. Tests have shown that compressing the on-cpu -function call stack of the deepflow-agent can reduce bandwidth consumption by `x` times, but -it will result in an additional `y%` CPU usage for the agent. - -#### Language-specific Profiling {#inputs.ebpf.profile.languages} - -Control which interpreter languages to profile. Disabling unused languages can save ~5-6 MB memory per language. -Total memory: ~17-20 MB (all enabled), ~6.1 MB (Python only), ~5.2 MB (PHP only), ~6.4 MB (Node.js only). - -##### Python profiling disabled {#inputs.ebpf.profile.languages.python_disabled} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.languages.python_disabled` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - languages: - python_disabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Disable Python interpreter profiling. When disabled, Python process stack traces will not be collected, -saving approximately 6.1 MB of kernel memory (python_tstate_addr_map, python_unwind_info_map, python_offsets_map). - -##### PHP profiling disabled {#inputs.ebpf.profile.languages.php_disabled} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.languages.php_disabled` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - languages: - php_disabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Disable PHP interpreter profiling. When disabled, PHP process stack traces will not be collected, -saving approximately 5.2 MB of kernel memory (php_unwind_info_map, php_offsets_map). 
- -##### Node.js profiling disabled {#inputs.ebpf.profile.languages.nodejs_disabled} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.profile.languages.nodejs_disabled` - -**Default value**: -```yaml -inputs: - ebpf: - profile: - languages: - nodejs_disabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Disable Node.js (V8) interpreter profiling. When disabled, Node.js process stack traces will not be collected, -saving approximately 6.4 MB of kernel memory (v8_unwind_info_map). - -### Network {#inputs.ebpf.network} - -#### NIC optimization Enabled {#inputs.ebpf.network.nic_opt_enabled} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.network.nic_opt_enabled` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_opt_enabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to enable NIC optimization for enhanced multi-core packet -processing and burst traffic resilience. - -When enabled, the system applies a combination of: - - RSS hardware queue configuration - - RX ring descriptor size tuning - - IRQ (interrupt) CPU affinity binding - - Optional XDP CPUMAP-based CPU redirection - -This optimization mitigates scenarios where RSS hardware cannot hash -inner headers of encapsulated traffic (e.g., GRE, Double VLAN, -VXLAN, ERSPAN), which may otherwise cause traffic to be concentrated -on a single CPU core and lead to packet drops or performance bottlenecks. - -RX ring tuning improves burst handling capability by increasing -the number of descriptors available for packet reception, reducing -the likelihood of ring overflow under high traffic conditions. - -When XDP CPU redirect is enabled, packets are redistributed in -software across multiple CPU cores after initial reception, -providing better load balancing beyond hardware RSS capabilities. 
- -Recommended to enable this feature when: - 1) Traffic on the interface consists primarily of encapsulated - packets (e.g., verified via tcpdump showing GRE, Double VLAN, - VXLAN, etc.). - 2) One CPU core shows near 100% softirq utilization (e.g., - observed via `top` with per-CPU view), while other CPUs - remain underutilized. - -For optimal performance, IRQ CPUs and XDP redirect CPUs should be -configured on the same NUMA node as the physical NIC. - -#### NIC Optimize {#inputs.ebpf.network.nic_optimize} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`inputs.ebpf.network.nic_optimize` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - interface: '' - irq_cpu_list: '' - rss_channel_count: 0 - rx_ring_size: 0 - xdp_cpu_redirect: false - xdp_cpu_redirect_list: '' - xdp_queue_size: 2048 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | dict | - -**Description**: - -Configure NIC-level performance optimizations for specific interfaces. - -This feature improves packet processing scalability and burst handling -by tuning hardware RSS queues, interrupt CPU affinity, RX ring size, -and optional XDP CPUMAP-based CPU redirection. - -Recommended when: - - Traffic is primarily encapsulated (GRE, Double VLAN, VXLAN, ERSPAN). - - One CPU shows near 100% softirq usage while others are idle. - -To achieve better performance, the program will automatically disable the -irqbalance service to prevent network interface interrupts from migrating -between CPUs. - -Multiple NIC optimize entries can be configured for different interfaces. 
- -Example: -```yaml -inputs: - ebpf: - network: - nic_opt_enabled: true - nic_optimize: - - interface: eth0 - rx_ring_size: 4096 - rss_channel_count: 2 - irq_cpu_list: 1,2 - xdp_cpu_redirect: true - xdp_queue_size: 2048 - xdp_cpu_redirect_list: 4,5,6,7 - - interface: eth1 - rx_ring_size: 4096 - rss_channel_count: 2 - irq_cpu_list: 1,2 - xdp_cpu_redirect: true - xdp_queue_size: 2048 - xdp_cpu_redirect_list: 4,5,6,7 -``` - -##### Interface {#inputs.ebpf.network.nic_optimize.interface} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.interface` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - interface: '' -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -Name of the network interface to optimize. - -##### RX Ring Size {#inputs.ebpf.network.nic_optimize.rx_ring_size} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.rx_ring_size` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - rx_ring_size: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**Description**: - -Number of RX descriptors in NIC receive ring. - -Increasing this value improves burst traffic buffering -and reduces packet drops caused by ring overflow. -Specifically, use `ethtool -g ` to check the current -configuration, and adjust to an appropriate value based on your workload. - -0 (default) means keep the original state and ignore this setting. - -##### RSS Channel Count {#inputs.ebpf.network.nic_optimize.rss_channel_count} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.rss_channel_count` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - rss_channel_count: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**Description**: - -Number of hardware RSS queues. 
-Determines how many queues packets are distributed to after -hardware hash calculation. - -Maximum supported value is typically 16 and must not exceed -the number of logical CPU cores. -Specifically, use `ethtool -l ` to check the current configuration -and adjust to an appropriate value based on your workload. - -When XDP CPU redirect is enabled, it is recommended to set this to 1. -0 (default) means keep the original state and ignore this setting. - -##### Hardware IRQ CPU List {#inputs.ebpf.network.nic_optimize.irq_cpu_list} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.irq_cpu_list` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - irq_cpu_list: '' -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -CPU ID or comma-separated CPU list used for handling NIC interrupts. - -Recommended to match the number of RSS queues. -If XDP CPU redirect is enabled, only one CPU is required. - -Value can be: - - Specific CPU list (e.g., 2,4,6) - - "local" (auto match CPUs in local NUMA node) - -CPUs should be located on the same NUMA node as the NIC. - -##### Enable XDP CPU Redirect {#inputs.ebpf.network.nic_optimize.xdp_cpu_redirect} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.xdp_cpu_redirect` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - xdp_cpu_redirect: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Enable XDP CPUMAP redirect to redistribute packets across CPUs -in software. - -Useful when hardware RSS cannot distribute encapsulated traffic -(e.g., Double VLAN, ERSPAN) evenly across CPUs, resulting in -single-core overload and packet drops. 
- -##### XDP Queue Size {#inputs.ebpf.network.nic_optimize.xdp_queue_size} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.xdp_queue_size` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - xdp_queue_size: 2048 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**Description**: - -Size of the XDP CPUMAP queue. - -Valid range: [512, 8192]. Powers of two are recommended. - -Larger values improve burst tolerance but consume more memory. - -##### XDP Redirect CPU List {#inputs.ebpf.network.nic_optimize.xdp_cpu_redirect_list} - -**Tags**: - - - -**FQCN**: - -`inputs.ebpf.network.nic_optimize.xdp_cpu_redirect_list` - -**Default value**: -```yaml -inputs: - ebpf: - network: - nic_optimize: - - xdp_cpu_redirect_list: '' -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -CPU list used for processing packets after XDP redirection. - -Format example: 4,6,8 - -### Tunning {#inputs.ebpf.tunning} - -#### Collector Queue Size {#inputs.ebpf.tunning.collector_queue_size} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.collector_queue_size` - -Upgrade from old version: `static_config.ebpf-collector-queue-size` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - collector_queue_size: 65535 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [4096, 64000000] | - -**Description**: - -The length of the following queues: -- 0-ebpf-to-ebpf-collector -- 1-proc-event-to-sender -- 1-profile-to-sender - -#### Userspace Worker Threads {#inputs.ebpf.tunning.userspace_worker_threads} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.userspace_worker_threads` - -Upgrade from old version: `static_config.ebpf.thread-num` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - userspace_worker_threads: 1 -``` - -**Schema**: -| Key | Value 
| -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 1024] | - -**Description**: - -The number of worker threads refers to how many threads participate -in data processing in user-space. The actual maximal value is the number -of CPU logical cores on the host. - -#### Kick Thread Nice Value {#inputs.ebpf.tunning.kick_kern_nice} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.kick_kern_nice` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - kick_kern_nice: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [-20, 19] | - -**Description**: - -Controls the Linux nice value of per-CPU kick threads. - -These threads wake up after the periodic timer expires and issue a -lightweight syscall to trigger kernel-side timeout checks that flush -batched eBPF data. - -Pay attention to this option when `metrics.period_push_max_delay` -under `deepflow_tenant -> deepflow_agent_ebpf_collector` in Metrics -Center reaches 199 ms. This means the periodic push delay has hit -the exceeded marker, and the value can be decreased appropriately to -give the kick threads more scheduling preference. - -Smaller nice values mean higher scheduling preference. Larger nice -values mean lower scheduling preference. Valid values range from --20 to 19. A negative value may require CAP_SYS_NICE or a sufficient -RLIMIT_NICE. This can still affect other workloads. - -#### Perf Pages Count {#inputs.ebpf.tunning.perf_pages_count} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.perf_pages_count` - -Upgrade from old version: `static_config.ebpf.perf-pages-count` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - perf_pages_count: 128 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [32, 8192] | - -**Description**: - -The number of page occupied by the shared memory of the kernel. The -value is `2^n (5 <= n <= 13)`. 
Used for perf data transfer. If the -value is between `2^n` and `2^(n+1)`, it will be automatically adjusted -by the ebpf configurator to the minimum value `2^n`. -The page size is 4 KB. - -#### Kernel Ring Size {#inputs.ebpf.tunning.kernel_ring_size} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.kernel_ring_size` - -Upgrade from old version: `static_config.ebpf.ring-size` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - kernel_ring_size: 65536 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [8192, 131072] | - -**Description**: - -The size of the ring cache queue, The value is `2^n (13 <= n <= 17)`. -If the value is between `2^n` and `2^(n+1)`, it will be automatically -adjusted by the ebpf configurator to the minimum value `2^n`. - -#### Maximum Socket Entries {#inputs.ebpf.tunning.max_socket_entries} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.max_socket_entries` - -Upgrade from old version: `static_config.ebpf.max-socket-entries` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - max_socket_entries: 131072 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [10000, 2000000] | - -**Description**: - -Set the maximum value of hash table entries for socket tracking, depending -on the number of concurrent requests in the actual scenario - -#### Socket Map Reclaim Threshold {#inputs.ebpf.tunning.socket_map_reclaim_threshold} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.socket_map_reclaim_threshold` - -Upgrade from old version: `static_config.ebpf.socket-map-max-reclaim` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - socket_map_reclaim_threshold: 120000 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [8000, 2000000] | - -**Description**: - -The threshold for cleaning socket map table entries. 
- -#### Maximum Trace Entries {#inputs.ebpf.tunning.max_trace_entries} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.ebpf.tunning.max_trace_entries` - -Upgrade from old version: `static_config.ebpf.max-trace-entries` - -**Default value**: -```yaml -inputs: - ebpf: - tunning: - max_trace_entries: 131072 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [10000, 2000000] | - -**Description**: - -Set the maximum value of hash table entries for thread/coroutine tracking sessions. - -## Resources {#inputs.resources} - -### Push Interval {#inputs.resources.push_interval} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.resources.push_interval` - -Upgrade from old version: `platform_sync_interval` - -**Default value**: -```yaml -inputs: - resources: - push_interval: 10s -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | duration | -| Range | ['10s', '3600s'] | - -**Description**: - -The interval at which deepflow-agent actively reports resource information -to deepflow-server. - -### Workload Resource Sync Enabled {#inputs.resources.workload_resource_sync_enabled} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.resources.workload_resource_sync_enabled` - -**Default value**: -```yaml -inputs: - resources: - workload_resource_sync_enabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -When enabled, deepflow-server will abstract VM based on the runtime -environment information reported by deepflow-agent. 
- -### Collect Private Cloud Resource {#inputs.resources.private_cloud} - -#### Hypervisor Resource Enabled {#inputs.resources.private_cloud.hypervisor_resource_enabled} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.resources.private_cloud.hypervisor_resource_enabled` - -Upgrade from old version: `platform_enabled` - -**Default value**: -```yaml -inputs: - resources: - private_cloud: - hypervisor_resource_enabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -When enabled, deepflow-agent will automatically synchronize virtual -machine and network information on KVM or Linux Host to deepflow-server. -Information collected includes: -- raw_all_vm_xml -- raw_vm_states -- raw_ovs_interfaces -- raw_ovs_ports -- raw_brctl_show -- raw_vlan_config - -#### VM MAC Source {#inputs.resources.private_cloud.vm_mac_source} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.resources.private_cloud.vm_mac_source` - -Upgrade from old version: `if_mac_source` - -**Default value**: -```yaml -inputs: - resources: - private_cloud: - vm_mac_source: 0 -``` - -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| 0 | Interface MAC Address | -| 1 | Interface Name | -| 2 | Qemu XML File | - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**Description**: - -How to extract the real MAC address of the virtual machine when the -agent runs on the KVM host. 
- -Explanation of the options: -- Interface MAC Address: extracted from tap interface MAC address -- Interface Name: extracted from tap interface name -- Qemu XML File: extracted from the XML file of the virtual machine - -#### VM XML Directory {#inputs.resources.private_cloud.vm_xml_directory} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.resources.private_cloud.vm_xml_directory` - -Upgrade from old version: `vm_xml_path` - -**Default value**: -```yaml -inputs: - resources: - private_cloud: - vm_xml_directory: /etc/libvirt/qemu/ -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | -| Range | [0, 100] | - -**Description**: - -VM XML file directory. - -#### VM MAC Mapping Script {#inputs.resources.private_cloud.vm_mac_mapping_script} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.resources.private_cloud.vm_mac_mapping_script` - -Upgrade from old version: `static_config.tap-mac-script` - -**Default value**: -```yaml -inputs: - resources: - private_cloud: - vm_mac_mapping_script: '' -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | -| Range | [0, 100] | - -**Description**: - -The MAC address mapping relationship of TAP NIC in complex environment can be -constructed by writing a script. The following conditions must be met to use this -script: -1. if_mac_source = 2 -2. tap_mode = 0 -3. The name of the TAP NIC is the same as in the virtual machine XML file -4. 
The format of the script output is as follows: - - tap2d283dfe,11:22:33:44:55:66 - - tap2d283223,aa:bb:cc:dd:ee:ff - -### Collect K8s Resource {#inputs.resources.kubernetes} - -#### K8s Namespace {#inputs.resources.kubernetes.kubernetes_namespace} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.kubernetes_namespace` - -Upgrade from old version: `static_config.kubernetes-namespace` - -**Default value**: -```yaml -inputs: - resources: - kubernetes: - kubernetes_namespace: null -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -Specify the namespace for agent to query K8s resources. - -#### K8s API Resources {#inputs.resources.kubernetes.api_resources} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources` - -Upgrade from old version: `static_config.kubernetes-resources` - -**Default value**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: namespaces - - name: nodes - - name: pods - - name: replicationcontrollers - - name: services - - name: daemonsets - - name: deployments - - name: replicasets - - name: statefulsets - - name: ingresses - - name: configmaps -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | dict | - -**Description**: - -Specify kubernetes resources to watch. - -The schematics of entries in list is: -{ - name: string - group: string - version: string - disabled: bool - field_selector: string -} - -Agent will watch the following resources by default: -- namespaces -- nodes -- pods -- replicationcontrollers -- services -- daemonsets -- deployments -- replicasets -- statefulsets -- ingresses -- configmaps - -To disable a resource, add an entry to the list with `disabled: true`: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: services - disabled: true -``` - -To enable a resource, add an entry of this resource to the list. 
Be advised that -this setting overrides the default of the same resource. For example, to enable -`statefulsets` in both group `apps` (the default) and `apps.kruise.io` will require -two entries: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: statefulsets - group: apps - - name: statefulsets - group: apps.kruise.io - version: v1beta1 -``` - -To watching `routes` in openshift you can use the following settings: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: ingresses - disabled: true - - name: routes -``` - -##### Name {#inputs.resources.kubernetes.api_resources.name} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.api_resources.name` - -Upgrade from old version: `static_config.kubernetes-resources.name` - -**Default value**: -```yaml -inputs: - resources: - kubernetes: - api_resources: - - name: '' -``` - -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| namespaces | | -| nodes | | -| pods | | -| replicationcontrollers | | -| services | | -| daemonsets | | -| deployments | | -| replicasets | | -| statefulsets | | -| ingresses | | -| routes | | -| servicerules | | -| clonesets | | -| ippools | | -| opengaussclusters | | -| configmaps | | - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: +When enabled, deepflow-server will abstract VM based on the runtime +environment information reported by deepflow-agent. -K8s API resource name. 
+### Collect Private Cloud Resource {#inputs.resources.private_cloud} -##### Group {#inputs.resources.kubernetes.api_resources.group} +#### Hypervisor Resource Enabled {#inputs.resources.private_cloud.hypervisor_resource_enabled} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.resources.kubernetes.api_resources.group` +`inputs.resources.private_cloud.hypervisor_resource_enabled` -Upgrade from old version: `static_config.kubernetes-resources.group` +Upgrade from old version: `platform_enabled` **Default value**: ```yaml inputs: resources: - kubernetes: - api_resources: - - group: '' + private_cloud: + hypervisor_resource_enabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | bool | **Description**: -K8s API resource group. +When enabled, deepflow-agent will automatically synchronize virtual +machine and network information on KVM or Linux Host to deepflow-server. +Information collected includes: +- raw_all_vm_xml +- raw_vm_states +- raw_ovs_interfaces +- raw_ovs_ports +- raw_brctl_show +- raw_vlan_config -##### Version {#inputs.resources.kubernetes.api_resources.version} +#### VM MAC Source {#inputs.resources.private_cloud.vm_mac_source} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.resources.kubernetes.api_resources.version` +`inputs.resources.private_cloud.vm_mac_source` -Upgrade from old version: `static_config.kubernetes-resources.version` +Upgrade from old version: `if_mac_source` **Default value**: ```yaml inputs: resources: - kubernetes: - api_resources: - - version: '' + private_cloud: + vm_mac_source: 0 ``` +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| 0 | Interface MAC Address | +| 1 | Interface Name | +| 2 | Qemu XML File | + **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | int | **Description**: -K8s API version. 
+How to extract the real MAC address of the virtual machine when the +agent runs on the KVM host. -##### Disabled {#inputs.resources.kubernetes.api_resources.disabled} +Explanation of the options: +- Interface MAC Address: extracted from tap interface MAC address +- Interface Name: extracted from tap interface name +- Qemu XML File: extracted from the XML file of the virtual machine + +#### VM XML Directory {#inputs.resources.private_cloud.vm_xml_directory} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.resources.kubernetes.api_resources.disabled` +`inputs.resources.private_cloud.vm_xml_directory` -Upgrade from old version: `static_config.kubernetes-resources.disabled` +Upgrade from old version: `vm_xml_path` **Default value**: ```yaml inputs: resources: - kubernetes: - api_resources: - - disabled: false + private_cloud: + vm_xml_directory: /etc/libvirt/qemu/ ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | +| Range | [0, 100] | **Description**: -K8s API resource disabled. +VM XML file directory. -##### Field Selector {#inputs.resources.kubernetes.api_resources.field_selector} +#### VM MAC Mapping Script {#inputs.resources.private_cloud.vm_mac_mapping_script} **Tags**: @@ -6582,29 +4646,39 @@ K8s API resource disabled. **FQCN**: -`inputs.resources.kubernetes.api_resources.field_selector` +`inputs.resources.private_cloud.vm_mac_mapping_script` -Upgrade from old version: `static_config.kubernetes-resources.field-selector` +Upgrade from old version: `static_config.tap-mac-script` **Default value**: ```yaml inputs: resources: - kubernetes: - api_resources: - - field_selector: '' + private_cloud: + vm_mac_mapping_script: '' ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | string | +| Range | [0, 100] | **Description**: -K8s API resource field selector. +The MAC address mapping relationship of TAP NIC in complex environment can be +constructed by writing a script. 
The following conditions must be met to use this +script: +1. if_mac_source = 2 +2. tap_mode = 0 +3. The name of the TAP NIC is the same as in the virtual machine XML file +4. The format of the script output is as follows: + - tap2d283dfe,11:22:33:44:55:66 + - tap2d283223,aa:bb:cc:dd:ee:ff -#### K8s API List Page Size {#inputs.resources.kubernetes.api_list_page_size} +### Collect K8s Resource {#inputs.resources.kubernetes} + +#### K8s Namespace {#inputs.resources.kubernetes.kubernetes_namespace} **Tags**: @@ -6612,29 +4686,28 @@ K8s API resource field selector. **FQCN**: -`inputs.resources.kubernetes.api_list_page_size` +`inputs.resources.kubernetes.kubernetes_namespace` -Upgrade from old version: `static_config.kubernetes-api-list-limit` +Upgrade from old version: `static_config.kubernetes-namespace` **Default value**: ```yaml inputs: resources: kubernetes: - api_list_page_size: 1000 + kubernetes_namespace: null ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [10, 4294967295] | +| Type | string | **Description**: -Used when limit k8s api list entry size. +Specify the namespace for agent to query K8s resources. -#### K8s API List Maximum Interval {#inputs.resources.kubernetes.api_list_max_interval} +#### K8s API Resources {#inputs.resources.kubernetes.api_resources} **Tags**: @@ -6642,124 +4715,137 @@ Used when limit k8s api list entry size. 
**FQCN**: -`inputs.resources.kubernetes.api_list_max_interval` +`inputs.resources.kubernetes.api_resources` -Upgrade from old version: `static_config.kubernetes-api-list-interval` +Upgrade from old version: `static_config.kubernetes-resources` **Default value**: ```yaml inputs: resources: kubernetes: - api_list_max_interval: 10m + api_resources: + - name: namespaces + - name: nodes + - name: pods + - name: replicationcontrollers + - name: services + - name: daemonsets + - name: deployments + - name: replicasets + - name: statefulsets + - name: ingresses + - name: configmaps ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['10m', '30d'] | +| Type | dict | **Description**: -Interval of listing resource when watcher idles - -#### Ingress Flavour {#inputs.resources.kubernetes.ingress_flavour} - -**Tags**: - - -deprecated - -**FQCN**: +Specify Kubernetes resources to watch. -`inputs.resources.kubernetes.ingress_flavour` +The schema of each entry in the list is: +{ + name: string + group: string + version: string + disabled: bool + field_selector: string +} -Upgrade from old version: `static_config.ingress-flavour` +Agent will watch the following resources by default: +- namespaces +- nodes +- pods +- replicationcontrollers +- services +- daemonsets +- deployments +- replicasets +- statefulsets +- ingresses +- configmaps -**Default value**: +To disable a resource, add an entry to the list with `disabled: true`: ```yaml inputs: resources: kubernetes: - ingress_flavour: kubernetes + api_resources: + - name: services + disabled: true ``` -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -#### Pod MAC Collection Method {#inputs.resources.kubernetes.pod_mac_collection_method} - -**Tags**: - -agent_restart - -**FQCN**: - -`inputs.resources.kubernetes.pod_mac_collection_method` - -Upgrade from old version: `static_config.kubernetes-poller-type` - -**Default value**: +To enable a 
resource, add an entry of this resource to the list. Be advised that +this setting overrides the default of the same resource. For example, to enable +`statefulsets` in both group `apps` (the default) and `apps.kruise.io` will require +two entries: ```yaml inputs: resources: kubernetes: - pod_mac_collection_method: adaptive + api_resources: + - name: statefulsets + group: apps + - name: statefulsets + group: apps.kruise.io + version: v1beta1 ``` -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| adaptive | | -| active | | -| passive | | - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -In active mode, deepflow-agent enters the netns of other Pods through -setns syscall to query the MAC and IP addresses. In this mode, the setns -operation requires the SYS_ADMIN permission. In passive mode deepflow-agent -calculates the MAC and IP addresses used by Pods by capturing ARP/ND traffic. -When set to adaptive, active mode will be used first. - -### Pull Resource From Controller {#inputs.resources.pull_resource_from_controller} - -Configurations for deepflow-server on pulling resources from controller. -DeepFlow-agent will not read this section. 
+To watch `routes` in OpenShift, you can use the following settings: +```yaml +inputs: + resources: + kubernetes: + api_resources: + - name: ingresses + disabled: true + - name: routes +``` -#### Domain Filter {#inputs.resources.pull_resource_from_controller.domain_filter} +##### Name {#inputs.resources.kubernetes.api_resources.name} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.resources.pull_resource_from_controller.domain_filter` +`inputs.resources.kubernetes.api_resources.name` -Upgrade from old version: `domains` +Upgrade from old version: `static_config.kubernetes-resources.name` **Default value**: ```yaml inputs: resources: - pull_resource_from_controller: - domain_filter: - - '0' + kubernetes: + api_resources: + - name: '' ``` **Enum options**: | Value | Note | | ----- | ---------------------------- | -| _DYNAMIC_OPTIONS_ | _DYNAMIC_OPTIONS_ | +| namespaces | | +| nodes | | +| pods | | +| replicationcontrollers | | +| services | | +| daemonsets | | +| deployments | | +| replicasets | | +| statefulsets | | +| ingresses | | +| routes | | +| servicerules | | +| clonesets | | +| ippools | | +| opengaussclusters | | +| configmaps | | **Schema**: | Key | Value | @@ -6768,112 +4854,69 @@ inputs: **Description**: -Default value `0` means all domains, or can be set to a list of lcuuid of a -series of domains, you can get lcuuid through 'deepflow-ctl domain list'. - -Note: The list of MAC and IP addresses is used by deepflow-agent to inject tags -into data. This configuration can reduce the number and frequency of MAC and -IP addresses delivered by deepflow-server to deepflow-agent. When there is no -cross-domain service request, deepflow-server can be configured to only deliver -the information in the local domain to deepflow-agent. +K8s API resource name. 
-#### Only K8s Pod IP in Local Cluster {#inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster} +##### Group {#inputs.resources.kubernetes.api_resources.group} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster` +`inputs.resources.kubernetes.api_resources.group` -Upgrade from old version: `pod_cluster_internal_ip` +Upgrade from old version: `static_config.kubernetes-resources.group` **Default value**: ```yaml inputs: resources: - pull_resource_from_controller: - only_kubernetes_pod_ip_in_local_cluster: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -The list of MAC and IP addresses is used by deepflow-agent to inject tags -into data. This configuration can reduce the number and frequency of MAC and IP -addresses delivered by deepflow-server to deepflow-agent. When the Pod IP is not -used for direct communication between the K8s cluster and the outside world, -deepflow-server can be configured to only deliver the information in the local -K8s cluster to deepflow-agent. - -## Integration {#inputs.integration} - -### Enabled {#inputs.integration.enabled} - -**Tags**: - -`hot_update` - -**FQCN**: - -`inputs.integration.enabled` - -Upgrade from old version: `external_agent_http_proxy_enabled` - -**Default value**: -```yaml -inputs: - integration: - enabled: true + kubernetes: + api_resources: + - group: '' ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **Description**: -Whether to enable receiving external data sources such as Prometheus, -Telegraf, OpenTelemetry, SkyWalking and Vector. +K8s API resource group. 
-### Listen Port {#inputs.integration.listen_port} +##### Version {#inputs.resources.kubernetes.api_resources.version} **Tags**: -`hot_update` +agent_restart **FQCN**: -`inputs.integration.listen_port` +`inputs.resources.kubernetes.api_resources.version` -Upgrade from old version: `external_agent_http_proxy_port` +Upgrade from old version: `static_config.kubernetes-resources.version` **Default value**: ```yaml inputs: - integration: - listen_port: 38086 + resources: + kubernetes: + api_resources: + - version: '' ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [1, 65535] | +| Type | string | **Description**: -Listen port of the data integration socket. - -### Compression {#inputs.integration.compression} +K8s API version. -#### Trace {#inputs.integration.compression.trace} +##### Disabled {#inputs.resources.kubernetes.api_resources.disabled} **Tags**: @@ -6881,16 +4924,17 @@ Listen port of the data integration socket. **FQCN**: -`inputs.integration.compression.trace` +`inputs.resources.kubernetes.api_resources.disabled` -Upgrade from old version: `static_config.external-agent-http-proxy-compressed` +Upgrade from old version: `static_config.kubernetes-resources.disabled` **Default value**: ```yaml inputs: - integration: - compression: - trace: true + resources: + kubernetes: + api_resources: + - disabled: false ``` **Schema**: @@ -6900,11 +4944,9 @@ inputs: **Description**: -Whether to compress the integrated trace data received by deepflow-agent. The compression -ratio is about 5:1~10:1. Turning on this feature will result in higher CPU consumption -of deepflow-agent. +K8s API resource disabled. -#### Profile {#inputs.integration.compression.profile} +##### Field Selector {#inputs.resources.kubernetes.api_resources.field_selector} **Tags**: @@ -6912,34 +4954,29 @@ of deepflow-agent. 
**FQCN**: -`inputs.integration.compression.profile` +`inputs.resources.kubernetes.api_resources.field_selector` -Upgrade from old version: `static_config.external-agent-http-proxy-profile-compressed` +Upgrade from old version: `static_config.kubernetes-resources.field-selector` **Default value**: ```yaml inputs: - integration: - compression: - profile: true + resources: + kubernetes: + api_resources: + - field_selector: '' ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **Description**: -Whether to compress the integrated profile data received by deepflow-agent. The compression -ratio is about 5:1~10:1. Turning on this feature will result in higher CPU consumption -of deepflow-agent. - -### Prometheus Extra Labels {#inputs.integration.prometheus_extra_labels} - -Support for getting extra labels from headers in http requests from RemoteWrite. +K8s API resource field selector. -#### Enabled {#inputs.integration.prometheus_extra_labels.enabled} +#### K8s API List Page Size {#inputs.resources.kubernetes.api_list_page_size} **Tags**: @@ -6947,28 +4984,29 @@ Support for getting extra labels from headers in http requests from RemoteWrite. **FQCN**: -`inputs.integration.prometheus_extra_labels.enabled` +`inputs.resources.kubernetes.api_list_page_size` -Upgrade from old version: `static_config.prometheus-extra-config.enabled` +Upgrade from old version: `static_config.kubernetes-api-list-limit` **Default value**: ```yaml inputs: - integration: - prometheus_extra_labels: - enabled: false + resources: + kubernetes: + api_list_page_size: 1000 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [10, 4294967295] | **Description**: -Prometheus extra labels switch. +Used to limit the number of entries per page when listing K8s API resources. 
-#### Extra Labels {#inputs.integration.prometheus_extra_labels.extra_labels} +#### K8s API List Maximum Interval {#inputs.resources.kubernetes.api_list_max_interval} **Tags**: @@ -6976,60 +5014,55 @@ Prometheus extra labels switch. **FQCN**: -`inputs.integration.prometheus_extra_labels.extra_labels` +`inputs.resources.kubernetes.api_list_max_interval` -Upgrade from old version: `static_config.prometheus-extra-config.labels` +Upgrade from old version: `static_config.kubernetes-api-list-interval` **Default value**: ```yaml inputs: - integration: - prometheus_extra_labels: - extra_labels: [] + resources: + kubernetes: + api_list_max_interval: 10m ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | string | +| Type | duration | +| Range | ['10m', '30d'] | **Description**: -Labels list. Labels in this list are sent. Label is a string -matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` +Maximum interval between resource list operations while the watcher is idle. -#### Label Key Total Length Limit {#inputs.integration.prometheus_extra_labels.label_length} +#### Ingress Flavour {#inputs.resources.kubernetes.ingress_flavour} **Tags**: -agent_restart + +deprecated **FQCN**: -`inputs.integration.prometheus_extra_labels.label_length` +`inputs.resources.kubernetes.ingress_flavour` -Upgrade from old version: `static_config.prometheus-extra-config.labels-limit` +Upgrade from old version: `static_config.ingress-flavour` **Default value**: ```yaml inputs: - integration: - prometheus_extra_labels: - label_length: 1024 + resources: + kubernetes: + ingress_flavour: kubernetes ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [1024, 1048576] | - -**Description**: - -The limit of the total length of parsed extra Prometheus label keys. 
+| Type | string | -#### Value Total Length Limit {#inputs.integration.prometheus_extra_labels.value_length} +#### Pod MAC Collection Method {#inputs.resources.kubernetes.pod_mac_collection_method} **Tags**: @@ -7037,74 +5070,103 @@ The limit of the total length of parsed extra Prometheus label keys. **FQCN**: -`inputs.integration.prometheus_extra_labels.value_length` +`inputs.resources.kubernetes.pod_mac_collection_method` -Upgrade from old version: `static_config.prometheus-extra-config.values-limit` +Upgrade from old version: `static_config.kubernetes-poller-type` **Default value**: ```yaml inputs: - integration: - prometheus_extra_labels: - value_length: 4096 + resources: + kubernetes: + pod_mac_collection_method: adaptive ``` +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| adaptive | | +| active | | +| passive | | + **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [4096, 4194304] | +| Type | string | **Description**: -The limit of the total length of parsed extra Prometheus label values. +In active mode, deepflow-agent enters the netns of other Pods through +setns syscall to query the MAC and IP addresses. In this mode, the setns +operation requires the SYS_ADMIN permission. In passive mode deepflow-agent +calculates the MAC and IP addresses used by Pods by capturing ARP/ND traffic. +When set to adaptive, active mode will be used first. -### Feature Control {#inputs.integration.feature_control} +### Pull Resource From Controller {#inputs.resources.pull_resource_from_controller} -#### Profile Integration Disabled {#inputs.integration.feature_control.profile_integration_disabled} +Configurations for deepflow-server on pulling resources from controller. +DeepFlow-agent will not read this section. 
+ +#### Domain Filter {#inputs.resources.pull_resource_from_controller.domain_filter} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.profile_integration_disabled` +`inputs.resources.pull_resource_from_controller.domain_filter` -Upgrade from old version: `static_config.external-profile-integration-disabled` +Upgrade from old version: `domains` **Default value**: ```yaml inputs: - integration: - feature_control: - profile_integration_disabled: false + resources: + pull_resource_from_controller: + domain_filter: + - '0' ``` +**Enum options**: +| Value | Note | +| ----- | ---------------------------- | +| _DYNAMIC_OPTIONS_ | _DYNAMIC_OPTIONS_ | + **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | -#### Trace Integration Disabled {#inputs.integration.feature_control.trace_integration_disabled} +**Description**: + +Default value `0` means all domains, or can be set to a list of lcuuid of a +series of domains, you can get lcuuid through 'deepflow-ctl domain list'. + +Note: The list of MAC and IP addresses is used by deepflow-agent to inject tags +into data. This configuration can reduce the number and frequency of MAC and +IP addresses delivered by deepflow-server to deepflow-agent. When there is no +cross-domain service request, deepflow-server can be configured to only deliver +the information in the local domain to deepflow-agent. 
+ +#### Only K8s Pod IP in Local Cluster {#inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.trace_integration_disabled` +`inputs.resources.pull_resource_from_controller.only_kubernetes_pod_ip_in_local_cluster` -Upgrade from old version: `static_config.external-trace-integration-disabled` +Upgrade from old version: `pod_cluster_internal_ip` **Default value**: ```yaml inputs: - integration: - feature_control: - trace_integration_disabled: false + resources: + pull_resource_from_controller: + only_kubernetes_pod_ip_in_local_cluster: false ``` **Schema**: @@ -7112,24 +5174,34 @@ inputs: | ---- | ---------------------------- | | Type | bool | -#### Metric Integration Disabled {#inputs.integration.feature_control.metric_integration_disabled} +**Description**: + +The list of MAC and IP addresses is used by deepflow-agent to inject tags +into data. This configuration can reduce the number and frequency of MAC and IP +addresses delivered by deepflow-server to deepflow-agent. When the Pod IP is not +used for direct communication between the K8s cluster and the outside world, +deepflow-server can be configured to only deliver the information in the local +K8s cluster to deepflow-agent. 
+ +## Integration {#inputs.integration} + +### Enabled {#inputs.integration.enabled} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.metric_integration_disabled` +`inputs.integration.enabled` -Upgrade from old version: `static_config.external-metric-integration-disabled` +Upgrade from old version: `external_agent_http_proxy_enabled` **Default value**: ```yaml inputs: integration: - feature_control: - metric_integration_disabled: false + enabled: true ``` **Schema**: @@ -7137,49 +5209,60 @@ inputs: | ---- | ---------------------------- | | Type | bool | -#### Log Integration Disabled {#inputs.integration.feature_control.log_integration_disabled} +**Description**: + +Whether to enable receiving external data sources such as Prometheus, +Telegraf, OpenTelemetry, SkyWalking and Vector. + +### Listen Port {#inputs.integration.listen_port} **Tags**: -agent_restart +`hot_update` **FQCN**: -`inputs.integration.feature_control.log_integration_disabled` +`inputs.integration.listen_port` -Upgrade from old version: `static_config.external-log-integration-disabled` +Upgrade from old version: `external_agent_http_proxy_port` **Default value**: ```yaml inputs: integration: - feature_control: - log_integration_disabled: false + listen_port: 38086 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | int | +| Range | [1, 65535] | + +**Description**: + +Listen port of the data integration socket. 
-## Vector {#inputs.vector} +### Compression {#inputs.integration.compression} -### Vector Component Enabled {#inputs.vector.enabled} +#### Trace {#inputs.integration.compression.trace} **Tags**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.vector.enabled` +`inputs.integration.compression.trace` + +Upgrade from old version: `static_config.external-agent-http-proxy-compressed` **Default value**: ```yaml inputs: - vector: - enabled: false + integration: + compression: + trace: true ``` **Schema**: @@ -7189,354 +5272,46 @@ inputs: **Description**: -The switcher control for Vector component running. +Whether to compress the integrated trace data received by deepflow-agent. The compression +ratio is about 5:1~10:1. Turning on this feature will result in higher CPU consumption +of deepflow-agent. -### Vector Component Config {#inputs.vector.config} +#### Profile {#inputs.integration.compression.profile} **Tags**: -`hot_update` -ee_feature +agent_restart **FQCN**: -`inputs.vector.config` +`inputs.integration.compression.profile` + +Upgrade from old version: `static_config.external-agent-http-proxy-profile-compressed` **Default value**: ```yaml inputs: - vector: - config: null + integration: + compression: + profile: true ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | dict | +| Type | bool | **Description**: -The detail config for Vector Component, all availble config keys could be found in [vector.dev](https://vector.dev/docs/reference/configuration) -Here's an example for how to capture kubernetes logs、host metrics in virtual machine and kubelet metrics in kubernetes. It'll send to DeepFlow-Agent as output. 
- -scrape host metrics: -`K8S_NODE_NAME_FOR_DEEPFLOW` only required in k8s container environment -```yaml -sources: - host_metrics: - type: host_metrics - scrape_interval_secs: 10 - namespace: node -transforms: - host_process_filter: - type: filter - condition: '!starts_with(string!(.name), "process_")' - inputs: - - host_metrics - host_metrics_relabel: - type: remap - inputs: - - host_process_filter - source: | - .tags.instance = "${K8S_NODE_IP_FOR_DEEPFLOW}" - host_name, _ = get_env_var("K8S_NODE_NAME_FOR_DEEPFLOW") - if !is_empty(host_name) { - .tags.host = host_name - } - metrics_map = { - "boot_time": "boot_time_seconds", - "memory_active_bytes": "memory_Active_bytes", - "memory_available_bytes": "memory_MemAvailable_bytes", - "memory_buffers_bytes": "memory_Buffers_bytes", - "memory_cached_bytes": "memory_Cached_bytes", - "memory_free_bytes": "memory_MemFree_bytes", - "memory_swap_free_bytes": "memory_SwapFree_bytes", - "memory_swap_total_bytes": "memory_SwapTotal_bytes", - "memory_swap_used_bytes": "memory_SwapCached_bytes", - "memory_total_bytes": "memory_MemTotal_bytes", - "network_transmit_packets_drop_total": "network_transmit_drop_total", - "uptime": "uname_info", - "filesystem_total_bytes": "filesystem_size_bytes", - } - metric_name = get!(value: metrics_map, path: [.name]) - if !is_null(metric_name) { - .name = metric_name - } - if .tags.collector == "filesystem" { - .tags.fstype = .tags.filesystem - del(.tags.filesystem) - } -sinks: - prometheus_remote_write: - type: prometheus_remote_write - inputs: - - host_metrics_relabel - endpoint: http://127.0.0.1:38086/api/v1/prometheus - healthcheck: - enabled: false - -``` - -scrape kubernetes metrics -```yaml -secret: - kube_token: - type: directory - path: /var/run/secrets/kubernetes.io/serviceaccount -sources: - cadvisor_metrics: - type: prometheus_scrape - endpoints: - - https://${K8S_NODE_IP_FOR_DEEPFLOW}:10250/metrics/cadvisor - auth: - strategy: bearer - token: SECRET[kube_token.token] - 
scrape_interval_secs: 10 - scrape_timeout_secs: 10 - honor_labels: true - instance_tag: instance - endpoint_tag: metrics_endpoint - tls: - verify_certificate: false - kubelet_metrics: - type: prometheus_scrape - endpoints: - - https://${K8S_NODE_IP_FOR_DEEPFLOW}:10250/metrics - auth: - strategy: bearer - token: SECRET[kube_token.token] - scrape_interval_secs: 10 - scrape_timeout_secs: 10 - honor_labels: true - instance_tag: instance - endpoint_tag: metrics_endpoint - tls: - verify_certificate: false - kube_state_metrics: - type: prometheus_scrape - endpoints: - - http://opensource-kube-state-metrics:8080/metrics - scrape_interval_secs: 10 - scrape_timeout_secs: 10 - honor_labels: true - instance_tag: instance - endpoint_tag: metrics_endpoint -transforms: - cadvisor_relabel_filter: - type: filter - inputs: - - cadvisor_metrics - condition: "!match(string!(.name), r'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)|container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)|container_memory_(mapped_file|swap)|container_(file_descriptors|tasks_state|threads_max)')" - kubelet_relabel_filter: - type: filter - inputs: - - kubelet_metrics - condition: "match(string!(.name), 
r'kubelet_cgroup_(manager_duration_seconds_bucket|manager_duration_seconds_count)|kubelet_node_(config_error|node_name)|kubelet_pleg_relist_(duration_seconds_bucket|duration_seconds_count|interval_seconds_bucket)|kubelet_pod_(start_duration_seconds_count|worker_duration_seconds_bucket|worker_duration_seconds_count)|kubelet_running_(container_count|containers|pod_count|pods)|kubelet_runtime_(operations_duration_seconds_bucket|perations_errors_total|operations_total)|kubelet_volume_stats_(available_bytes|capacity_bytes|inodes|inodes_used)|process_(cpu_seconds_total|resident_memory_bytes)|rest_client_(request_duration_seconds_bucket|requests_total)|storage_operation_(duration_seconds_bucket|duration_seconds_count|errors_total)|up|volume_manager_total_volumes')" - kube_state_relabel_filter: - type: filter - inputs: - - kube_state_metrics - condition: "!match(string!(.name), r'kube_endpoint_address_not_ready|kube_endpoint_address_available')" - common_relabel_config: - type: remap - inputs: - - cadvisor_relabel_filter - - kubelet_relabel_filter - - kube_state_relabel_filter - source: |- - if !is_null(.tags) && is_string(.tags.metrics_endpoint) { - .tags.metrics_path = parse_regex!(.tags.metrics_endpoint, r'https?:\/\/[^\/]+(?\/.*)$').path - } -sinks: - prometheus_remote_write: - type: prometheus_remote_write - inputs: - - common_relabel_config - endpoint: http://127.0.0.1:38086/api/v1/prometheus - healthcheck: - enabled: false - -``` - -scrape kubernentes logs (capture DeepFlow Pod logs as example, if other Pod logs is required, update `extra_label_selector` add custom filters) -```yaml -data_dir: /vector-log-checkpoint -sources: - kubernetes_logs: - self_node_name: ${K8S_NODE_NAME_FOR_DEEPFLOW} - type: kubernetes_logs - namespace_annotation_fields: - namespace_labels: "" - node_annotation_fields: - node_labels: "" - pod_annotation_fields: - pod_annotations: "" - pod_labels: "" - extra_label_selector: "app=deepflow,component!=front-end" - kubernetes_logs_frontend: - 
self_node_name: ${K8S_NODE_NAME_FOR_DEEPFLOW} - type: kubernetes_logs - namespace_annotation_fields: - namespace_labels: "" - node_annotation_fields: - node_labels: "" - pod_annotation_fields: - pod_annotations: "" - pod_labels: "" - extra_label_selector: "app=deepflow,component=front-end" -transforms: - multiline_kubernetes_logs: - type: reduce - inputs: - - kubernetes_logs - group_by: - - file - - stream - merge_strategies: - message: concat_newline - starts_when: match(string!(.message), r'^(.+=|\[|\[?\u001B\[[0-9;]*m|\[mysql\]\s|\{\".+\"|(::ffff:)?([0-9]{1,3}.){3}[0-9]{1,3}[\s\-]+(\[)?)?\d{4}[-\/\.]?\d{2}[-\/\.]?\d{2}[T\s]?\d{2}:\d{2}:\d{2}') - expire_after_ms: 2000 - flush_period_ms: 500 - flush_kubernetes_logs: - type: remap - inputs: - - multiline_kubernetes_logs - source: |- - .message = replace(string!(.message), r'\u001B\[([0-9]{1,3}(;[0-9]{1,3})*)?m', "") - remap_kubernetes_logs: - type: remap - inputs: - - flush_kubernetes_logs - - kubernetes_logs_frontend - source: |- - if is_string(.message) && is_json(string!(.message)) { - tags = parse_json(.message) ?? {} - ._df_log_type = tags._df_log_type - .org_id = to_int(tags.org_id) ?? 0 - .user_id = to_int(tags.user_id) ?? 0 - .message = tags.message || tags.msg - del(tags._df_log_type) - del(tags.org_id) - del(tags.user_id) - del(tags.message) - del(tags.msg) - .json = tags - } - if !exists(.level) { - if exists(.json) { - .level = to_string!(.json.level) - del(.json.level) - } else { - level_tags = parse_regex(.message, r'[\[\\<](?(?i)INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\]\\>]') ?? {} - if !exists(level_tags.level) { - level_tags = parse_regex(.message, r'[\s](?INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\s]') ?? 
{} - } - if exists(level_tags.level) { - level_tags.level = upcase(string!(level_tags.level)) - if level_tags.level == "INFORMATION" || level_tags.level == "INFOMATION" { - level_tags.level = "INFO" - } - if level_tags.level == "WARNING" { - level_tags.level = "WARN" - } - if level_tags.level == "DEBU" { - level_tags.level = "DEBUG" - } - if level_tags.level == "ERRO" { - level_tags.level = "ERROR" - } - if level_tags.level == "CRIT" || level_tags.level == "CRITICAL" { - level_tags.level = "FATAL" - } - .level = level_tags.level - } - } - } - if !exists(._df_log_type) { - ._df_log_type = "system" - } - if !exists(.app_service) { - .app_service = .kubernetes.container_name - } -sinks: - http: - type: http - inputs: [remap_kubernetes_logs] - uri: http://127.0.0.1:38086/api/v1/log - encoding: - codec: json - -``` - -use http_client or socket to dial a remote server for testing -```yaml -sources: - http_client_dial: - type: http_client - endpoint: http://$HOST:$PORT - method: GET - scrape_interval_secs: 10 - scrape_timeout_secs: 5 - internal_metrics: - type: internal_metrics - scrape_interval_secs: 10 - namespace: ${K8S_NAMESPACE_FOR_DEEPFLOW} - socket_dial_input: - type: demo_logs - interval: 10 - format: shuffle - lines: [""] -transforms: - internal_metrics_relabel: - type: remap - inputs: - - internal_metrics - source: |- - .tags.instance = "${K8S_NODE_IP_FOR_DEEPFLOW}" - internal_metrics_dispatch: - type: route - inputs: - - internal_metrics_relabel - route: - http_client_dial_metrics: '.tags.component_id == "http_client_dial"' - socket_dial_metrics: '.tags.component_id == "socket_dial"' - http_client_dial_metrics: - type: filter - inputs: - - internal_metrics_dispatch.http_client_dial_metrics - condition: "match(string!(.name),r'http_client_.*')" - socket_dial_metrics: - type: filter - inputs: - - internal_metrics_dispatch.socket_dial_metrics - condition: "match(string!(.name),r'buffer.*')" -sinks: - socket_dial: - type: socket - inputs: - - socket_dial_input - 
address: $HOST:$PORT - mode: tcp - encoding: - codec: raw_message - prometheus_remote_write: - type: prometheus_remote_write - inputs: - - http_client_dial_metrics - - socket_dial_metrics - endpoint: http://127.0.0.1:38086/api/v1/prometheus - healthcheck: - enabled: false - -``` - -# Processors {#processors} +Whether to compress the integrated profile data received by deepflow-agent. The compression +ratio is about 5:1~10:1. Turning on this feature will result in higher CPU consumption +of deepflow-agent. -## Packet {#processors.packet} +### Prometheus Extra Labels {#inputs.integration.prometheus_extra_labels} -### Policy {#processors.packet.policy} +Support for getting extra labels from headers in http requests from RemoteWrite. -#### Fast-path Map Size {#processors.packet.policy.fast_path_map_size} +#### Enabled {#inputs.integration.prometheus_extra_labels.enabled} **Tags**: @@ -7544,31 +5319,28 @@ sinks: **FQCN**: -`processors.packet.policy.fast_path_map_size` +`inputs.integration.prometheus_extra_labels.enabled` -Upgrade from old version: `static_config.fast-path-map-size` +Upgrade from old version: `static_config.prometheus-extra-config.enabled` **Default value**: ```yaml -processors: - packet: - policy: - fast_path_map_size: 0 +inputs: + integration: + prometheus_extra_labels: + enabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [0, 10000000] | +| Type | bool | **Description**: -When set to 0, deepflow-agent will automatically adjust the map size -according to `global.limits.max_memory`. -Note: In practice, it should not be set to less than 8000. +Prometheus extra labels switch. -#### Fast-path Disabled {#processors.packet.policy.fast_path_disabled} +#### Extra Labels {#inputs.integration.prometheus_extra_labels.extra_labels} **Tags**: @@ -7576,28 +5348,29 @@ Note: In practice, it should not be set to less than 8000. 
**FQCN**: -`processors.packet.policy.fast_path_disabled` +`inputs.integration.prometheus_extra_labels.extra_labels` -Upgrade from old version: `static_config.fast-path-disabled` +Upgrade from old version: `static_config.prometheus-extra-config.labels` **Default value**: ```yaml -processors: - packet: - policy: - fast_path_disabled: false +inputs: + integration: + prometheus_extra_labels: + extra_labels: [] ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **Description**: -When set to `true`, deepflow-agent will not use fast path. +Labels list. Labels in this list are sent. Label is a string +matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` -#### Forward Table Capacity {#processors.packet.policy.forward_table_capacity} +#### Label Key Total Length Limit {#inputs.integration.prometheus_extra_labels.label_length} **Tags**: @@ -7605,30 +5378,30 @@ When set to `true`, deepflow-agent will not use fast path. **FQCN**: -`processors.packet.policy.forward_table_capacity` +`inputs.integration.prometheus_extra_labels.label_length` -Upgrade from old version: `static_config.forward-capacity` +Upgrade from old version: `static_config.prometheus-extra-config.labels-limit` **Default value**: ```yaml -processors: - packet: - policy: - forward_table_capacity: 16384 +inputs: + integration: + prometheus_extra_labels: + label_length: 1024 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [16384, 64000000] | +| Unit | byte | +| Range | [1024, 1048576] | **Description**: -The size of the forwarding table, which is used to store MAC-IP information, -When this value is larger, the more memory usage may be. +The limit of the total length of parsed extra Prometheus label keys. 
-#### Max First-path Level {#processors.packet.policy.max_first_path_level} +#### Value Total Length Limit {#inputs.integration.prometheus_extra_labels.value_length} **Tags**: @@ -7636,290 +5409,261 @@ When this value is larger, the more memory usage may be. **FQCN**: -`processors.packet.policy.max_first_path_level` +`inputs.integration.prometheus_extra_labels.value_length` -Upgrade from old version: `static_config.first-path-level` +Upgrade from old version: `static_config.prometheus-extra-config.values-limit` **Default value**: ```yaml -processors: - packet: - policy: - max_first_path_level: 8 +inputs: + integration: + prometheus_extra_labels: + value_length: 4096 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [1, 16] | +| Unit | byte | +| Range | [4096, 4194304] | **Description**: -DDBS algorithm level. - -When this value is larger, the memory overhead is smaller, but the -performance of policy matching is worse. +The limit of the total length of parsed extra Prometheus label values. 
-### TCP Header {#processors.packet.tcp_header} +### Feature Control {#inputs.integration.feature_control} -#### Block Size {#processors.packet.tcp_header.block_size} +#### Profile Integration Disabled {#inputs.integration.feature_control.profile_integration_disabled} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.tcp_header.block_size` +`inputs.integration.feature_control.profile_integration_disabled` -Upgrade from old version: `static_config.packet-sequence-block-size` +Upgrade from old version: `static_config.external-profile-integration-disabled` **Default value**: ```yaml -processors: - packet: - tcp_header: - block_size: 256 +inputs: + integration: + feature_control: + profile_integration_disabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [16, 8192] | - -**Description**: - -When generating TCP header data, each flow uses one block to compress and -store multiple TCP headers, and the block size can be set here. 
+| Type | bool | -#### Sender Queue Size {#processors.packet.tcp_header.sender_queue_size} +#### Trace Integration Disabled {#inputs.integration.feature_control.trace_integration_disabled} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.tcp_header.sender_queue_size` +`inputs.integration.feature_control.trace_integration_disabled` -Upgrade from old version: `static_config.packet-sequence-queue-size` +Upgrade from old version: `static_config.external-trace-integration-disabled` **Default value**: ```yaml -processors: - packet: - tcp_header: - sender_queue_size: 65536 +inputs: + integration: + feature_control: + trace_integration_disabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 64000000] | - -**Description**: - -The length of the following queues (to UniformCollectSender): -- 1-packet-sequence-block-to-uniform-collect-sender +| Type | bool | -#### Header Fields Flag {#processors.packet.tcp_header.header_fields_flag} +#### Metric Integration Disabled {#inputs.integration.feature_control.metric_integration_disabled} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.tcp_header.header_fields_flag` +`inputs.integration.feature_control.metric_integration_disabled` -Upgrade from old version: `static_config.packet-sequence-flag` +Upgrade from old version: `static_config.external-metric-integration-disabled` **Default value**: ```yaml -processors: - packet: - tcp_header: - header_fields_flag: 0 +inputs: + integration: + feature_control: + metric_integration_disabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [0, 255] | - -**Description**: - -packet-sequence-flag determines which fields need to be reported, the default -value is `0`, which means the feature is disabled, and `255`, which means all fields -need to be reported all fields corresponding to each bit: -``` -| FLAG | SEQ | ACK | PAYLOAD_SIZE | 
WINDOW_SIZE | OPT_MSS | OPT_WS | OPT_SACK | - 7 6 5 4 3 2 1 0 -``` - -### PCAP Stream {#processors.packet.pcap_stream} +| Type | bool | -#### Receiver Queue Size {#processors.packet.pcap_stream.receiver_queue_size} +#### Log Integration Disabled {#inputs.integration.feature_control.log_integration_disabled} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.receiver_queue_size` +`inputs.integration.feature_control.log_integration_disabled` -Upgrade from old version: `static_config.pcap.queue-size` +Upgrade from old version: `static_config.external-log-integration-disabled` **Default value**: ```yaml -processors: - packet: - pcap_stream: - receiver_queue_size: 65536 +inputs: + integration: + feature_control: + log_integration_disabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [65536, 64000000] | +| Type | bool | -**Description**: +# Processors {#processors} -The length of the following queues: -- 1-mini-meta-packet-to-pcap +## Packet {#processors.packet} + +### Policy {#processors.packet.policy} -#### Sender Queue Size {#processors.packet.pcap_stream.sender_queue_size} +#### Fast-path Map Size {#processors.packet.policy.fast_path_map_size} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.sender_queue_size` +`processors.packet.policy.fast_path_map_size` + +Upgrade from old version: `static_config.fast-path-map-size` **Default value**: ```yaml processors: packet: - pcap_stream: - sender_queue_size: 8192 + policy: + fast_path_map_size: 0 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [4096, 64000000] | +| Range | [0, 10000000] | **Description**: -The length of the following queues: -- 2-pcap-batch-to-sender +When set to 0, deepflow-agent will automatically adjust the map size +according to `global.limits.max_memory`. +Note: In practice, it should not be set to less than 8000. 
-#### Buffer Size Per Flow {#processors.packet.pcap_stream.buffer_size_per_flow} +#### Fast-path Disabled {#processors.packet.policy.fast_path_disabled} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.buffer_size_per_flow` +`processors.packet.policy.fast_path_disabled` -Upgrade from old version: `static_config.pcap.flow-buffer-size` +Upgrade from old version: `static_config.fast-path-disabled` **Default value**: ```yaml processors: packet: - pcap_stream: - buffer_size_per_flow: 65536 + policy: + fast_path_disabled: false ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | int | -| Range | [64, 64000000] | +| Type | bool | **Description**: -PCap buffer size per flow. Will flush the flow when reach this limit. +When set to `true`, deepflow-agent will not use fast path. -#### Total Buffer Size {#processors.packet.pcap_stream.total_buffer_size} +#### Forward Table Capacity {#processors.packet.policy.forward_table_capacity} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.total_buffer_size` +`processors.packet.policy.forward_table_capacity` -Upgrade from old version: `static_config.pcap.buffer-size` +Upgrade from old version: `static_config.forward-capacity` **Default value**: ```yaml processors: packet: - pcap_stream: - total_buffer_size: 88304 + policy: + forward_table_capacity: 16384 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | | Type | int | -| Range | [65536, 64000000] | +| Range | [16384, 64000000] | **Description**: -Total PCap buffer size. Will flush all flows when reach this limit. +The size of the forwarding table, which is used to store MAC-IP information. +The larger this value, the more memory may be used. 
-#### Flush Interval {#processors.packet.pcap_stream.flush_interval} +#### Max First-path Level {#processors.packet.policy.max_first_path_level} **Tags**: agent_restart -ee_feature **FQCN**: -`processors.packet.pcap_stream.flush_interval` +`processors.packet.policy.max_first_path_level` -Upgrade from old version: `static_config.pcap.flush-interval` +Upgrade from old version: `static_config.first-path-level` **Default value**: ```yaml processors: packet: - pcap_stream: - flush_interval: 1m + policy: + max_first_path_level: 8 ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | duration | -| Range | ['1s', '10m'] | +| Type | int | +| Range | [1, 16] | **Description**: -Flushes the PCap buffer of a flow if it has not been flushed for this interval. +DDBS algorithm level. + +When this value is larger, the memory overhead is smaller, but the +performance of policy matching is worse. ### TOA (TCP Option Address) {#processors.packet.toa} @@ -8309,306 +6053,6 @@ processors: Due to the response with data id 0x04 has different struct in different version, it may has one byte before row affect. -##### ISO8583 {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583} - -###### Value Translation {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583.translation_enabled} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.iso8583.translation_enabled` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - iso8583: - translation_enabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to perform field value translation. 
- -###### PAN Obfuscate {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583.pan_obfuscate} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.iso8583.pan_obfuscate` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - iso8583: - pan_obfuscate: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to obfuscate the Primary Account Number (PAN). - -###### Extract Fields {#processors.request_log.application_protocol_inference.protocol_special_config.iso8583.extract_fields} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.iso8583.extract_fields` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - iso8583: - extract_fields: 2,7,11,32,33 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -Extracted fields are displayed in `data native tags`. - - Example: `extract_fields: 0,2-33` -Field Reference: - -| Field No. 
| Description | -|-----------|-------------| -| 0 | Message Type Identifier (MTI) | -| 1 | Bitmap | -| 2 | Primary Account Number (PAN) | -| 3 | Processing Code | -| 4 | Amount, Transaction | -| 5 | Amount, Settlement | -| 6 | Amount, Cardholder Billing | -| 7 | Transmission Date & Time | -| 9 | Conversion Rate, Settlement | -| 10 | Conversion Rate, Cardholder Billing | -| 11 | System Trace Audit Number (STAN) | -| 12 | Local Transaction Time | -| 13 | Local Transaction Date | -| 14 | Expiration Date | -| 15 | Settlement Date | -| 16 | Conversion Date | -| 18 | Merchant Type | -| 19 | Acquiring Institution Country Code | -| 22 | POS Entry Mode Code | -| 23 | Card Sequence Number | -| 25 | POS Condition Code | -| 26 | POS PIN Capture Code | -| 28 | Transaction Fee | -| 32 | Acquiring Institution Identification Code | -| 33 | Forwarding Institution Identification Code | -| 35 | Track 2 Data | -| 36 | Track 3 Data | -| 37 | Retrieval Reference Number (RRN) | -| 38 | Authorization Identification Response | -| 39 | Response Code | -| 41 | Card Acceptor Terminal ID | -| 42 | Card Acceptor ID Code | -| 43 | Card Acceptor Name/Location | -| 44 | Additional Response Data | -| 45 | Track 1 Data | -| 48 | Additional Data – Private | -| 49 | Currency Code, Transaction | -| 50 | Currency Code, Settlement | -| 51 | Currency Code, Cardholder Billing | -| 52 | PIN Data | -| 53 | Security Related Control Information | -| 54 | Additional Amounts (Balance) | -| 55 | ICC Data (EMV Data) | -| 56 | Additional Data | -| 57 | Additional Transaction Data | -| 59 | Detail Data / Reserved for National Use | -| 60 | Reserved for Private Use | -| 61 | Cardholder Authentication Information | -| 62 | Switch Data | -| 63 | Network Data | -| 70 | Network Management Information Code | -| 90 | Original Data Elements | -| 96 | Message Security Code | -| 100 | Receiving Institution Identification Code | -| 102 | Account Identification 1 | -| 103 | Account Identification 2 | -| 104 | Additional Data | 
-| 113 | Additional Data | -| 116 | Additional Data | -| 117 | Additional Data | -| 121 | Reserved by China UnionPay (CUPS) | -| 122 | Reserved for Acquirer | -| 123 | Reserved for Issuer | -| 125 | Additional Data | -| 126 | Additional Data | -| 128 | Message Authentication Code (MAC) | - -##### WebSphereMQ {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq} - -###### Parse XML {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.parse_xml_enabled} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.parse_xml_enabled` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - web_sphere_mq: - parse_xml_enabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to parse XML. - -###### Decompress Payload {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.decompress_enabled} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.decompress_enabled` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - web_sphere_mq: - decompress_enabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Some web_sphere_mq messages use zlib compression. When this option is enabled, -the agent will decompress the data packets during parsing. 
- -###### Attribute Field Filter {#processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.filter_attributes_enabled} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.web_sphere_mq.filter_attributes_enabled` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - web_sphere_mq: - filter_attributes_enabled: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Enabling this option will cause the agent to retain only the following fields in the XML during parsing, reducing data storage. -- Document.ComConf.ConfInf.MT -- Document.ComConf.ConfInf.MsgId -- Document.ComConf.ConfInf.MsgPrcCd -- Document.ComConf.ConfInf.MsgRefId -- Document.ComConf.ConfInf.OrigSndDt -- Document.ComConf.ConfInf.OrigSndr -- Document.ComuCnfm.MsgId -- Document.ComuCnfm.MsgProCd -- Document.ComuCnfm.MsgRefId -- Document.ComuCnfm.MsgTp -- Document.ComuCnfm.OrigSndDt -- Document.ComuCnfm.OrigSndr - -##### NetSign {#processors.request_log.application_protocol_inference.protocol_special_config.net_sign} - -###### Extract Biz Data {#processors.request_log.application_protocol_inference.protocol_special_config.net_sign.extract_biz_data_enabled} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.request_log.application_protocol_inference.protocol_special_config.net_sign.extract_biz_data_enabled` - -**Default value**: -```yaml -processors: - request_log: - application_protocol_inference: - protocol_special_config: - net_sign: - extract_biz_data_enabled: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to extract the full bizData field into data attributes. 
- ##### MySQL {#processors.request_log.application_protocol_inference.protocol_special_config.mysql} ###### Decompress MySQL Payload {#processors.request_log.application_protocol_inference.protocol_special_config.mysql.decompress_payload} @@ -9347,56 +6791,21 @@ processors: **Description**: It is used to extract the real client IP field in the HTTP header, -such as X-Forwarded-For, etc. Leave it empty to disable this feature. -If multiple values are specified, the first match will be used. -Fields rewritten by plugins have the highest priority. - -##### X-Request-ID {#processors.request_log.tag_extraction.tracing_tag.x_request_id} - -**Tags**: - -`hot_update` - -**FQCN**: - -`processors.request_log.tag_extraction.tracing_tag.x_request_id` - -Upgrade from old version: `http_log_x_request_id` - -**Default value**: -```yaml -processors: - request_log: - tag_extraction: - tracing_tag: - x_request_id: - - X_Request_ID -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -It is used to extract the fields in the HTTP header that are used -to uniquely identify the same request before and after the gateway, -such as X-Request-ID, etc. This feature can be turned off by setting -it to empty. +such as X-Forwarded-For, etc. Leave it empty to disable this feature. If multiple values are specified, the first match will be used. Fields rewritten by plugins have the highest priority. 
-##### Multiple TraceID Collection {#processors.request_log.tag_extraction.tracing_tag.multiple_trace_id_collection} +##### X-Request-ID {#processors.request_log.tag_extraction.tracing_tag.x_request_id} **Tags**: `hot_update` -ee_feature **FQCN**: -`processors.request_log.tag_extraction.tracing_tag.multiple_trace_id_collection` +`processors.request_log.tag_extraction.tracing_tag.x_request_id` + +Upgrade from old version: `http_log_x_request_id` **Default value**: ```yaml @@ -9404,18 +6813,23 @@ processors: request_log: tag_extraction: tracing_tag: - multiple_trace_id_collection: true + x_request_id: + - X_Request_ID ``` **Schema**: | Key | Value | | ---- | ---------------------------- | -| Type | bool | +| Type | string | **Description**: -When configured as `false`, only one TraceID is collected. -When configured as `true`, multiple TraceIDs will be collected. +It is used to extract the fields in the HTTP header that are used +to uniquely identify the same request before and after the gateway, +such as X-Request-ID, etc. This feature can be turned off by setting +it to empty. +If multiple values are specified, the first match will be used. +Fields rewritten by plugins have the highest priority. ##### APM TraceID {#processors.request_log.tag_extraction.tracing_tag.apm_trace_id} @@ -10347,39 +7761,6 @@ L2End = true is matched when generating the flow. Set this value to `true` to force a double-sided MAC address match and only aggregate traffic with exactly equal MAC addresses. 
-##### IDC Traffic Ignore VLAN {#processors.flow_log.conntrack.flow_generation.idc_traffic_ignore_vlan} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`processors.flow_log.conntrack.flow_generation.idc_traffic_ignore_vlan` - -Upgrade from old version: `static_config.flow.ignore-idc-vlan` - -**Default value**: -```yaml -processors: - flow_log: - conntrack: - flow_generation: - idc_traffic_ignore_vlan: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -When the VLAN of the two-way traffic collected at the same location -are asymmetrical, the traffic cannot be aggregated into a Flow. You can -set this value at this time. Only valid for IDC (not Cloud) traffic. - #### Timeouts {#processors.flow_log.conntrack.timeouts} ##### Established {#processors.flow_log.conntrack.timeouts.established} @@ -10803,44 +8184,6 @@ outputs: It can only be set to FILE in standalone mode, in which case l4_flow_log and l7_flow_log will be written to local files. -### NPB Socket Type {#outputs.socket.npb_socket_type} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.socket.npb_socket_type` - -Upgrade from old version: `npb_socket_type` - -**Default value**: -```yaml -outputs: - socket: - npb_socket_type: RAW_UDP -``` - -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| UDP | | -| RAW_UDP | | -| TCP | | -| ZMQ | | - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | string | - -**Description**: - -RAW_UDP uses RawSocket to send UDP packets, which has the highest -performance, but there may be compatibility issues in some environments. 
- ### RAW_UDP QoS Bypass {#outputs.socket.raw_udp_qos_bypass} **Tags**: @@ -11460,268 +8803,6 @@ outputs: The length of the following queues: - 3-doc-to-collector-sender -## NPB (Network Packet Broker) {#outputs.npb} - -### Maximum MTU {#outputs.npb.max_mtu} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.max_mtu` - -Upgrade from old version: `mtu` - -**Default value**: -```yaml -outputs: - npb: - max_mtu: 1500 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | byte | -| Range | [500, 10000] | - -**Description**: - -Maximum MTU allowed when using UDP for NPB. - -Attention: Public cloud service providers may modify the content of the -tail of the UDP packet whose packet length is close to 1500 bytes. When -using UDP transmission, it is recommended to set a slightly smaller value. - -### RAW_UDP VLAN Tag {#outputs.npb.raw_udp_vlan_tag} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.raw_udp_vlan_tag` - -Upgrade from old version: `output_vlan` - -**Default value**: -```yaml -outputs: - npb: - raw_udp_vlan_tag: 0 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 4095] | - -**Description**: - -When using RAW_UDP Socket to transmit UDP data, this value can be used to -set the VLAN tag. Default value `0` means no VLAN tag. 
- -### Extra VLAN Header {#outputs.npb.extra_vlan_header} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.extra_vlan_header` - -Upgrade from old version: `npb_vlan_mode` - -**Default value**: -```yaml -outputs: - npb: - extra_vlan_header: 0 -``` - -**Enum options**: -| Value | Note | -| ----- | ---------------------------- | -| 0 | None | -| 1 | 802.1Q | -| 2 | QinQ | - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | - -**Description**: - -Whether to add an extra 802.1Q header to NPB traffic, when this value is -set, deepflow-agent will insert a VLAN Tag into the NPB traffic header, and -the value is the lower 12 bits of TunnelID in the VXLAN header. - -### Traffic Global Dedup {#outputs.npb.traffic_global_dedup} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.traffic_global_dedup` - -Upgrade from old version: `npb_dedup_enabled` - -**Default value**: -```yaml -outputs: - npb: - traffic_global_dedup: true -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -Whether to enable global (distributed) traffic deduplication for the -NPB feature. - -### Target Port {#outputs.npb.target_port} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`outputs.npb.target_port` - -Upgrade from old version: `static_config.npb-port` - -**Default value**: -```yaml -outputs: - npb: - target_port: 4789 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [1, 65535] | - -**Description**: - -Server port for NPB. 
- -### Custom VXLAN Flags {#outputs.npb.custom_vxlan_flags} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`outputs.npb.custom_vxlan_flags` - -Upgrade from old version: `static_config.vxlan-flags` - -**Default value**: -```yaml -outputs: - npb: - custom_vxlan_flags: 255 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Range | [0, 255] | - -**Description**: - -NPB uses the first byte of the VXLAN Flag to identify the sending traffic to -prevent the traffic sent by NPB from being collected by deepflow-agent. - -Attention: To ensure that the VNI bit is set, the value configured here will -be used after |= 0b1000_0000. Therefore, this value cannot be directly -configured as 0b1000_0000. - -### Overlay VLAN Header Trimming {#outputs.npb.overlay_vlan_header_trimming} - -**Tags**: - -agent_restart -ee_feature - -**FQCN**: - -`outputs.npb.overlay_vlan_header_trimming` - -Upgrade from old version: `static_config.ignore-overlay-vlan` - -**Default value**: -```yaml -outputs: - npb: - overlay_vlan_header_trimming: false -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | bool | - -**Description**: - -This configuration only ignores the VLAN header in the captured original message -and does not affect the configuration item: npb_vlan_mode - -### Maximum Tx Throughput {#outputs.npb.max_tx_throughput} - -**Tags**: - -`hot_update` -ee_feature - -**FQCN**: - -`outputs.npb.max_tx_throughput` - -Upgrade from old version: `max_npb_bps` - -**Default value**: -```yaml -outputs: - npb: - max_tx_throughput: 1000 -``` - -**Schema**: -| Key | Value | -| ---- | ---------------------------- | -| Type | int | -| Unit | Mbps | -| Range | [1, 100000] | - -**Description**: - -Maximum traffic rate allowed for npb sender. 
- ## Compression {#outputs.compression} ### Application_Log {#outputs.compression.application_log} diff --git a/server/agent_config/template.yaml b/server/agent_config/template.yaml index b10a5ba85c8..773a04461be 100644 --- a/server/agent_config/template.yaml +++ b/server/agent_config/template.yaml @@ -350,57 +350,6 @@ global: metric: load15 # type: section # name: - # en: Tx Throughput - # ch: 发送吞吐 - # description: - tx_throughput: - # type: int - # name: - # en: Trigger Threshold - # ch: 触发阈值 - # unit: Mbps - # range: [0, 100000] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # When the outbound throughput of the NPB interface reaches or exceeds - # the threshold, the broker will be stopped, after that the broker will - # be resumed if the throughput is lower than - # `(trigger_threshold - outputs.npb.max_tx_throughput)*90%` - # within 5 consecutive monitoring intervals. - # - # Attention: When configuring this value, it must be greater than - # `outputs.npb.max_tx_throughput`. Set to 0 will disable this feature. - # ch: |- - # 如果流量分发所用网络接口的出方向吞吐量达到或超出此阈值,deepflow-agent 停止流量分发; - # 如果该网络接口的出方向吞吐量连续 5 个监控周期低于`(trigger_threshold - - # outputs.npb.max_tx_throughput)*90%`,deepflow-agent 恢复流量分发。 - # - # 注意: - # 1. 取值为 0 时,该特性不生效; - # 2. 若取非 0 值,必须大于 `outputs.npb.max_tx_throughput`。 - # upgrade_from: max_tx_bandwidth - trigger_threshold: 0 - # type: duration - # name: - # en: Throughput Monitoring Interval - # ch: 吞吐监控间隔 - # unit: - # range: [1s, 60s] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Monitoring interval for outbound traffic rate of NPB interface. 
- # ch: |- - # deepflow-agent 对流量分发所使用网络接口的出方向吞吐量指标的监控周期。 - # upgrade_from: bandwidth_probe_interval - throughput_monitoring_interval: 10s - # type: section - # name: # en: Free Disk # ch: 空闲磁盘 # description: @@ -2022,28 +1971,6 @@ inputs: bond_interfaces: [] # type: string # name: - # en: Extra Network Namespace Regex - # ch: 需要采集的额外网络 Namespace - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Packet will be captured in regex matched namespaces besides the default - # namespace. NICs captured in extra namespaces are also filtered with - # `inputs.cbpf.af_packet.interface_regex`. - # - # Default value `""` means no extra network namespace (default namespace only). - # ch: |- - # 除默认网络 namespace 之外,deepflow-agent 还会根据此参数正则匹配额外的网络 namespace, - # 在匹配命中的网络 namespace 中根据`inputs.cbpf.af_packet.interface_regex`正则匹配网络接口并采集流量。默认 - # 配置 `""` 表示仅采集默认网络 namesapce,不采集额外的网络 namespace 流量。 - # upgrade_from: extra_netns_regex - extra_netns_regex: "" - # type: string - # name: # en: Extra BPF Filter # ch: 额外的 BPF 过滤器 # unit: @@ -2064,26 +1991,6 @@ inputs: # upgrade_from: static_config.src-interfaces # deprecated: true src_interfaces: [] - # type: int - # name: - # en: VLAN PCP in Physical Mirror Traffic - # ch: 物理网络镜像流量中的 VLAN PCP - # unit: - # range: [0, 9] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # - When this configuration <= 7 calculate TAP value from vlan tag only if vlan pcp matches this value. - # - when this configuration is 8 calculate TAP value from outer vlan tag, - # - when this configuration is 9 calculate TAP value from inner vlan tag. 
- # ch: |- - # - 当此配置值小于等于 7 时,仅当 VLAN PCP 与该值匹配时,从 VLAN tag 中计算 TAP。 - # - 当此配置值为 8 时,从外层 VLAN tag 中计算 TAP, - # - 当此配置值为 9 时,从内层 VLAN tag 中计算 TAP。 - # upgrade_from: static_config.mirror-traffic-pcp - vlan_pcp_in_physical_mirror_traffic: 0 # type: bool # name: # en: BPF Filter Disabled @@ -2276,25 +2183,6 @@ inputs: # name: DPDK # description: dpdk: - # type: string - # name: - # en: Data Source - # ch: 数据源 - # unit: - # range: [] - # enum_options: [None, eBPF, pdump] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Currently, there are two ways to collect DPDK traffic, including: - # - pdump: See details [https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html](https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html) - # - eBPF: Use eBPF Uprobe to obtain DPDK traffic, configuration `inputs.ebpf.socket.uprobe.dpdk` is also required. - # ch: |- - # 目前支持两种采集 DPDK 流量的方式,包括: - # - pdump: 详情见 [https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html](https://dpdk-docs.readthedocs.io/en/latest/prog_guide/multi_proc_support.html) - # - eBPF: 使用 eBPF Uprobe 的方式获取 DPDK 流量,同时需要配置 `inputs.ebpf.socket.uprobe.dpdk` - source: None # type: duration # name: # en: reorder cache window size @@ -2310,88 +2198,6 @@ inputs: # ch: |- # 当 `inputs.cbpf.special_network.dpdk.source` 为 eBPF 时该配置生效,时间窗口变大会导致 agent 占用更多的内存。 reorder_cache_window_size: 60ms - # type: section - # name: Libpcap - # description: - libpcap: - # type: bool - # name: Enabled - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Supports running on Windows and Linux, Low performance when using multiple interfaces. - # Default to true in Windows, false in Linux. 
- # ch: |- - # libpcap 的启动开关,该参数在 Windows 系统中默认开启,在 Linux 系统中默认关闭。libcap 在 Windows - # 和 Linux 系统中均支持,但在多接口的环境中流量采集性能较低。 - # upgrade_from: static_config.libpcap-enabled - enabled: false - # type: section - # name: vHost User - # description: - vhost_user: - # type: string - # name: vHost Socket Path - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Supports running on Linux with mirror mode. - # ch: |- - # 支持在 Linux 环境中以虚拟网络镜像模式运行。 - # upgrade_from: static_config.vhost-socket-path - vhost_socket_path: "" - # type: section - # name: - # en: Physical Switch - # ch: 物理交换机 - # description: - physical_switch: - # type: int - # name: - # en: sFlow Receiving Ports - # ch: sFlow 接收端口号 - # unit: - # range: [1, 65535] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # This feature is only supported by the Enterprise Edition of Trident. - # In general, sFlow uses port 6343. Default value `[]` means that no sFlow - # data will be collected. - # ch: |- - # 配置 sFlow 的接收端口号,默认值`[]`表示不采集 sFlow 数据。通常 sFlow 使用 6343 端口。 - # 注意,该特性仅对企业版的 Trident 有效。 - # upgrade_from: static_config.xflow-collector.sflow-ports - sflow_ports: [] - # type: int - # name: - # en: NetFlow Receiving Ports - # ch: NetFlow 接收端口号 - # unit: - # range: [1, 65535] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # This feature is only supported by the Enterprise Edition of Trident. - # Additionally, only NetFlow v5 is currently supported. In general, NetFlow - # uses port 2055. Default value `[]` means that no NetFlow data will be collected. 
- # ch: |- - # 配置 NetFlow 的接收端口号,默认值`[]`表示不采集 NetFlow 数据。通常 sFlow 使用 2055 端口。 - # 注意,该特性仅对企业版的 Trident 有效,且目前仅支持 NetFlow v5 协议。 - # upgrade_from: static_config.xflow-collector.netflow-ports - netflow_ports: [] # type: section # name: # en: Tunning @@ -2437,60 +2243,6 @@ inputs: max_capture_packet_size: 65535 # type: int # name: - # en: Raw Packet Buffer Block Size - # ch: 裸包缓冲区 Block 大小 - # unit: - # range: [65536, 16000000] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # In certain modes, raw packets will go through a queue before being processed. - # To avoid memory allocation for each packet, a memory block of size - # raw_packet_buffer_block_size is allocated for multiple packets. - # Larger value will reduce memory allocation for raw packet, but will also - # delay memory free. - # This configuration is effective for the following `inputs.cbpf.common.capture_mode`: - # - analyzer mode - # - local mode with `inputs.cbpf.af_packet.inner_interface_capture_enabled` = true - # - local mode with `inputs.cbpf.tunning.dispatcher_queue_enabled` = true - # - mirror mode with `inputs.cbpf.tunning.dispatcher_queue_enabled` = true - # ch: |- - # Analyzer 模式下采集到的包进入队列前需要分配内存暂存。为避免每个包进行内存申请,每次开辟 - # raw_packet_buffer_block_size 大小的内存块给数个包使用。 - # 更大的配置可以减少内存分配,但会延迟内存释放。 - # 该配置对以下采集模式(`inputs.cbpf.common.capture_mode`)生效: - # - analyzer 模式 - # - local 模式,且 `inputs.cbpf.af_packet.inner_interface_capture_enabled` = true - # - local 模式,且 `inputs.cbpf.tunning.dispatcher_queue_enabled` = true - # - mirror 模式,且 `inputs.cbpf.tunning.dispatcher_queue_enabled` = true - # upgrade_from: static_config.analyzer-raw-packet-block-size - raw_packet_buffer_block_size: 65536 - # type: int - # name: - # en: Raw Packet Queue Size - # ch: 裸包队列大小 - # unit: - # range: [65536, 64000000] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # The length of the following queues (only for 
`inputs.cbpf.common.capture_mode` = `Physical Mirror`): - # - 0.1-bytes-to-parse - # - 0.2-packet-to-flowgenerator - # - 0.3-packet-to-pipeline - # ch: |- - # 以下队列的长度(仅在 `inputs.cbpf.common.capture_mode` = `物理网络镜像`时有效): - # - 0.1-bytes-to-parse - # - 0.2-packet-to-flowgenerator - # - 0.3-packet-to-pipeline - # upgrade_from: static_config.analyzer-queue-size - raw_packet_queue_size: 131072 - # type: int - # name: # en: Max Capture PPS # ch: 最大采集 PPS # unit: pps @@ -2552,103 +2304,6 @@ inputs: # 仅企业版支持解析 ERSPAN 和 TEB。 # upgrade_from: static_config.trim-tunnel-types tunnel_trim_protocols: [] - - # type: string - # name: - # en: Packet Segmentation Reassembly Ports - # ch: TCP分段重组端口 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # For the specified ports, consecutive TCP packets will be aggregated together for application log parsing. - # - # Example: - # - # packet_segmentation_reassembly: - # - 1000 - # - 2000-2010 - # - 5000 - # ch: |- - # 对指定端口的流,相邻的两个TCP分段 Packet 聚合在一起解析应用日志 - # - # 配置示例: - # - # packet_segmentation_reassembly: - # - 1000 - # - 2000-2010 - # - 5000 - # upgrade_from: static_config.packet-segmentation-reassembly - packet_segmentation_reassembly: [] - - # type: section - # name: - # en: Physical Mirror Traffic - # ch: 物理网络流量镜像 - # description: - physical_mirror: - # type: int - # name: - # en: Default Capture Network Type - # ch: 默认采集网络类型 - # unit: - # range: [] - # enum_options: - # - 3: - # en: Cloud Network - # ch: 云网络 - # - _DYNAMIC_OPTIONS_: _DYNAMIC_OPTIONS_ - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # deepflow-agent will mark the TAP (Traffic Access Point) location - # according to the outer vlan tag in the mirrored traffic of the physical - # switch. 
When the vlan tag has no corresponding TAP value, or the vlan - # pcp does not match the `inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic`, it will assign the TAP value. - # This configuration item. Default value `3` means Cloud Network. - # ch: |- - # 在 `inputs.cbpf.common.capture_mode` 为`物理网络镜像`模式下,deepflow-agent 通过镜像流量的外层 VLAN 标签识别并标记采集数据的 - # TAP(Traffic Access Point)值。当流量外层 VLAN 标签没有对应的 TAP 值,或 VLAN pcp 值与 - # `inputs.cbpf.af_packet.vlan_pcp_in_physical_mirror_traffic` 的配置不一致时,deepflow-agent 使用本参数值 - # 标记数据的 TAP 值。 - # upgrade_from: static_config.default-tap-type - default_capture_network_type: 3 - # type: bool - # name: - # en: Packet Dedup Disabled - # ch: 禁用 Packet 去重 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether to enable mirror traffic deduplication when `inputs.cbpf.common.capture_mode` = `Physical Mirror`. - # ch: |- - # 当 `inputs.cbpf.common.capture_mode` 为`物理网络镜像`模式, 该参数配置为 `true` 时,deepflow-agent 将不对数据包做去重处理。 - # upgrade_from: static_config.analyzer-dedup-disabled - packet_dedup_disabled: false - # type: bool - # name: - # en: Gateway Traffic of Private Cloud - # ch: 专有云网关流量 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether it is the mirrored traffic of NFVGW (cloud gateway) when `inputs.cbpf.common.capture_mode` = `Physical Mirror`. 
- # ch: |- - # 当 `inputs.cbpf.common.capture_mode` 为 `物理网络镜像` 模式,该参数配置为 `true` 时,deepflow-agent 会将流量识别为 NFVGW 流量。 - # upgrade_from: static_config.cloud-gateway-traffic - private_cloud_gateway_traffic: false # type: section # name: eBPF # description: @@ -2782,203 +2437,6 @@ inputs: # 即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.socket.uprobe.tls`。 # upgrade_from: static_config.ebpf.uprobe-openssl-trace-enabled, static_config.ebpf.uprobe-process-name-regexs.openssl enabled: false - # type: section - # name: DPDK - # description: - dpdk: - # type: string - # name: - # en: DPDK Application Command Name - # ch: DPDK 应用命令名称 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Set the command name of the DPDK application, eBPF will automatically - # locate and trace packets for data collection. - # - # Example: In the command line `/usr/bin/mydpdk`, it can be set as `command: mydpdk`, and set `inputs.cbpf.special_network.dpdk.source = eBPF` - # - # In scenarios where DPDK acts as the vhost-user backend, data exchange between the virtual machine and the DPDK - # application occurs through virtqueues (vrings). eBPF can automatically hook into the vring interface without - # requiring any modifications to DPDK or the virtual machine, enabling packet capture and traffic observability - # with zero additional configuration. In contrast, capturing packets on physical NICs requires explicit configuration - # of the corresponding DPDK driver interfaces. 
- # ch: |- - # 设置 DPDK 应用的命令名称, eBPF 会自动寻找并进行追踪采集数据包 - # - # 配置样例: 如果命令行是 `/usr/bin/mydpdk`, 可以配置成 `command: mydpdk`, 并设置 `inputs.cbpf.special_network.dpdk.source = eBPF` - # - # 在 DPDK 作为 vhost-user 后端的场景中,虚拟机与 DPDK 应用之间通过 virtqueue(vring)进行数据交换。 - # eBPF 可以在无需修改 DPDK 或虚拟机的前提下,自动 hook 到 vring 接口,实现对传输数据包的捕获和分析, - # 无需额外配置即可实现流量可观测。相比之下,若要捕获物理网卡上的数据包,则需要配合 DPDK 的驱动接口进行显式配置。 - # upgrade_from: - command: "" - # type: string - # name: - # en: DPDK Application RX Hooks Configuration - # ch: DPDK 应用数据包接收 hook 点设置 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Fill in the appropriate packet reception hook point according to the actual network card driver. - # You can use the command 'lspci -vmmk' to find the network card driver type. For example: - # ``` - # Slot: 04:00.0 - # Class: Ethernet controller - # Vendor: Intel Corporation - # Device: Ethernet Controller XL710 for 40GbE QSFP+ - # SVendor: Unknown vendor 1e18 - # SDevice: Device 4712 - # Rev: 02 - # Driver: igb_uio - # Module: i40e - # ``` - # In the example above, "Driver: igb_uio" indicates a DPDK-managed device (other options include - # "vfio-pci" and "uio_pci_generic", which are also managed by DPDK). The actual driver is 'i40e' - # (derived from 'Module: i40e'). - # - # You can use the sustainable profiling feature provided by DeepFlow to perform function profiling - # on the DPDK application and check the specific interface names. Alternatively, you can run the - # `perf` command on the node where the agent is located: - # `perf record -F97 -a -g -p -- sleep 30` - # and then use - # `perf script | grep -E 'recv|xmit|rx|tx' | grep ` (`drive_name` may be `ixgbe/i40e/mlx5`) - # to confirm the driver interfaces. - # - # Below are some common interface names for different drivers, for reference only: - # 1. Physical NIC Drivers: - # - Intel Drivers: - # - ixgbe: Supports Intel 82598/82599/X520/X540/X550 series NICs. 
- # - rx: ixgbe_recv_pkts, ixgbe_recv_pkts_vec - # - tx: ixgbe_xmit_pkts, ixgbe_xmit_fixed_burst_vec, ixgbe_xmit_pkts_vec - # - i40e: Supports Intel X710, XL710 series NICs. - # - rx: i40e_recv_pkts - # - tx: i40e_xmit_pkts - # - ice: Supports Intel E810 series NICs. - # - rx: ice_recv_pkts - # - tx: ice_xmit_pkts - # - Mellanox Drivers: - # - mlx4: Supports Mellanox ConnectX-3 series NICs. - # - rx: mlx4_rx_burst - # - tx: mlx4_tx_burst - # - mlx5: Supports Mellanox ConnectX-4, ConnectX-5, ConnectX-6 series NICs. - # - rx: mlx5_rx_burst, mlx5_rx_burst_vec, mlx5_rx_burst_mprq - # - tx: Pending confirmation - # - Broadcom Drivers: - # - bnxt: Supports Broadcom NetXtreme series NICs. - # - rx: bnxt_recv_pkts, bnxt_recv_pkts_vec (x86, Vector mode receive) - # - tx: bnxt_xmit_pkts, bnxt_xmit_pkts_vec (x86, Vector mode transmit) - # 2. Virtual NIC Drivers: - # - Virtio Driver: - # - virtio: Supports Virtio-based virtual network interfaces. - # - rx: virtio_recv_pkts, virtio_recv_mergeable_pkts_packed, virtio_recv_pkts_packed, - # virtio_recv_pkts_vec, virtio_recv_pkts_inorder, virtio_recv_mergeable_pkts - # - tx: virtio_xmit_pkts_packed, virtio_xmit_pkts - # - VMXNET3 Driver: - # - vmxnet3: Supports VMware's VMXNET3 virtual NICs. - # - rx: vmxnet3_recv_pkts - # - tx: vmxnet3_xmit_pkts - # - # Example: `rx_hooks: [ixgbe_recv_pkts, i40e_recv_pkts, virtio_recv_pkts, virtio_recv_mergeable_pkts]` - # - # Note: When using the burst mode of the current DPDK driver interface to send and receive packets, - # the number of eBPF instructions is limited to 4096 in older Linux kernels (below Linux 5.2). As a - # result, during DPDK packet capture, only a maximum of 16 packets can be captured. For Linux kernels - # 5.2 and above, up to 32 packets can be captured (this is typically the default value for DPDK - # burst mode). For kernels older than Linux 5.2, packet loss may occur (if the burst size exceeds 16). 
- # - # ch: |- - # 根据实际的网卡驱动填写合适的数据包接收 hook 点,可以利用命令 'lspci -vmmk' 寻找网卡驱动类型例如: - # ``` - # Slot: 04:00.0 - # Class: Ethernet controller - # Vendor: Intel Corporation - # Device: Ethernet Controller XL710 for 40GbE QSFP+ - # SVendor: Unknown vendor 1e18 - # SDevice: Device 4712 - # Rev: 02 - # Driver: igb_uio - # Module: i40e - # ``` - # 上面的 "Driver: igb_uio" 说明是 DPDP 纳管的设备 (除此之外还有 "vfio-pci", "uio_pci_generic" - # 也被 DPDK 纳管), 真实驱动是 'i40e' (从 'Module: i40e' 得到) - # - # 可以使用 deepflow 提供的可持续剖析功能对 DPDK 应用做函数剖析查看具体接口名字,也可以使用 perf 命令 - # 在agent所在节点上运行 `perf record -F97 -a -g -p -- sleep 30`, - # `perf script | grep -E 'recv|xmit|rx|tx' | grep ` (`drive_name` may be `ixgbe/i40e/mlx5`) - # 来确认驱动接口。 - # - # 下面列出了不同驱动对应的接口名称,仅供参考: - # 1. Physical NIC Drivers: - # - Intel Drivers: - # - ixgbe: Supports Intel 82598/82599/X520/X540/X550 series NICs. - # - rx: ixgbe_recv_pkts, ixgbe_recv_pkts_vec - # - tx: ixgbe_xmit_pkts, ixgbe_xmit_fixed_burst_vec, ixgbe_xmit_pkts_vec - # - i40e: Supports Intel X710, XL710 series NICs. - # - rx: i40e_recv_pkts - # - tx: i40e_xmit_pkts - # - ice: Supports Intel E810 series NICs. - # - rx: ice_recv_pkts - # - tx: ice_xmit_pkts - # - Mellanox Drivers: - # - mlx4: Supports Mellanox ConnectX-3 series NICs. - # - rx: mlx4_rx_burst - # - tx: mlx4_tx_burst - # - mlx5: Supports Mellanox ConnectX-4, ConnectX-5, ConnectX-6 series NICs. - # - rx: mlx5_rx_burst, mlx5_rx_burst_vec, mlx5_rx_burst_mprq - # - tx: Pending confirmation - # - Broadcom Drivers: - # - bnxt: Supports Broadcom NetXtreme series NICs. - # - rx: bnxt_recv_pkts, bnxt_recv_pkts_vec (x86, Vector mode receive) - # - tx: bnxt_xmit_pkts, bnxt_xmit_pkts_vec (x86, Vector mode transmit) - # 2. Virtual NIC Drivers: - # - Virtio Driver: - # - virtio: Supports Virtio-based virtual network interfaces. 
- # - rx: virtio_recv_pkts, virtio_recv_mergeable_pkts_packed, virtio_recv_pkts_packed, - # virtio_recv_pkts_vec, virtio_recv_pkts_inorder, virtio_recv_mergeable_pkts - # - tx: virtio_xmit_pkts_packed, virtio_xmit_pkts, - # - VMXNET3 Driver: - # - vmxnet3: Supports VMware's VMXNET3 virtual NICs. - # - rx: vmxnet3_recv_pkts - # - tx: vmxnet3_xmit_pkts - # - # 配置样例: `rx_hooks: [ixgbe_recv_pkts, i40e_recv_pkts, virtio_recv_pkts, virtio_recv_mergeable_pkts]` - # - # 注意:在当前 DPDK 驱动接口的突发模式下发送和接收数据包时,旧版 Linux 内核(低于 5.2)的 eBPF 指令数量限制为 4096。 - # 因此,在 DPDK 捕获数据包期间,最多只能捕获 16 个数据包。对于 Linux 5.2 及以上版本的内核,最多可捕获 32 个数 - # 据包(这通常是 DPDK 突发模式的默认值)。对于低于 Linux 5.2 的内核,如果突发大小超过 16,可能会发生数据包丢失。 - # upgrade_from: - rx_hooks: [] - # type: string - # name: - # en: DPDK Application TX Hooks Configuration - # ch: DPDK 应用数据包发送 hook 点设置 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Specify the appropriate packet transmission hook point according to the actual network card driver. - # To obtain the driver method and configure the transmission hook point, as well as precautions,refer - # to the description of `inputs.ebpf.socket.uprobe.dpdk.rx_hooks`. - # - # Example: `tx_hooks: [i40e_xmit_pkts, virtio_xmit_pkts_packed, virtio_xmit_pkts]` - # ch: |- - # 根据实际的网卡驱动填写合适的数据包发送 hook 点, 获取驱动方法和发送hook点设置以及注意事项参考 `inputs.ebpf.socket.uprobe.dpdk.rx_hooks` 的说明. 
- # - # 配置样例: `tx_hooks: [i40e_xmit_pkts, virtio_xmit_pkts_packed, virtio_xmit_pkts]` - # upgrade_from: - tx_hooks: [] # type: section # name: Kprobe # description: @@ -3263,115 +2721,6 @@ inputs: # - 内核建议:若要启用 fentry/fexit 特性,推荐使用 Linux kernel 5.10.28 及以上版本,以确保稳定性和性能。 # upgrade_from: fentry_enabled: false - # type: section - # name: - # en: Preprocess - # ch: 预处理 - # description: - preprocess: - # type: int - # name: - # en: OOOR Cache Size - # ch: 乱序重排(OOOR)缓冲区大小 - # unit: - # range: [8, 1024] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # OOOR: Out Of Order Reassembly - # - # When `out_of_order_reassembly_protocols` is enabled, up to `out_of_order_reassembly_cache_size` - # eBPF socket events (each event consuming up to `processors.request_log.tunning.payload_truncation` bytes) will be cached - # in each TCP/UDP flow to prevent out-of-order events from impacting application protocol - # parsing. Since eBPF socket events are sent to user space in batches, out-of-order scenarios - # mainly occur when requests and responses within a single session are processed by different - # CPUs, causing the response to reach user space before the request. 
- # ch: |- - # 由于 eBPF socket 事件是以批处理的方式向用户态空间发送数据,同一个应用调用的请求、响应由不同 CPU 处理时,可能 - # 会出现请求、响应乱序的情况,开启 Syscall 数据乱序重排特性后,每个 TCP/UDP 流会缓存一定数量的 eBPF socket - # 事件,以修正乱序数据对应用调用解析的影响。该参数设置了每个 TCP/UDP 流可以缓存的 eBPF socket 事件数量上限(每 - # 条事件数据占用的字节数上限受 `processors.request_log.tunning.payload_truncation` 控制)。在 Syscall 数据乱序较严重 - # 导致应用调用采集不全的环境中,可适当调大该参数。 - # upgrade_from: static_config.ebpf.syscall-out-of-order-cache-size - out_of_order_reassembly_cache_size: 256 - # type: string - # name: - # en: OOOR Protocols - # ch: 乱序重排(OOOR)协议列表 - # unit: - # range: [] - # enum_options: [_DYNAMIC_OPTIONS_] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # OOOR: Out Of Order Reassembly - # - # When this capability is enabled for a specific application protocol, the agent will add - # out-of-order-reassembly processing for it. Note that the agent will consume more memory - # in this case, so please adjust the syscall-out-of-order-cache-size accordingly and monitor - # the agent's memory usage. - # - # Supported protocols: [https://www.deepflow.io/docs/features/l7-protocols/overview/](https://www.deepflow.io/docs/features/l7-protocols/overview/) - # - # Attention: configuring `HTTP2` or `gRPC` will enable both protocols. - # ch: |- - # 配置后 deepflow-agent 将对指定应用协议的处理增加乱序重排过程。注意:(1)开启特性将消耗更多的内存,因此 - # 需关注 agent 内存用量;(2)配置`HTTP2`或`gRPC`会全部开启这两个协议 - # upgrade_from: static_config.ebpf.syscall-out-of-order-reassembly - out_of_order_reassembly_protocols: [] - # type: duration - # name: - # en: OOOR Timeout - # ch: 乱序重排(OOOR)超时时间 - # unit: - # range: [100ms, 1s] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # When the OOOR cache data times out, it will be output directly. This parameter can be adjusted according to metric - # `deepflow_agent_ebpf_collect.metrics.time_backtrack_max`. 
- # - # Note: Increasing this value will consume more memory - # ch: |- - # OOOR 缓存的数据时间超时会直接输出, 可以根据采集器指标 `deepflow_agent_ebpf_collect.metrics.metrics.time_backtrack_max` 调整该参数。 - # - # 注意:增大该值会消耗更多的内存 - out_of_order_reassembly_timeout: 100ms - # type: string - # name: - # en: SR Protocols - # ch: 分段重组(SR)协议列表 - # unit: - # range: [] - # enum_options: [_DYNAMIC_OPTIONS_] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # SR: Segmentation Reassembly - # - # When this capability is enabled for a specific application protocol, the agent will add - # segmentation-reassembly processing to merge application protocol content spread across - # multiple syscalls before parsing it. This enhances the success rate of application - # protocol parsing. Note that `out_of_order_reassembly_protocols` must also be enabled for - # this feature to be effective. - # Supported protocols: [https://www.deepflow.io/docs/features/l7-protocols/overview/](https://www.deepflow.io/docs/features/l7-protocols/overview/) - # Attention: configuring `HTTP2` or `gRPC` will enable both protocols. - # ch: |- - # 配置后 deepflow-agent 将对指定应用协议的处理增加分片重组过程,将多个 Syscall 的内容分片重组后再进行 - # 协议解析,以增强应用协议的采集成功率。 - # - # 注意: - # 1. 该特性的生效的前提条件是`out_of_order_reassembly_protocols`开启并生效; - # - 支持协议:[https://www.deepflow.io/docs/zh/features/l7-protocols/overview/](https://www.deepflow.io/docs/zh/features/l7-protocols/overview/) - # 2. 
配置`HTTP2`或`gRPC`会全部开启这两个协议 - # upgrade_from: static_config.ebpf.syscall-segmentation-reassembly - segmentation_reassembly_protocols: [] # type: section # name: # en: File @@ -3630,208 +2979,11 @@ inputs: # upgrade_from: static_config.ebpf.on-cpu-profile.cpu aggregate_by_cpu: false # type: section - # name: Off-CPU + # name: + # en: Preprocess + # ch: 预处理 # description: - off_cpu: - # type: bool - # name: Disabled - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # eBPF Off-CPU profile switch. - # - # Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, - # i.e., `ebpf.profile.off_cpu` must be included in `inputs.proc.process_matcher.[*].enabled_features`. - # ch: |- - # eBPF Off-CPU profile 数据的采集开关。 - # - # 注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, - # 即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.profile.off_cpu`。 - # upgrade_from: static_config.ebpf.off-cpu-profile.disabled - disabled: true - # type: bool - # name: - # en: Aggregate by CPU - # ch: 按 CPU 聚合 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether to obtain the value of CPUID and decide whether to participate in aggregation. - # - `true`: Obtain the value of CPUID and will be included in the aggregation of stack - # trace data. - # - `false`: It will not be included in the aggregation. Any other value is considered - # invalid, the CPU value for stack trace data reporting is a special value - # `CPU_INVALID: 0xfff` used to indicate that it is an invalid value. 
- # ch: |- - # 采集 Off-CPU 数据时,是否获取 CPUID 的开关。 - # - `true`: 表示在采集 Off-CPU 数据时获取 CPUID (Off-CPU 剖析时,支持对单个 CPU 的分析)。 - # - `false`: 表示在采集 Off-CPU 数据时不获取 CPUID (Off-CPU 剖析时,不支持单个 CPU 的分析)。 - # upgrade_from: static_config.ebpf.off-cpu-profile.cpu - aggregate_by_cpu: false - # type: duration - # name: - # en: Minimum Blocking Time - # ch: 最小阻塞时间 - # unit: - # range: [0ns, 1h] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # If set to '0ns', there will be no minimum value limitation. Scheduler events are still - # high-frequency events, as their rate may exceed 1 million events per second, so - # caution should still be exercised. - # - # If overhead remains an issue, you can configure the 'minblock' tunable parameter here. - # If the off-CPU time is less than the value configured in this item, the data will be - # discarded. If your goal is to trace longer blocking events, increasing this parameter - # can filter out shorter blocking events, further reducing overhead. Additionally, we - # will not collect events with a blocking time exceeding 1 hour. - # ch: |- - # 低于'最小阻塞时间'的 Off-CPU 数据将被 deepflow-agent 忽略,'最小阻塞时间'设置为 '0ns' 表示 - # 采集所有的 Off-CPU 数据。由于 CPU 调度事件数量庞大(每秒可能超过一百万次),调小该参数将带来 - # 明显的资源开销,如果需要跟踪大时延的调度阻塞事件,建议调大该参数,以降低资源开销。另外,deepflow-agent - # 不采集阻塞超过 1 小时的事件。 - # upgrade_from: static_config.ebpf.off-cpu-profile.minblock - min_blocking_time: 50us - # type: section - # name: Memory - # description: - memory: - # type: bool - # name: Disabled - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # eBPF memory profile switch. - # - # Note: When enabling this feature, the specific process list must also be specified in `inputs.proc.process_matcher`, - # i.e., `ebpf.profile.memory` must be included in `inputs.proc.process_matcher.[*].enabled_features`. 
- # ch: |- - # eBPF memory profile 数据的采集开关。 - # - # 注意:开启此功能时,需要同时在 `inputs.proc.process_matcher` 中进一步指定具体的进程列表, - # 即 `inputs.proc.process_matcher.[*].enabled_features` 中需要包含 `ebpf.profile.memory`。 - # upgrade_from: static_config.ebpf.memory-profile.disabled - disabled: true - # type: duration - # name: - # en: Memory profile report interval - # ch: 内存剖析上报间隔 - # unit: - # range: [1s, 60s] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # The interval at which deepflow-agent aggregates and reports memory profile data. - # ch: |- - # deepflow-agent 聚合和上报内存剖析数据的间隔。 - # upgrade_from: static_config.ebpf.memory-profile.report-interval - report_interval: 10s - # type: int - # name: - # en: LRU length for process allocated addresses - # ch: 进程分配地址 LRU 长度 - # unit: - # range: [1024, 4194704] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Agent uses LRU cache to record process allocated addresses to avoid uncontrolled - # memory usage. Each record in this LRU is about 80B. - # ch: |- - # 采集器使用 LRU 缓存记录进程分配的地址,以避免内存使用失控。每个 LRU 条目大约占 32B 内存。 - # upgrade_from: - allocated_addresses_lru_len: 131072 - # type: int - # name: - # en: Sort length - # ch: 排序长度 - # unit: - # range: [0, 65536] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # In order to match mallocs and frees, memory profiler will sort data by timestamp before processing. - # This parameter is the length of the sorted array. - # When configuring this option, first adjust the `sort_interval` parameter according to the instructions, - # and then refer to the agent performance statistics in `deepflow_agent_ebpf_memory_profiler` - # `dequeued_by_length` and `dequeued_by_interval` metrics, appropriately reduce this parameter - # while ensuring that the former is several times smaller than the latter. 
- # ch: |- - # 为了匹配 mallocs 和 frees,内存剖析会在处理前对数据按时间戳排序。 - # 该参数是排序数组的长度。 - # 配置该选项时先按说明调整 `sort_interval` 参数,在参考采集器性能统计 `deepflow_agent_ebpf_memory_profiler` 中 - # `dequeued_by_length` 和 `dequeued_by_interval` 指标,在保证前者小于后者几倍的前提下适当调小该参数。 - # upgrade_from: - sort_length: 16384 - # type: duration - # name: - # en: Sort interval - # ch: 排序间隔 - # unit: - # range: [1ns, 10s] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # In order to match mallocs and frees, memory profiler will sort data by timestamp before processing. - # This parameter controls the max span of interval between the first and last item in the sorted array. - # Refer to agent performance statistics in `deepflow_agent_ebpf_memory_profiler`, - # making `time_backtracked` to 0. Configurion `sort_length` may also need to be increased. - # ch: |- - # 为了匹配 mallocs 和 frees,内存剖析会在处理前对数据按时间戳排序。 - # 该参数控制排序数组中第一个和最后一个元素之间的时间间隔的最大值。 - # 配置该选项可以参考采集器性能统计 `deepflow_agent_ebpf_memory_profiler` 中 - # `time_backtracked` 指标,增大该参数使之为 0 即可。注意可能需要相应增大 `sort_length` 参数。 - # upgrade_from: - sort_interval: 1500ms - # type: int - # name: - # en: Queue Size - # ch: 队列大小 - # unit: - # range: [4096, 64000000] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Memory profiler inner queue size. - # Refer to agent performance statistics in `deepflow_agent_ebpf_memory_profiler`, - # making `overwritten` to 0 and `pending` not exceeding this configuration. 
- # ch: |- - # 内存剖析组件内部的队列大小。 - # 配置该选项可以参考采集器性能统计 `deepflow_agent_ebpf_memory_profiler` 中 - # `overwritten` 和 `pending` 指标,增大该配置使得前者为 0,后者不高于该配置即可。 - # upgrade_from: - queue_size: 32768 - # type: section - # name: - # en: Preprocess - # ch: 预处理 - # description: - preprocess: + preprocess: # type: bool # name: # en: Stack Compression @@ -3919,342 +3071,6 @@ inputs: nodejs_disabled: false # type: section # name: - # en: Network - # ch: 网络 - # description: - network: - # type: bool - # name: NIC optimization Enabled - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Whether to enable NIC optimization for enhanced multi-core packet - # processing and burst traffic resilience. - # - # When enabled, the system applies a combination of: - # - RSS hardware queue configuration - # - RX ring descriptor size tuning - # - IRQ (interrupt) CPU affinity binding - # - Optional XDP CPUMAP-based CPU redirection - # - # This optimization mitigates scenarios where RSS hardware cannot hash - # inner headers of encapsulated traffic (e.g., GRE, Double VLAN, - # VXLAN, ERSPAN), which may otherwise cause traffic to be concentrated - # on a single CPU core and lead to packet drops or performance bottlenecks. - # - # RX ring tuning improves burst handling capability by increasing - # the number of descriptors available for packet reception, reducing - # the likelihood of ring overflow under high traffic conditions. - # - # When XDP CPU redirect is enabled, packets are redistributed in - # software across multiple CPU cores after initial reception, - # providing better load balancing beyond hardware RSS capabilities. - # - # Recommended to enable this feature when: - # 1) Traffic on the interface consists primarily of encapsulated - # packets (e.g., verified via tcpdump showing GRE, Double VLAN, - # VXLAN, etc.). 
- # 2) One CPU core shows near 100% softirq utilization (e.g., - # observed via `top` with per-CPU view), while other CPUs - # remain underutilized. - # - # For optimal performance, IRQ CPUs and XDP redirect CPUs should be - # configured on the same NUMA node as the physical NIC. - # - # ch: |- - # 是否启用网卡优化功能,用于提升多核环境下的网络包处理能力 - # 以及突发流量承载能力。 - # - # 开启后将综合进行以下优化: - # - RSS 硬件队列数量配置 - # - RX ring 描述符数量调优 - # - 硬件中断与 CPU 亲和性绑定(IRQ 绑核) - # - 可选的 XDP CPUMAP 软件重定向分发 - # - # 该优化主要解决 RSS 硬件无法基于封装报文内层头部 - # (如 GRE、Double VLAN、VXLAN、ERSPAN)进行哈希分摊的问题, - # 避免流量集中在单个 CPU 上造成过载和丢包。 - # - # 通过调整 RX ring 描述符数量,可提升突发流量场景下的 - # 接收缓存能力,降低 ring 满导致的丢包风险。 - # - # 在启用 XDP CPU Redirect 时,数据包会在接收后通过 - # CPUMAP 在多个 CPU 核心之间进行软件层重分发, - # 从而突破硬件 RSS 的能力限制,实现更均衡的负载分布。 - # - # 建议在以下场景开启该功能: - # 1)使用 tcpdump 抓包发现该接口流量主要为 GRE、 - # Double VLAN、VXLAN 等封装报文; - # 2)通过 `top`(按 1)观察各 CPU 软中断占用率时, - # 发现某一个 CPU 的 softirq 接近 100%, - # 而其他 CPU 软中断占用率明显偏低。 - # - # 为获得最佳性能,建议将中断 CPU 和 XDP 重定向 CPU - # 配置在与物理网卡相同的 NUMA 节点上。 - nic_opt_enabled: false - # type: dict - # name: - # en: NIC Optimize - # ch: 网卡优化配置 - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Configure NIC-level performance optimizations for specific interfaces. - # - # This feature improves packet processing scalability and burst handling - # by tuning hardware RSS queues, interrupt CPU affinity, RX ring size, - # and optional XDP CPUMAP-based CPU redirection. - # - # Recommended when: - # - Traffic is primarily encapsulated (GRE, Double VLAN, VXLAN, ERSPAN). - # - One CPU shows near 100% softirq usage while others are idle. - # - # To achieve better performance, the program will automatically disable the - # irqbalance service to prevent network interface interrupts from migrating - # between CPUs. - # - # Multiple NIC optimize entries can be configured for different interfaces. 
- # - # Example: - # ```yaml - # inputs: - # ebpf: - # network: - # nic_opt_enabled: true - # nic_optimize: - # - interface: eth0 - # rx_ring_size: 4096 - # rss_channel_count: 2 - # irq_cpu_list: 1,2 - # xdp_cpu_redirect: true - # xdp_queue_size: 2048 - # xdp_cpu_redirect_list: 4,5,6,7 - # - interface: eth1 - # rx_ring_size: 4096 - # rss_channel_count: 2 - # irq_cpu_list: 1,2 - # xdp_cpu_redirect: true - # xdp_queue_size: 2048 - # xdp_cpu_redirect_list: 4,5,6,7 - # ``` - # - # ch: |- - # 针对指定网卡接口进行性能优化配置。 - # - # 该功能通过调优 RSS 队列、中断绑核、RX ring 大小、 - # 以及可选的 XDP CPUMAP 重定向机制,提升多核扩展能力 - # 和突发流量承载能力。 - # - # 建议在以下场景开启: - # - 接口流量主要为 GRE、Double VLAN、VXLAN、ERSPAN 等封装报文; - # - 某个 CPU 的 softirq 占用率接近 100%,而其他 CPU 空闲。 - # - # 为了获得更好的性能,程序会自动停用 irqbalance 服务,以防止网卡中断在 CPU 之间迁移。 - # - # 可为多个接口分别配置优化策略。 - # - # 样例: - # ```yaml - # inputs: - # ebpf: - # network: - # nic_opt_enabled: true - # nic_optimize: - # - interface: eth0 - # rx_ring_size: 4096 - # rss_channel_count: 2 - # irq_cpu_list: 1,2 - # xdp_cpu_redirect: true - # xdp_queue_size: 2048 - # xdp_cpu_redirect_list: 4,5,6,7 - # - interface: eth1 - # rx_ring_size: 4096 - # rss_channel_count: 2 - # irq_cpu_list: 1,2 - # xdp_cpu_redirect: true - # xdp_queue_size: 2048 - # xdp_cpu_redirect_list: 4,5,6,7 - # ``` - # upgrade_from: - # --- - # type: string - # name: - # en: Interface - # ch: 网卡接口 - # description: - # en: |- - # Name of the network interface to optimize. - # - # ch: |- - # 需要进行优化的网卡接口名称。 - # --- - # interface: "" - # --- - # type: int - # name: - # en: RX Ring Size - # ch: RX Ring 描述符数量 - # description: - # en: |- - # Number of RX descriptors in NIC receive ring. - # - # Increasing this value improves burst traffic buffering - # and reduces packet drops caused by ring overflow. - # Specifically, use `ethtool -g ` to check the current - # configuration, and adjust to an appropriate value based on your workload. - # - # 0 (default) means keep the original state and ignore this setting. 
- # - # ch: |- - # 网卡接收环(RX ring)的描述符数量。 - # - # 增大该值可提升突发流量场景下的缓存能力, - # 降低因 ring 满导致的丢包风险。 - # 具体使用`ethtool -g ` 查看当前配置,根据实际情况调整到合适的值。 - # - # 默认值为 0 表示保持原状忽略此项配置。 - # --- - # rx_ring_size: 0 - # --- - # type: int - # name: - # en: RSS Channel Count - # ch: RSS 队列数量 - # description: - # en: |- - # Number of hardware RSS queues. - # Determines how many queues packets are distributed to after - # hardware hash calculation. - # - # Maximum supported value is typically 16 and must not exceed - # the number of logical CPU cores. - # Specifically, use `ethtool -l ` to check the current configuration - # and adjust to an appropriate value based on your workload. - # - # When XDP CPU redirect is enabled, it is recommended to set this to 1. - # 0 (default) means keep the original state and ignore this setting. - # - # ch: |- - # RSS 硬件队列数量。 - # 数据包在物理网卡完成哈希后,将分发到指定数量的队列并触发中断。 - # - # 最大一般支持 16,且不要超过逻辑 CPU 核数。 - # 具体使用`ethtool -l ` 查看当前配置,根据实际情况调整到合适的值。 - # - # 当启用 XDP CPU Redirect 时建议设置为 1。 - # 默认值为 0 表示保持原状忽略此项配置。 - # --- - # rss_channel_count: 0 - # --- - # type: string - # name: - # en: Hardware IRQ CPU List - # ch: 硬件中断 CPU 列表 - # description: - # en: |- - # CPU ID or comma-separated CPU list used for handling NIC interrupts. - # - # Recommended to match the number of RSS queues. - # If XDP CPU redirect is enabled, only one CPU is required. - # - # Value can be: - # - Specific CPU list (e.g., 2,4,6) - # - "local" (auto match CPUs in local NUMA node) - # - # CPUs should be located on the same NUMA node as the NIC. - # - # ch: |- - # 用于处理网卡硬件中断的 CPU ID 或 CPU 列表。 - # - # 数量建议与 RSS 队列数量一致。 - # 若启用 XDP CPU Redirect,仅需指定一个 CPU。 - # - # 可设置为: - # - 指定 CPU 列表(如 2,4,6) - # - local(自动匹配本地 NUMA 节点 CPU) - # - # 建议所选 CPU 与物理网卡位于同一 NUMA 节点。 - # --- - # irq_cpu_list: "" - # --- - # type: bool - # name: - # en: Enable XDP CPU Redirect - # ch: 启用 XDP CPU 重定向 - # description: - # en: |- - # Enable XDP CPUMAP redirect to redistribute packets across CPUs - # in software. 
- # - # Useful when hardware RSS cannot distribute encapsulated traffic - # (e.g., Double VLAN, ERSPAN) evenly across CPUs, resulting in - # single-core overload and packet drops. - # - # ch: |- - # 是否启用 XDP CPUMAP 重定向分发。 - # - # 用于解决硬件 RSS 无法对封装报文 - # (如 Double VLAN、ERSPAN 等)进行均匀分摊, - # 导致单核过载和丢包的问题。 - # --- - # xdp_cpu_redirect: false - # --- - # type: int - # name: - # en: XDP Queue Size - # ch: XDP 队列大小 - # description: - # en: |- - # Size of the XDP CPUMAP queue. - # - # Valid range: [512, 8192]. Powers of two are recommended. - # - # Larger values improve burst tolerance but consume more memory. - # - # ch: |- - # XDP CPUMAP 队列大小。 - # - # 取值范围:[512, 8192],建议配置为 2 的幂。 - # - # 增大可提升突发流量承载能力,但会占用更多内存。 - # --- - # xdp_queue_size: 2048 - # --- - # type: string - # name: - # en: XDP Redirect CPU List - # ch: XDP 重定向 CPU 列表 - # description: - # en: |- - # CPU list used for processing packets after XDP redirection. - # - # Format example: 4,6,8 - # - # ch: |- - # XDP 重定向后用于处理数据包的 CPU 列表。 - # - # 填写样式如:4,6,8 - # --- - # xdp_cpu_redirect_list: "" - # - nic_optimize: - - interface: "" - rx_ring_size: 0 - rss_channel_count: 0 - irq_cpu_list: "" - xdp_cpu_redirect: false - xdp_queue_size: 2048 - xdp_cpu_redirect_list: "" - # type: section - # name: # en: Tunning # ch: 调优 # description: @@ -5180,322 +3996,101 @@ inputs: # en: Log Integration Disabled # ch: 禁用 Log 集成 # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: false - # description: - # upgrade_from: static_config.external-log-integration-disabled - log_integration_disabled: false - # type: section - # name: - # en: Vector - # ch: vector - # description: - vector: - # type: bool - # name: - # en: Vector Component Enabled - # ch: 启用 Vector 组件 - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # The switcher control for Vector component running. 
- # ch: |- - # 对 Vector 组件的开关控制。 - enabled: false - # type: dict - # name: - # en: Vector Component Config - # ch: Vector 组件配置控制 - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # The detail config for Vector Component, all availble config keys could be found in [vector.dev](https://vector.dev/docs/reference/configuration) - # Here's an example for how to capture kubernetes logs、host metrics in virtual machine and kubelet metrics in kubernetes. It'll send to DeepFlow-Agent as output. - # - # scrape host metrics: - # `K8S_NODE_NAME_FOR_DEEPFLOW` only required in k8s container environment - # {{ file: vector_host_metrics.yaml }} - # - # scrape kubernetes metrics - # {{ file: vector_k8s_metrics.yaml }} - # - # scrape kubernentes logs (capture DeepFlow Pod logs as example, if other Pod logs is required, update `extra_label_selector` add custom filters) - # {{ file: vector_k8s_logs.yaml }} - # - # use http_client or socket to dial a remote server for testing - # {{ file: vector_dial.yaml }} - # - # ch: |- - # Vector 组件的具体配置,所有可用配置可在此链接中查找:[vector.dev](https://vector.dev/docs/reference/configuration) - # 以下提供一份抓取 kubernetes 日志、宿主机指标及 kubernetes kubelet 指标的示例,并将这些数据发送到 DeepFlow-Agent。 - # - # 抓取主机指标 - # `K8S_NODE_NAME_FOR_DEEPFLOW` 变量仅容器环境必须,非容器环境可以去掉 - # {{ file: vector_host_metrics.yaml }} - # - # 抓取 kubernetes 指标 - # {{ file: vector_k8s_metrics.yaml }} - # - # 抓取 kubernetes 日志(以采集 DeepFlow Pod 日志为例,若需要采集其他 Pod 日志可修改 `extra_label_selector` 并加上具体条件) - # {{ file: vector_k8s_logs.yaml }} - # - # 使用 http_client 或者 socket 拨测一个远端服务 - # {{ file: vector_dial.yaml }} - config: - -# type: section -# name: -# en: Processors -# ch: 处理器 -# description: -processors: - # type: section - # name: Packet - # description: - packet: - # type: section - # name: Policy - # description: - policy: - # type: int - # name: - # en: Fast-path Map Size - # ch: Fast-path 字典大小 - # unit: - # range: [0, 10000000] - # 
enum_options: [] - # modification: agent_restart - # ee_feature: false - # description: - # en: |- - # When set to 0, deepflow-agent will automatically adjust the map size - # according to `global.limits.max_memory`. - # Note: In practice, it should not be set to less than 8000. - # ch: |- - # 设置为`0`时,deepflow-agent 根据 `global.limits.max_memory` 参数自动调整 Fast-path 字典大小。 - # 注意:实践中不应配置小于 8000 的值。 - # upgrade_from: static_config.fast-path-map-size - fast_path_map_size: 0 - # type: bool - # name: - # en: Fast-path Disabled - # ch: 禁用 Fast-path - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: false - # description: - # en: |- - # When set to `true`, deepflow-agent will not use fast path. - # ch: |- - # 设置为 `true` 时,deepflow-agent 不启用 fast path。 - # upgrade_from: static_config.fast-path-disabled - fast_path_disabled: false - # type: int - # name: - # en: Forward Table Capacity - # ch: Forward 表容量 - # unit: - # range: [16384, 64000000] - # enum_options: [] - # modification: agent_restart - # ee_feature: false - # description: - # en: |- - # The size of the forwarding table, which is used to store MAC-IP information, - # When this value is larger, the more memory usage may be. - # ch: |- - # 转发表大小,用来存储 MAC-IP 信息,调大该参数,deepflow-agent 将消耗更多的内存。 - # upgrade_from: static_config.forward-capacity - forward_table_capacity: 16384 - # type: int - # name: - # en: Max First-path Level - # ch: 最大 First-path 层级 - # unit: - # range: [1, 16] - # enum_options: [] - # modification: agent_restart - # ee_feature: false - # description: - # en: |- - # DDBS algorithm level. - # - # When this value is larger, the memory overhead is smaller, but the - # performance of policy matching is worse. 
- # ch: |- - # DDBS 算法等级。 - # - # 该配置越大内存开销越小,但是性能会降低。 - # upgrade_from: static_config.first-path-level - max_first_path_level: 8 - # type: section - # name: - # en: TCP Header - # ch: TCP 包头(时序图) - # description: - tcp_header: - # type: int - # name: - # en: Block Size - # ch: Block 大小 - # unit: - # range: [16, 8192] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # When generating TCP header data, each flow uses one block to compress and - # store multiple TCP headers, and the block size can be set here. - # ch: |- - # 压缩和保存多个 TCP 包头的缓冲区大小。 - # upgrade_from: static_config.packet-sequence-block-size - block_size: 256 - # type: int - # name: - # en: Sender Queue Size - # ch: Sender 队列大小 - # unit: - # range: [65536, 64000000] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # The length of the following queues (to UniformCollectSender): - # - 1-packet-sequence-block-to-uniform-collect-sender - # ch: |- - # TCP 包时序数据的单个发送队列的大小。 - # upgrade_from: static_config.packet-sequence-queue-size - sender_queue_size: 65536 - # type: int - # name: - # en: Header Fields Flag - # ch: 包头字段 Flag - # unit: - # range: [0, 255] + # range: [] # enum_options: [] # modification: agent_restart - # ee_feature: true + # ee_feature: false # description: - # en: |- - # packet-sequence-flag determines which fields need to be reported, the default - # value is `0`, which means the feature is disabled, and `255`, which means all fields - # need to be reported all fields corresponding to each bit: - # ``` - # | FLAG | SEQ | ACK | PAYLOAD_SIZE | WINDOW_SIZE | OPT_MSS | OPT_WS | OPT_SACK | - # 7 6 5 4 3 2 1 0 - # ``` - # ch: |- - # 使用一个 8 bit 的 flag 对 deepflow-agent 采集上报的 TCP 报文时序数据内容进行控制,不同 - # 的 bit 位代表不同 TCP 字段的采集开关: - # ``` - # | FLAG | SEQ | ACK | PAYLOAD_SIZE | WINDOW_SIZE | OPT_MSS | OPT_WS | OPT_SACK | - # 7 6 5 4 3 2 1 0 - # ``` - # flag 设置为`0`表示全部关闭,设置为`255`表示全部 - # upgrade_from: 
static_config.packet-sequence-flag - header_fields_flag: 0b0000_0000 + # upgrade_from: static_config.external-log-integration-disabled + log_integration_disabled: false + +# type: section +# name: +# en: Processors +# ch: 处理器 +# description: +processors: + # type: section + # name: Packet + # description: + packet: # type: section - # name: - # en: PCAP Stream - # ch: PCAP 字节流 + # name: Policy # description: - pcap_stream: + policy: # type: int # name: - # en: Receiver Queue Size - # ch: Receiver 队列大小 + # en: Fast-path Map Size + # ch: Fast-path 字典大小 # unit: - # range: [65536, 64000000] + # range: [0, 10000000] # enum_options: [] # modification: agent_restart - # ee_feature: true + # ee_feature: false # description: # en: |- - # The length of the following queues: - # - 1-mini-meta-packet-to-pcap + # When set to 0, deepflow-agent will automatically adjust the map size + # according to `global.limits.max_memory`. + # Note: In practice, it should not be set to less than 8000. # ch: |- - # 设置 deepflow-agent 的 1-mini-meta-packet-to-pcap 队列大小。 - # upgrade_from: static_config.pcap.queue-size - receiver_queue_size: 65536 - # type: int + # 设置为`0`时,deepflow-agent 根据 `global.limits.max_memory` 参数自动调整 Fast-path 字典大小。 + # 注意:实践中不应配置小于 8000 的值。 + # upgrade_from: static_config.fast-path-map-size + fast_path_map_size: 0 + # type: bool # name: - # en: Sender Queue Size - # ch: Sender 队列大小 + # en: Fast-path Disabled + # ch: 禁用 Fast-path # unit: - # range: [4096, 64000000] + # range: [] # enum_options: [] # modification: agent_restart - # ee_feature: true + # ee_feature: false # description: # en: |- - # The length of the following queues: - # - 2-pcap-batch-to-sender + # When set to `true`, deepflow-agent will not use fast path. 
# ch: |- - # 设置 deepflow-agent 的 2-pcap-batch-to-sender 队列大小。 - # upgrade_from: - sender_queue_size: 8192 + # 设置为 `true` 时,deepflow-agent 不启用 fast path。 + # upgrade_from: static_config.fast-path-disabled + fast_path_disabled: false # type: int # name: - # en: Buffer Size Per Flow - # ch: 每个 Flow 的缓冲区大小 + # en: Forward Table Capacity + # ch: Forward 表容量 # unit: - # range: [64, 64000000] + # range: [16384, 64000000] # enum_options: [] # modification: agent_restart - # ee_feature: true + # ee_feature: false # description: # en: |- - # PCap buffer size per flow. Will flush the flow when reach this limit. + # The size of the forwarding table, which is used to store MAC-IP information. + # The larger this value, the more memory deepflow-agent consumes. # ch: |- - # 按流的 PCap 缓冲区大小。到达该值时 flush 该条流的 PCap 数据。 - # upgrade_from: static_config.pcap.flow-buffer-size - buffer_size_per_flow: 65536 + # 转发表大小,用来存储 MAC-IP 信息,调大该参数,deepflow-agent 将消耗更多的内存。 + # upgrade_from: static_config.forward-capacity + forward_table_capacity: 16384 # type: int # name: - # en: Total Buffer Size - # ch: 总体缓冲区大小 - # unit: - # range: [65536, 64000000] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Total PCap buffer size. Will flush all flows when reach this limit. - # ch: |- - # PCap 总缓冲区大小。到达该值时 flush 所有流的 PCap 数据。 - # upgrade_from: static_config.pcap.buffer-size - total_buffer_size: 88304 - # type: duration - # name: - # en: Flush Interval - # ch: Flush 间隔 + # en: Max First-path Level + # ch: 最大 First-path 层级 # unit: - # range: [1s, 10m] + # range: [1, 16] # enum_options: [] # modification: agent_restart - # ee_feature: true + # ee_feature: false # description: # en: |- - # Flushes the PCap buffer of a flow if it has not been flushed for this interval. + # DDBS algorithm level. + # + # When this value is larger, the memory overhead is smaller, but the + # performance of policy matching is worse.
# ch: |- - # 如果一条流的 PCap buffer 超过这个时间没有进行过 flush,强制触发一次 flush。 - # upgrade_from: static_config.pcap.flush-interval - flush_interval: 1m + # DDBS 算法等级。 + # + # 该配置越大内存开销越小,但是性能会降低。 + # upgrade_from: static_config.first-path-level + max_first_path_level: 8 # type: section # name: TOA (TCP Option Address) # description: @@ -5769,301 +4364,6 @@ processors: # upgrade_from: static_config.oracle-parse-config.resp-0x04-extra-byte resp_0x04_extra_byte: false # type: section - # name: ISO8583 - # description: - iso8583: - # type: bool - # name: - # en: Value Translation - # ch: 数据翻译 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether to perform field value translation. - # ch: |- - # 是否对解析后的数据进行查表翻译后展示。 - # - # - 支持翻译的字段列表: - # - # | 支持翻译的字段 | 示例(翻译前)|示例(翻译后) | 备注 | - # |-----------------------|-------------- |----------------------- |--------------| - # | 0-报文类型标识符 | 0100 | 0100-授权类请求 | | - # | 3-交易处理码 | 300000 | 300000-余额查询 | | - # | 32-受理机构标识码 | 6100**** | 6100-中国邮政储蓄银行 | 翻译前4位 | - # | 39-应答码 | 00 | 00-承兑或交易成功 | | - # | 49-交易货币代码 | 156 | 156-人民币元 | | - translation_enabled: true - # type: bool - # name: - # en: PAN Obfuscate - # ch: 卡号脱敏 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether to obfuscate the Primary Account Number (PAN). - # ch: |- - # 是否对卡号脱敏。 - pan_obfuscate: true - # type: string - # name: - # en: Extract Fields - # ch: 提取字段 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Extracted fields are displayed in `data native tags`. - # - Example: `extract_fields: 0,2-33` - # Field Reference: - # - # | Field No. 
| Description | - # |-----------|-------------| - # | 0 | Message Type Identifier (MTI) | - # | 1 | Bitmap | - # | 2 | Primary Account Number (PAN) | - # | 3 | Processing Code | - # | 4 | Amount, Transaction | - # | 5 | Amount, Settlement | - # | 6 | Amount, Cardholder Billing | - # | 7 | Transmission Date & Time | - # | 9 | Conversion Rate, Settlement | - # | 10 | Conversion Rate, Cardholder Billing | - # | 11 | System Trace Audit Number (STAN) | - # | 12 | Local Transaction Time | - # | 13 | Local Transaction Date | - # | 14 | Expiration Date | - # | 15 | Settlement Date | - # | 16 | Conversion Date | - # | 18 | Merchant Type | - # | 19 | Acquiring Institution Country Code | - # | 22 | POS Entry Mode Code | - # | 23 | Card Sequence Number | - # | 25 | POS Condition Code | - # | 26 | POS PIN Capture Code | - # | 28 | Transaction Fee | - # | 32 | Acquiring Institution Identification Code | - # | 33 | Forwarding Institution Identification Code | - # | 35 | Track 2 Data | - # | 36 | Track 3 Data | - # | 37 | Retrieval Reference Number (RRN) | - # | 38 | Authorization Identification Response | - # | 39 | Response Code | - # | 41 | Card Acceptor Terminal ID | - # | 42 | Card Acceptor ID Code | - # | 43 | Card Acceptor Name/Location | - # | 44 | Additional Response Data | - # | 45 | Track 1 Data | - # | 48 | Additional Data – Private | - # | 49 | Currency Code, Transaction | - # | 50 | Currency Code, Settlement | - # | 51 | Currency Code, Cardholder Billing | - # | 52 | PIN Data | - # | 53 | Security Related Control Information | - # | 54 | Additional Amounts (Balance) | - # | 55 | ICC Data (EMV Data) | - # | 56 | Additional Data | - # | 57 | Additional Transaction Data | - # | 59 | Detail Data / Reserved for National Use | - # | 60 | Reserved for Private Use | - # | 61 | Cardholder Authentication Information | - # | 62 | Switch Data | - # | 63 | Network Data | - # | 70 | Network Management Information Code | - # | 90 | Original Data Elements | - # | 96 | Message 
Security Code | - # | 100 | Receiving Institution Identification Code | - # | 102 | Account Identification 1 | - # | 103 | Account Identification 2 | - # | 104 | Additional Data | - # | 113 | Additional Data | - # | 116 | Additional Data | - # | 117 | Additional Data | - # | 121 | Reserved by China UnionPay (CUPS) | - # | 122 | Reserved for Acquirer | - # | 123 | Reserved for Issuer | - # | 125 | Additional Data | - # | 126 | Additional Data | - # | 128 | Message Authentication Code (MAC) | - # ch: |- - # 提取字段展示在`数据原生标签` - # - 配置样例: `extract_fields: 0,2-33` - # - # 字段对照表: - # - # | 字段号 | 说明 | - # |--------|----------| - # | 0 | 报文类型标识符 | - # | 1 | 位图 | - # | 2 | 主账号 | - # | 3 | 交易处理码 | - # | 4 | 交易金额 | - # | 5 | 清算金额 | - # | 6 | 持卡人扣账金额 | - # | 7 | 交易传输时间 | - # | 9 | 清算汇率 | - # | 10 | 持卡人扣账汇率 | - # | 11 | 系统跟踪号 | - # | 12 | 受卡方所在地时间 | - # | 13 | 受卡方所在地日期 | - # | 14 | 卡有效期 | - # | 15 | 清算日期 | - # | 16 | 兑换日期 | - # | 18 | 商户类型 | - # | 19 | 商户国家代码 | - # | 22 | 服务点输入方式码 | - # | 23 | 卡序列号 | - # | 25 | 服务点条件码 | - # | 26 | 服务点 PIN 获取码 | - # | 28 | 交易费 | - # | 32 | 受理机构标识码 | - # | 33 | 发送机构标识码 | - # | 35 | 第二磁道数据 | - # | 36 | 第三磁道数据 | - # | 37 | 检索参考号 | - # | 38 | 授权标识应答码 | - # | 39 | 应答码 | - # | 41 | 受卡机终端标识码 | - # | 42 | 受卡方标识码 | - # | 43 | 受卡方名称地址 | - # | 44 | 附加响应数据 | - # | 45 | 第一磁道数据 | - # | 48 | 附加数据-私有 | - # | 49 | 交易货币代码 | - # | 50 | 清算货币代码 | - # | 51 | 持卡人账户货币代码 | - # | 52 | 个人标识码数据 | - # | 53 | 安全控制信息 | - # | 54 | 实际余额 | - # | 55 | IC 卡数据域 | - # | 56 | 附加信息 | - # | 57 | 附加交易信息 | - # | 59 | 明细查询数据 | - # | 60 | 自定义域 | - # | 61 | 持卡人身份认证信息 | - # | 62 | 交换中心数据 | - # | 63 | 金融网络数据 | - # | 70 | 网络管理信息码 | - # | 90 | 原始数据元 | - # | 96 | 报文安全码 | - # | 100 | 接收机构标识码 | - # | 102 | 账户标识 1 | - # | 103 | 账户标识 2 | - # | 104 | 附加信息 | - # | 113 | 附加信息 | - # | 116 | 附加信息 | - # | 117 | 附加信息 | - # | 121 | CUPS 保留 | - # | 122 | 受理方保留 | - # | 123 | 发卡方保留 | - # | 125 | 附加信息 | - # | 126 | 附加信息 | - # | 128 | 报文鉴别码 | - extract_fields: "2,7,11,32,33" - # type: section - # name: WebSphereMQ 
- # description: - web_sphere_mq: - # type: bool - # name: - # en: Parse XML - # ch: 解析 XML - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether to parse XML. - # ch: |- - # 是否解析 XML 数据内容。 - parse_xml_enabled: true - # type: bool - # name: - # en: Decompress Payload - # ch: 解压数据包 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Some web_sphere_mq messages use zlib compression. When this option is enabled, - # the agent will decompress the data packets during parsing. - # ch: |- - # 部分 web_sphere_mq 的消息中使用 zlib 压缩,开启此选项后,agent 在解析时会对数据包进行解压。 - decompress_enabled: true - # type: bool - # name: - # en: Attribute Field Filter - # ch: 属性字段过滤器 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Enabling this option will cause the agent to retain only the following fields in the XML during parsing, reducing data storage. 
- # - Document.ComConf.ConfInf.MT - # - Document.ComConf.ConfInf.MsgId - # - Document.ComConf.ConfInf.MsgPrcCd - # - Document.ComConf.ConfInf.MsgRefId - # - Document.ComConf.ConfInf.OrigSndDt - # - Document.ComConf.ConfInf.OrigSndr - # - Document.ComuCnfm.MsgId - # - Document.ComuCnfm.MsgProCd - # - Document.ComuCnfm.MsgRefId - # - Document.ComuCnfm.MsgTp - # - Document.ComuCnfm.OrigSndDt - # - Document.ComuCnfm.OrigSndr - # ch: |- - # 开启此选项后,agent 在解析时 XML 仅保留如下字段,减少数据存储。 - # - Document.ComConf.ConfInf.MT - # - Document.ComConf.ConfInf.MsgId - # - Document.ComConf.ConfInf.MsgPrcCd - # - Document.ComConf.ConfInf.MsgRefId - # - Document.ComConf.ConfInf.OrigSndDt - # - Document.ComConf.ConfInf.OrigSndr - # - Document.ComuCnfm.MsgId - # - Document.ComuCnfm.MsgProCd - # - Document.ComuCnfm.MsgRefId - # - Document.ComuCnfm.MsgTp - # - Document.ComuCnfm.OrigSndDt - # - Document.ComuCnfm.OrigSndr - filter_attributes_enabled: true - # type: section - # name: NetSign - # description: - net_sign: - # type: bool - # name: - # en: Extract Biz Data - # ch: 提取 Biz Data - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Whether to extract the full bizData field into data attributes. - # ch: |- - # 是否将完整 bizData 字段提取到数据属性中。 - extract_biz_data_enabled: false - # type: section # name: MySQL # description: mysql: @@ -6620,23 +4920,6 @@ processors: # 如果指定多个值,优先级从前到后降低。插件重写的字段优先级最高。 # upgrade_from: http_log_x_request_id x_request_id: [X_Request_ID] - # type: bool - # name: - # en: Multiple TraceID Collection - # ch: 多 TraceID 采集 - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # When configured as `false`, only one TraceID is collected. - # When configured as `true`, multiple TraceIDs will be collected. 
- # ch: |- - # - 配置为 `false` 时,根据配置 `APM TraceID` 采集到第一个匹配的 TraceID 就不继续采集。 - # - 配置为 `true` 时,采集所有匹配到的 TraceID。 - multiple_trace_id_collection: true # type: string # name: APM TraceID # unit: @@ -7291,25 +5574,6 @@ processors: # 流聚合会使用全部MAC地址。 # upgrade_from: static_config.flow.ignore-l2-end ignore_l2_end: false - # type: bool - # name: - # en: IDC Traffic Ignore VLAN - # ch: IDC 流量忽略 VLAN - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # When the VLAN of the two-way traffic collected at the same location - # are asymmetrical, the traffic cannot be aggregated into a Flow. You can - # set this value at this time. Only valid for IDC (not Cloud) traffic. - # ch: |- - # 当在同一位置采集的双向流量的 VLAN 不对称时,流量无法聚合为同一条流。您可以 - # 此时设置此值。仅适用于 IDC(非云)流量。 - # upgrade_from: static_config.flow.ignore-idc-vlan - idc_traffic_ignore_vlan: false # type: section # name: # en: Timeouts @@ -7563,24 +5827,6 @@ outputs: # 模式下,需配置为 FILE 类型,agent 将 l4_flow_log 和 l7_flow_log 写入本地文件。 # upgrade_from: collector_socket_type data_socket_type: TCP - # type: string - # name: - # en: NPB Socket Type - # ch: NPB Socket 类型 - # unit: - # range: [] - # enum_options: [UDP, RAW_UDP, TCP, ZMQ] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # RAW_UDP uses RawSocket to send UDP packets, which has the highest - # performance, but there may be compatibility issues in some environments. 
- # ch: |- - # 设置 NPB 分发时使用的 Socket 类型。RAW_UDP 使用 RawSocket 发送 UDP 数据,有更高的 - # 分发性能,但是可能存在一些环境不兼容的情况。 - # upgrade_from: npb_socket_type - npb_socket_type: RAW_UDP # type: bool # name: RAW_UDP QoS Bypass # unit: @@ -8058,162 +6304,6 @@ outputs: # upgrade_from: static_config.collector-sender-queue-size sender_queue_size: 65536 # type: section - # name: NPB (Network Packet Broker) - # description: - npb: - # type: int - # name: - # en: Maximum MTU - # ch: 最大 MTU - # unit: byte - # range: [500, 10000] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Maximum MTU allowed when using UDP for NPB. - # - # Attention: Public cloud service providers may modify the content of the - # tail of the UDP packet whose packet length is close to 1500 bytes. When - # using UDP transmission, it is recommended to set a slightly smaller value. - # ch: |- - # NPB 分发时的 UDP 传输的 MTU 值。注意:当 UDP 报文长度接近 1500 字节后,云平台可能会 - # 修改数据包的尾部数据,因此建议`max_mtu`的值小于 1500。 - # upgrade_from: mtu - max_mtu: 1500 - # type: int - # name: - # en: RAW_UDP VLAN Tag - # ch: RAW_UDP 的 VLAN 标签 - # unit: - # range: [0, 4095] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # When using RAW_UDP Socket to transmit UDP data, this value can be used to - # set the VLAN tag. Default value `0` means no VLAN tag. - # ch: |- - # 当使用 RAW_UDP Socket 发送 NPB 数据时,通过该参数设置数据包 VLAN 标签。默认值为`0`,表示 - # 不使用 VLAN 标签。 - # upgrade_from: output_vlan - raw_udp_vlan_tag: 0 - # type: int - # name: - # en: Extra VLAN Header - # ch: 额外的 VLAN 头 - # unit: - # range: [] - # enum_options: - # - 0: - # en: None - # ch: 无 - # - 1: 802.1Q - # - 2: QinQ - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Whether to add an extra 802.1Q header to NPB traffic, when this value is - # set, deepflow-agent will insert a VLAN Tag into the NPB traffic header, and - # the value is the lower 12 bits of TunnelID in the VXLAN header. 
- # ch: |- - # 设置 NPB 分发数据的 VLAN 模式。`无`表示不加 VLAN;`802.1Q`表示添加 802.1Q header; - # `QinQ`表示添加 QinQ。 - # upgrade_from: npb_vlan_mode - extra_vlan_header: 0 - # type: bool - # name: - # en: Traffic Global Dedup - # ch: 流量全局去重 - # unit: - # range: [] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Whether to enable global (distributed) traffic deduplication for the - # NPB feature. - # ch: |- - # NPB 数据去重开关。开启开关后,将对 NPB 分发做全局去重,避免一份流量在客户端、服务端分发两次。 - # upgrade_from: npb_dedup_enabled - traffic_global_dedup: true - # type: int - # name: - # en: Target Port - # ch: 目的端口号 - # unit: - # range: [1, 65535] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # Server port for NPB. - # ch: |- - # NPB 分发使用的目标端口号。 - # upgrade_from: static_config.npb-port - target_port: 4789 - # type: int - # name: - # en: Custom VXLAN Flags - # ch: 自定义 VXLAN Flags - # unit: - # range: [0, 255] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # NPB uses the first byte of the VXLAN Flag to identify the sending traffic to - # prevent the traffic sent by NPB from being collected by deepflow-agent. - # - # Attention: To ensure that the VNI bit is set, the value configured here will - # be used after |= 0b1000_0000. Therefore, this value cannot be directly - # configured as 0b1000_0000. 
- # ch: |- - # 使用 VXLAN 分发时设置 VXLAN 内的 Flags 为该值。采集器不会采集分发流量。 - # - # 这个配置默认会或上0b1000_0000,所以不能配置为 0b1000_0000。 - # upgrade_from: static_config.vxlan-flags - custom_vxlan_flags: 0b1111_1111 - # type: bool - # name: - # en: Overlay VLAN Header Trimming - # ch: Overlay VLAN 头剥离 - # unit: - # range: [] - # enum_options: [] - # modification: agent_restart - # ee_feature: true - # description: - # en: |- - # This configuration only ignores the VLAN header in the captured original message - # and does not affect the configuration item: npb_vlan_mode - # ch: |- - # 开启开关后,deepflow-agent 在 NPB 分发时会剥离 overlay 原始数据包中的 VLAN 头。 - # upgrade_from: static_config.ignore-overlay-vlan - overlay_vlan_header_trimming: false - # type: int - # name: - # en: Maximum Tx Throughput - # ch: 最大 Tx 吞吐量 - # unit: Mbps - # range: [1, 100000] - # enum_options: [] - # modification: hot_update - # ee_feature: true - # description: - # en: |- - # Maximum traffic rate allowed for npb sender. - # ch: |- - # 设置 deepflow-agent 做 NPB 分发的最大吞吐率。 - # upgrade_from: max_npb_bps - max_tx_throughput: 1000 - # type: section # name: # en: Compression # ch: 压缩