Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions DC-SLES-kdump
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# This file originates from the project https://github.com/openSUSE/doc-kit
# This file can be edited downstream.

MAIN="kdump.asm.xml"
# Point to the ID of the <structure> of your assembly
#ROOTID="article-example"
SRC_DIR="articles"
IMG_SRC_DIR="images"

PROFOS="sles"
PROFCONDITION="suse-product"
#PROFCONDITION="suse-product;beta"
#PROFCONDITION="community-project"

STYLEROOT="/usr/share/xml/docbook/stylesheet/suse2022-ns"
FALLBACK_STYLEROOT="/usr/share/xml/docbook/stylesheet/suse-ns"
135 changes: 135 additions & 0 deletions articles/kdump.asm.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-model href="https://cdn.docbook.org/schema/5.2/rng/assemblyxi.rnc"
type="application/relax-ng-compact-syntax"?>
<!DOCTYPE assembly
[
<!ENTITY % entities SYSTEM "../common/generic-entities.ent">
%entities;
]>
<assembly version="5.2" xml:lang="en"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:trans="http://docbook.org/ns/transclusion"
xmlns:its="http://www.w3.org/2005/11/its"
xmlns="http://docbook.org/ns/docbook">
<!-- R E S O U R C E S -->
<resources>
<resource href="../concepts/about-kdump.xml" xml:id="_about-kdump"/>
<resource href="../tasks/setup-kdump.xml" xml:id="_setup-kdump"/>
<resource href="../tasks/configure-kdump.xml" xml:id="_configure-kdump"/>
<resource href="../tasks/troubleshoot-kdump.xml" xml:id="_troubleshoot-kdump"/>
<resource href="../glues/more-info-kdump.xml" xml:id="_more-info-kdump"/>
<resource href="../common/legal.xml" xml:id="_legal"/>
<resource href="../common/license_gfdl1.2.xml" xml:id="_gfdl"/>
</resources>
<!-- S T R U C T U R E -->
<structure renderas="article" xml:id="kdump" xml:lang="en">
<merge>
<title>Introduction to &kdump;</title>
<revhistory xml:id="rh-kdump">
<revision><date>2026-01-12</date>
<revdescription>
<para>
Initial version
</para>
</revdescription>
</revision>
</revhistory>

<!-- Maintainer-->
<meta name="maintainer" content="amrita.sakthivel@suse.com" its:translate="no"/>

<!-- Series-->
<meta name="series" its:translate="no">Smart Docs</meta>

<!-- Task -->
<meta name="task" its:translate="no">
<phrase>Administration</phrase>
<phrase>Configuration</phrase>
<phrase>Security</phrase>
</meta>

<!-- Docmanager -->
<dm:docmanager xmlns:dm="urn:x-suse:ns:docmanager">
<dm:bugtracker>
<dm:url>https://bugzilla.suse.com/enter_bug.cgi</dm:url>
<dm:component>Documentation</dm:component>
<dm:product>SUSE Linux Enterprise Server 16.0</dm:product>
<dm:assignee>amrita.sakthivel@suse.com</dm:assignee>
</dm:bugtracker>
<dm:translation>yes</dm:translation>
</dm:docmanager>

<!-- Architecture -->
<meta name="architecture" its:translate="no">
<phrase>&x86-64;</phrase>
<phrase>&power;</phrase>
<phrase>&zseries;</phrase>
<phrase>&aarch64;</phrase>
</meta>

<!-- Productname & Version -->
<meta name="productname" its:translate="no">
<productname version="16.0" os="sles;sles4sap">&productname;</productname>
</meta>

<!-- Social Media -->
<meta name="title" its:translate="yes">Introduction to kdump</meta>
<meta name="social-descr" its:translate="yes">Learn how to use kdump when your system crashes. kdump is a
kernel crash dumping mechanism. When a system encounters a fatal error, kdump allows the system to save the contents of its memory to a file so you can analyze exactly what went wrong.
</meta>

<!-- Search -->
<meta name="description" its:translate="yes">Use kdump to analyze system crashes</meta>
<abstract>
<variablelist>
<varlistentry>
<term>WHAT?</term>
<listitem>
<para>
&kdump; is the standard error-recovery and crash-dumping mechanism for the Linux kernel. Its primary purpose is to capture a snapshot of the system's memory (a vmcore file) at the exact moment a kernel crashes (kernel panic).
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>WHY?</term>
<listitem>
<para>
Mastering &kdump; is essential for administrators and developers because it leverages a dual-kernel mechanism to capture a memory snapshot during a crash. This transforms mysterious system failures into diagnosable vmcore files that ensure production stability and reduce troubleshooting time.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>EFFORT</term>
<listitem>
<para>
The average reading time of this article is approximately 40 minutes.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>REQUIREMENTS</term>
<listitem>
<itemizedlist>
<listitem>
<para>
<emphasis>Linux fundamentals:</emphasis> Understanding basic Linux commands, file permissions, directory structures
and use of the command line.
</para>
</listitem>
</itemizedlist>
</listitem>
</varlistentry>
</variablelist>
</abstract>
</merge>
<module resourceref="_about-kdump"></module>
<module resourceref="_setup-kdump"></module>
<module resourceref="_configure-kdump"></module>
<module resourceref="_troubleshoot-kdump"></module>
<module resourceref="_more-info-kdump"></module>
<module resourceref="_legal"/>
<module resourceref="_gfdl">
<output renderas="appendix"/>
</module>
</structure>
</assembly>
79 changes: 79 additions & 0 deletions concepts/about-kdump.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE topic
[
<!ENTITY % entities SYSTEM "../common/generic-entities.ent">
%entities;
]>
<!-- refers to legacy doc: <add github link to legacy doc piece, if applicable> -->
<!-- point back to this document with a similar comment added to your legacy doc piece -->
<!-- refer to README.md for file and id naming conventions -->
<!-- metadata is dealt with on the assembly level -->
<topic xml:id="about-kdump"
role="concept" xml:lang="en"
xmlns="http://docbook.org/ns/docbook" version="5.2"
xmlns:its="http://www.w3.org/2005/11/its"
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:trans="http://docbook.org/ns/transclusion">
<info>
<title>About &kdump;</title>
<meta name="maintainer" content="amrita.sakthivel@suse.com" its:translate="no"/>
<abstract>
<para>
&kdump; is a kernel crash dumping mechanism that captures the system’s memory state into a vmcore file when
a system crash occurs. A vmcore file is a snapshot of your computer's system memory (RAM) taken at the exact moment the Linux kernel crashed.
</para>
</abstract>
</info>
<section xml:id="important-kdump">
<title>Why is &kdump; important?</title>
<para>The primary importance of &kdump; lies in its ability to capture a snapshot of a system's memory at the exact moment of a critical failure.
When a Linux kernel experiences a fatal error that halts all operations, standard logging services like syslog or journald usually fail along with it.
This often leaves no record of what went wrong. &kdump; bypasses this limitation by using &kexec; to boot a secondary capture kernel in a reserved slice of RAM.
This allows the system to remain stable enough to save the volatile memory (RAM) into a persistent file, known as a vmcore.
Without this tool, administrators are often left with nothing but a blank screen or a frozen console, making it nearly impossible to diagnose the root cause of intermittent or silent system crashes.</para>
</section>
<section xml:id="dual-kernel-kdump">
<title>Understanding the dual-kernel model</title>
<para>The dual-kernel model uses a second, isolated kernel to handle a system crash safely. When the main system kernel fails, you cannot trust it to write its own crash logs to disk, because its memory might be corrupted
and the kernel itself is no longer reliable. The dual-kernel approach solves this by jumping into a completely different environment.</para>
<para>The model relies on two distinct kernels residing in memory simultaneously:</para>
<itemizedlist>
<listitem><para><emphasis role="bold">The production (primary) kernel:</emphasis> The kernel you use every day. It runs your applications and services.</para></listitem>
<listitem><para><emphasis role="bold">The capture (crash) kernel:</emphasis> A lightweight, minimal kernel specifically compiled to run in a small, reserved area of RAM. It only wakes up when the primary kernel panics.</para></listitem>
</itemizedlist>
</section>
<section xml:id="vmcore-file-kdump">
<title>About the vmcore file</title>
<para>
A vmcore file is a snapshot of your system's physical memory (RAM) taken at the exact moment the Linux kernel crashed.
When a system panics, the &kdump; service uses &kexec; to boot a small, separate capture kernel that stays in a reserved slice of RAM.
This capture kernel’s main function is to look back at the crashed memory and save it to a vmcore file so that you can figure out what happened after the system reboots.
</para>
<para>The vmcore file is a snapshot of the RAM and includes:</para>
<itemizedlist>
<listitem><para><emphasis role="bold">The kernel state:</emphasis> All active kernel data structures, global variables and the call stack, which is what the CPU was doing when it died.</para></listitem>
<listitem><para><emphasis role="bold">Process information:</emphasis> A list of every process that was running, including their individual stacks and registers.</para></listitem>
<listitem><para><emphasis role="bold">Memory pages:</emphasis> Depending on your settings, it can contain the actual data held in RAM by applications.</para></listitem>
<listitem><para><emphasis role="bold">VMCOREINFO:</emphasis> A special section that tells analysis tools how the kernel's memory was laid out so they can make sense of the raw data.</para></listitem>
</itemizedlist>
</section>
<section xml:id="kexec-kdump">
<title>What is &kexec;?</title>
<para>&kexec; is a system call that functions as a software-defined boot loader, allowing a running kernel to bypass the hardware BIOS/UEFI stage and directly hand over control to a new kernel.
By loading the secondary kernel's image and parameters into memory while the system is still active, &kexec; performs a warm boot that preserves the state of RAM and significantly reduces downtime.
This mechanism is the backbone of the &kdump; dual-kernel model, as it provides a reliable way to jump from a crashing production environment into a clean recovery environment for data capture.</para>
<para>The <literal>kexec-tools</literal> package contains a script called <literal>kexec-bootloader</literal>. This script reads the boot loader configuration and runs &kexec; using the same kernel options as the normal boot loader.
The most important component of &kexec; is the <command>/sbin/kexec</command> command. You can load a kernel with &kexec; in two ways:</para>
<itemizedlist>
<listitem><para>Load the kernel to the address space of a production kernel for a regular reboot:</para>
<screen>&prompt.sudo; kexec -l <replaceable>KERNEL_IMAGE</replaceable> </screen>
<para>You can later boot to this kernel with the command <command>kexec -e</command>.</para>
</listitem>
<listitem><para>Load the kernel to a reserved area of memory:</para>
<screen>&prompt.sudo; kexec -p <replaceable>KERNEL_IMAGE</replaceable> </screen>
<para>This kernel is booted automatically when the system crashes.</para>
</listitem>
</itemizedlist>
</section>
</topic>
42 changes: 42 additions & 0 deletions glues/more-info-kdump.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- This file originates from the project https://github.com/openSUSE/doc-kit -->
<!-- This file can be edited downstream. -->
<!DOCTYPE topic
[
<!ENTITY % entities SYSTEM "../common/generic-entities.ent">
%entities;
]>
<topic xml:id="more-info-kdump"
role="glue" xml:lang="en"
xmlns="http://docbook.org/ns/docbook" version="5.2"
xmlns:its="http://www.w3.org/2005/11/its"
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:trans="http://docbook.org/ns/transclusion">
<info>
<title>For more information</title>
<!-- can be changed via merge in the
assembly -->
<!--add author's e-mail address-->
<meta name="maintainer" content="amrita.sakthivel@suse.com" its:translate="no"/>
<!-- add an abstract/para here, if you need one -->
<!-- can be changed via merge in the assembly -->
</info>
<para>
For information on &kdump;, refer to the following resources:
</para>
<itemizedlist>
<listitem>
<para>
Official Linux kernel documentation:
<link xlink:href="https://docs.kernel.org/admin-guide/kdump/kdump.html"/>
</para>
</listitem>
<listitem>
<para>
Man page for &kexec;:
<link xlink:href="https://man7.org/linux/man-pages/man8/kexec.8.html"/>
</para>
</listitem>
</itemizedlist>
</topic>
82 changes: 82 additions & 0 deletions tasks/configure-kdump.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE topic
[
<!ENTITY % entities SYSTEM "../common/generic-entities.ent">
%entities;
]>
<!-- refers to legacy doc: https://github.com/SUSE/doc-sle/blob/main/xml/tuning_kexec.xml -->
<!-- point back to this document with a similar comment added to your legacy doc piece -->
<!-- refer to README.md for file and id naming conventions -->
<!-- metadata is dealt with on the assembly level -->
<topic xml:id="configure-kdump"
role="task" xml:lang="en"
xmlns="http://docbook.org/ns/docbook" version="5.2"
xmlns:its="http://www.w3.org/2005/11/its"
xmlns:xi="http://www.w3.org/2001/XInclude"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:trans="http://docbook.org/ns/transclusion">
<info>
<meta name="maintainer" content="amrita.sakthivel@suse.com" its:translate="no"/>
<title>Configuring &kdump;</title>
<abstract>
<para>
To boot another kernel and preserve the data of the production kernel when the system crashes, you need to reserve a dedicated area of the system memory.
The production kernel never loads to this area because it must always be available. It is used for the capture kernel so that the memory pages of the production kernel can be preserved.
</para>
</abstract>
</info>
<para>To use &kexec; with a capture kernel and to use &kdump; in any way, RAM needs to be allocated for the capture kernel.
To configure the reserved memory for the capture kernel, you must modify the <literal>crashkernel=</literal> parameter within the GRUB configuration file.
This value defines the specific block of RAM sequestered for the secondary kernel and its optimal size is typically determined by the total physical memory available in the system.
</para>
<procedure>
<title>Calculating the allocation size</title>
<step><para>To find the base value for your system, run:</para>
<screen>&prompt.sudo; kdumptool calibrate
Total: 49074
Low: 72
High: 180
MinLow: 72
MaxLow: 3085
MinHigh: 0
MaxHigh: 45824 </screen>
<itemizedlist>
<listitem><para><emphasis role="bold">Total:</emphasis> Your total system RAM.</para></listitem>
<listitem><para><emphasis role="bold">Low:</emphasis> The minimum memory required in the low memory zone (first 4 GB) for the kernel to boot.</para></listitem>
<listitem><para><emphasis role="bold">High:</emphasis> The recommended amount for the high memory zone. This covers the actual work of saving the crash dump.</para></listitem>
<listitem><para><emphasis role="bold">MinLow/MaxLow:</emphasis> The safe range for the low reservation. In the example output, <literal>Low</literal> equals <literal>MinLow</literal>, the absolute minimum.</para></listitem>
<listitem><para><emphasis role="bold">MinHigh/MaxHigh:</emphasis> The range available for the high reservation.</para></listitem>
</itemizedlist>
<para>All values are in megabytes. Note the <literal>Low</literal> value.</para>
</step>
<step><para>Based on your system architecture, adapt the <literal>Low</literal> or <literal>High</literal> value from the previous step for the number of LUN kernel paths (paths to storage devices) attached to the system.
A sensible value in megabytes can be calculated using this formula: </para>
<screen><replaceable>SIZE_LOW</replaceable> = <replaceable>RECOMMENDATION</replaceable> + (LUNs / 2)</screen>
<screen><replaceable>SIZE_HIGH</replaceable> = <replaceable>RECOMMENDATION</replaceable> + (LUNs / 2)</screen>
<itemizedlist>
<listitem><para><emphasis role="bold">SIZE_LOW/SIZE_HIGH:</emphasis> The resulting value for Low/High.</para></listitem>
<listitem><para><emphasis role="bold">RECOMMENDATION:</emphasis> The value recommended by the <command>kdumptool calibrate</command> command for Low/High.</para></listitem>
<listitem><para><emphasis role="bold">LUNs:</emphasis> The maximum number of LUN kernel paths that you expect to ever create on your system.
Exclude multipath devices from this number, as these are ignored. To get the current number of LUNs available on your system, run:</para>
<screen>cat /proc/scsi/scsi | grep Lun | wc -l</screen>
</listitem>
</itemizedlist>
</step>
<step><para>Set the values in the correct location. Append the following kernel option to your boot loader configuration:</para>
<screen>crashkernel=<replaceable>SIZE_HIGH</replaceable>,high crashkernel=<replaceable>SIZE_LOW</replaceable>,low</screen>
<screen>crashkernel=<replaceable>SIZE_LOW</replaceable></screen>
</step>
<step><para>Regenerate the boot loader configuration, then restart the system. The changes do not take effect until the system has been restarted and the memory is reserved.</para>
<screen>&prompt.sudo; grub2-mkconfig -o /boot/grub2/grub.cfg</screen>
</step>
<step><para>After restarting, confirm that the primary kernel has successfully allocated the memory for the secondary capture kernel.</para>
<screen>cat /sys/kernel/kexec_crash_size</screen>
</step>
<step>
<para>Ensure the &kdump; service is ready to catch a crash:</para>
<screen>&prompt.sudo; systemctl enable --now kdump</screen>
<screen>&prompt.sudo; kdumpctl status</screen>
</step>
</procedure>
</topic>
Loading