What is the OOM Killer?
When the system uses lazy swap reservation (overcommitting), it is possible for physical RAM and swap to become completely exhausted at runtime — even though all mmap() calls previously succeeded. This happens when processes collectively try to access more pages than the system can back.
The Linux kernel has a subsystem dedicated to handling this situation: the Out-of-Memory (OOM) Killer. When memory is exhausted, the OOM killer selects and terminates one or more processes by sending them a SIGKILL signal, freeing their memory and allowing other processes to continue.
→ Fault resolved
→ Reclaim attempt
→ OOM Killer invoked
→ SIGKILL sent
The OOM killer scores every process using multiple factors. The process with the highest score is killed first.
oom_score — Read-only. Shows the kernel’s current score for a process. Higher = more likely to be killed.
# Read oom_score for process with PID 1234
cat /proc/1234/oom_score
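oom_adj — Writable (as root). Adjusts the oom_score. Range: -16 to +15. Special value -17 exempts the process entirely. Note: oom_adj has been deprecated since Linux 2.6.36 in favor of /proc/PID/oom_score_adj (range -1000 to +1000, where -1000 exempts the process); prefer oom_score_adj on current kernels.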
# Make current process unlikely to be killed
echo -10 > /proc/$$/oom_adj
# Exempt a critical daemon (e.g., PID 100) from OOM killing
echo -17 > /proc/100/oom_adj
# Make a low-priority process more killable
echo 15 > /proc/9999/oom_adj
EXEMPT
Protected
Default
At risk
MOST KILLABLE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Read the kernel's current OOM score for a process.
 *
 * pid: process whose /proc/<pid>/oom_score file is read.
 *
 * Returns the score on success, or -1 if the file cannot be opened
 * (a perror() diagnostic is printed) or its contents cannot be parsed
 * as an integer.
 */
int get_oom_score(pid_t pid)
{
    char path[64];
    snprintf(path, sizeof(path), "/proc/%d/oom_score", (int)pid);

    FILE *f = fopen(path, "r");
    if (!f) {
        perror("fopen oom_score");
        return -1;
    }

    /* Start from the error value so a failed parse never returns garbage. */
    int score = -1;
    if (fscanf(f, "%d", &score) != 1) {
        fprintf(stderr, "oom_score: could not parse %s\n", path);
        score = -1;
    }

    fclose(f);
    return score;
}
/* Demo entry point: look up this process's own OOM score and print it. */
int main(void)
{
    const pid_t self = getpid();

    /* get_oom_score() yields -1 when the score could not be read. */
    const int own_score = get_oom_score(self);

    printf("PID %d oom_score = %d\n", (int)self, own_score);
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Write an OOM adjustment value to /proc/<pid>/oom_adj.
 *
 * pid:   target process.
 * value: adjustment to write; the value is passed through unvalidated.
 *
 * Returns 0 only when the value was written and flushed successfully,
 * -1 otherwise (a perror() diagnostic is printed).
 */
int set_oom_adj(pid_t pid, int value)
{
    char path[64];
    snprintf(path, sizeof(path), "/proc/%d/oom_adj", (int)pid);

    FILE *f = fopen(path, "w");
    if (!f) {
        perror("fopen oom_adj (need root?)");
        return -1;
    }

    int rc = 0;
    if (fprintf(f, "%d\n", value) < 0) {
        perror("write oom_adj");
        rc = -1;
    }

    /*
     * stdio buffers the write, so a rejection of the value is normally
     * only reported when the buffer is flushed here. Ignoring this result
     * would report success even though nothing was changed.
     */
    if (fclose(f) != 0 && rc == 0) {
        perror("write oom_adj (need root?)");
        rc = -1;
    }

    return rc;
}
/*
 * Read /proc/<pid>/oom_score.
 * Returns the score, or -1 if the file cannot be opened or parsed.
 */
static int read_oom_score(pid_t pid)
{
    char path[64];
    snprintf(path, sizeof(path), "/proc/%d/oom_score", (int)pid);

    FILE *f = fopen(path, "r");
    if (!f) {
        return -1;
    }

    int score = -1;
    if (fscanf(f, "%d", &score) != 1) {
        score = -1;
    }
    fclose(f);
    return score;
}

/*
 * Demo daemon: tries to exempt itself from the OOM killer, then loops
 * forever, reporting its current OOM score every five seconds.
 */
int main(void)
{
    pid_t pid = getpid();

    /* Protect this process - requires root. On failure set_oom_adj()
     * reports the error itself and the daemon keeps running unprotected. */
    if (set_oom_adj(pid, -17) == 0) {
        printf("PID %d is now OOM-exempt (oom_adj=-17)\n", (int)pid);
    }

    /* Simulate a critical daemon loop */
    printf("Critical daemon running (press Ctrl+C to stop)...\n");
    while (1) {
        sleep(5);
        /* Report the real score (-1 if unreadable) rather than a constant. */
        printf("Still alive, oom_score=%d\n", read_oom_score(pid));
    }

    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <ctype.h>
#include <string.h>
/* One scanned process: PID, OOM score and command name. */
struct oom_entry {
    pid_t pid;
    int score;
    char comm[32];
};

/* qsort comparator: highest score first; equal scores by ascending PID. */
static int compare_oom_entries(const void *lhs, const void *rhs)
{
    const struct oom_entry *a = lhs;
    const struct oom_entry *b = rhs;

    if (a->score != b->score) {
        return (a->score < b->score) ? 1 : -1;
    }
    if (a->pid != b->pid) {
        return (a->pid < b->pid) ? -1 : 1;
    }
    return 0;
}

/*
 * Print a table of the processes with the highest OOM scores.
 *
 * top_n: maximum number of rows to print; values <= 0 print only the
 *        table header.
 *
 * Scans every numeric directory in /proc, reading oom_score and comm for
 * each. Processes whose oom_score cannot be opened or parsed (for example
 * because they exited during the scan) are skipped. If /proc cannot be
 * opened a perror() diagnostic is printed and nothing else is output.
 */
void print_top_oom_processes(int top_n)
{
    DIR *proc_dir = opendir("/proc");
    if (!proc_dir) {
        perror("opendir /proc");
        return;
    }

    /* Growable array so that no process is dropped on busy systems. */
    struct oom_entry *entries = NULL;
    size_t count = 0;
    size_t capacity = 0;

    struct dirent *ent;
    while ((ent = readdir(proc_dir)) != NULL) {
        /* Only look at purely numeric directories (PIDs). */
        if (!isdigit((unsigned char)ent->d_name[0])) {
            continue;
        }
        char *end = NULL;
        long pid_value = strtol(ent->d_name, &end, 10);
        if (*end != '\0' || pid_value <= 0) {
            continue;
        }
        pid_t pid = (pid_t)pid_value;

        /* Read oom_score */
        char path[64];
        snprintf(path, sizeof(path), "/proc/%d/oom_score", (int)pid);
        FILE *f = fopen(path, "r");
        if (!f) {
            continue;
        }
        int score;
        int parsed = fscanf(f, "%d", &score);
        fclose(f);
        if (parsed != 1) {
            continue;
        }

        /* Read process name; fgets keeps names that contain spaces. */
        char comm[32] = "?";
        snprintf(path, sizeof(path), "/proc/%d/comm", (int)pid);
        f = fopen(path, "r");
        if (f) {
            if (fgets(comm, sizeof(comm), f)) {
                comm[strcspn(comm, "\n")] = '\0';
            } else {
                snprintf(comm, sizeof(comm), "?");
            }
            fclose(f);
        }

        if (count == capacity) {
            size_t new_capacity = capacity ? capacity * 2 : 256;
            struct oom_entry *grown =
                realloc(entries, new_capacity * sizeof(*grown));
            if (!grown) {
                /* Out of memory: report what has been collected so far. */
                perror("realloc");
                break;
            }
            entries = grown;
            capacity = new_capacity;
        }

        entries[count].pid = pid;
        entries[count].score = score;
        /* snprintf always NUL-terminates, unlike strncpy. */
        snprintf(entries[count].comm, sizeof(entries[count].comm), "%s", comm);
        count++;
    }
    closedir(proc_dir);

    if (count > 1) {
        qsort(entries, count, sizeof(*entries), compare_oom_entries);
    }

    printf("%-8s %-20s %s\n", "PID", "COMM", "OOM_SCORE");
    printf("%-8s %-20s %s\n", "---", "----", "---------");

    size_t show = 0;
    if (top_n > 0) {
        show = ((size_t)top_n < count) ? (size_t)top_n : count;
    }
    for (size_t i = 0; i < show; i++) {
        printf("%-8d %-20s %d\n",
               (int)entries[i].pid, entries[i].comm, entries[i].score);
    }

    free(entries);
}
/* Demo entry point: print a heading, then the ten highest-scoring processes. */
int main(void)
{
    const int rows_wanted = 10;

    fputs("Top 10 processes by OOM score:\n", stdout);
    print_top_oom_processes(rows_wanted);

    return 0;
}
The OOM killer sends SIGKILL, which cannot be caught or ignored. However, you can use a watchdog that monitors memory pressure and gracefully shuts down before OOM occurs:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
/*
 * Loop flag cleared by the SIGTERM handler. volatile sig_atomic_t is the
 * type the C standard guarantees can be safely written from a signal
 * handler and read from the interrupted code.
 */
static volatile sig_atomic_t running = 1;

/*
 * SIGTERM handler: announce the shutdown and ask the main loop to stop.
 *
 * sig: signal number (unused).
 *
 * Only async-signal-safe operations are used here: write() instead of
 * printf(), which may deadlock or corrupt stdio state if the signal
 * interrupts another stdio call.
 */
void sigterm_handler(int sig)
{
    (void)sig;

    static const char msg[] = "\nSIGTERM received - shutting down gracefully\n";
    /* Best-effort notice; nothing useful can be done if the write fails. */
    ssize_t written = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
    (void)written;

    running = 0;
}
/*
 * Look up the "MemAvailable" figure in /proc/meminfo.
 *
 * Returns the value in kB, or -1 when the file cannot be opened or no
 * MemAvailable line is present.
 */
long read_available_memory_kb(void)
{
    long result_kb = -1;

    FILE *meminfo = fopen("/proc/meminfo", "r");
    if (meminfo == NULL) {
        return -1;
    }

    char buf[128];
    for (;;) {
        if (fgets(buf, sizeof(buf), meminfo) == NULL) {
            break;              /* end of file without a match */
        }

        long parsed_kb;
        if (sscanf(buf, "MemAvailable: %ld kB", &parsed_kb) == 1) {
            result_kb = parsed_kb;
            break;              /* first matching line wins */
        }
    }

    fclose(meminfo);
    return result_kb;
}
/*
 * Memory watchdog: polls available memory every two seconds, warns when
 * it drops below 50 MB, and exits cleanly once SIGTERM clears `running`.
 *
 * Returns 0 on a clean exit, 1 if the SIGTERM handler cannot be installed.
 */
int main(void)
{
    enum {
        LOW_MEMORY_THRESHOLD_KB = 50 * 1024, /* 50 MB expressed in kB */
        POLL_INTERVAL_SECONDS = 2
    };

    if (signal(SIGTERM, sigterm_handler) == SIG_ERR) {
        perror("signal(SIGTERM)");
        return 1;
    }

    printf("Monitoring memory (threshold: 50MB free)\n");
    while (running) {
        long avail_kb = read_available_memory_kb();
        /* A negative value means the figure could not be read; skip it. */
        if (avail_kb >= 0) {
            printf("Available memory: %ld KB\n", avail_kb);
            if (avail_kb < LOW_MEMORY_THRESHOLD_KB) {
                printf("LOW MEMORY WARNING - consider reducing load!\n");
                /* Real app: free caches, reduce buffers, etc. */
            }
        }
        sleep(POLL_INTERVAL_SECONDS);
    }

    printf("Clean exit.\n");
    return 0;
}
Q1. What is the OOM killer and why does Linux have it?
A: The OOM (Out-of-Memory) killer is a kernel subsystem that selects and terminates processes when physical memory (RAM + swap) is completely exhausted. Linux needs it because it allows memory overcommitting — virtual allocations can exceed physical memory. When processes try to use more pages than available, the OOM killer frees memory by killing processes rather than crashing the entire system.
Q2. What signal does the OOM killer send?
A: SIGKILL. This signal cannot be caught, blocked, or ignored, so the process is terminated immediately. There is no way for the process to handle OOM-induced termination gracefully.
Q3. What factors make a process more likely to be chosen by the OOM killer?
A: Higher memory consumption, recently started (less CPU invested), high nice value, many children, not privileged, not doing raw device I/O, and a higher oom_adj value.
Q4. What does oom_adj = -17 mean?
A: It completely exempts the process from being selected by the OOM killer. The process will never be killed by the OOM killer, regardless of memory pressure. This is useful for critical system daemons that must stay alive.
Q5. Can you catch SIGKILL sent by the OOM killer?
A: No. SIGKILL cannot be caught, blocked, or ignored by any process. This is a fundamental property of SIGKILL in POSIX. The only defense is to prevent OOM situations (monitoring memory, adjusting oom_adj, reducing overcommitting) or to write stateless processes that can safely restart.
Q6. What is /proc/PID/oom_score?
A: A read-only file that shows the kernel’s current OOM score for a process. The higher the value, the more likely it is to be selected by the OOM killer. Available since Linux kernel 2.6.11.
Q7. Why does the OOM killer avoid killing processes doing raw device I/O?
A: Killing a process in the middle of raw device I/O could leave the device in an inconsistent or unusable state (e.g., a partially written block device, a corrupted filesystem). The OOM killer avoids this to protect system integrity.
Q8. How can an embedded systems engineer protect a critical process from OOM killing?
A: Write -17 to /proc/PID/oom_adj (requires root). Additionally, mlockall() can be used to lock all pages in RAM (prevent swapping), and careful memory budgeting with RLIMIT_AS per-process ensures no single process can monopolize memory.
