Final File
PIPE_BUF / Capacity
Advanced
Pipe Capacity and Write Atomicity
Pipes have a finite kernel buffer. Understanding its size and behaviour under pressure — blocking, partial writes, atomicity — is essential for robust IPC programming. This file covers everything about pipe capacity and the critical PIPE_BUF guarantee.
Key Concepts
A pipe’s kernel buffer has a maximum size. On Linux the default is 65536 bytes (64 KiB) per pipe (since kernel 2.6.11; was 4096 bytes before that).
| Constant / File | Value / Description |
|---|---|
| PIPE_BUF (POSIX minimum) | 512 bytes |
| PIPE_BUF on Linux | 4096 bytes (defined in <limits.h>) |
| Default pipe buffer size (Linux) | 65536 bytes (64 KiB) since kernel 2.6.11 |
| /proc/sys/fs/pipe-max-size | Maximum allowed pipe buffer size (default: 1 MiB) |
| fcntl(fd, F_SETPIPE_SZ, size) | Set pipe buffer size (Linux 2.6.35+) |
| fcntl(fd, F_GETPIPE_SZ) | Get current pipe buffer size |
Example 1: Check and change pipe buffer size
/* pipe_capacity.c */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
/*
 * pipe_capacity demo: print the capacity of a freshly created pipe and the
 * PIPE_BUF atomic-write limit, then ask the kernel to resize the pipe to
 * 128 KiB with F_SETPIPE_SZ and print the size it actually granted.
 *
 * Returns 0 on success, 1 if the pipe cannot be created or queried.
 */
int main(void)
{
    int pfd[2];

    if (pipe(pfd) == -1) {
        perror("pipe");
        return 1;
    }

    int sz = fcntl(pfd[1], F_GETPIPE_SZ);
    if (sz == -1) {
        perror("F_GETPIPE_SZ");
        close(pfd[0]);
        close(pfd[1]);
        return 1;
    }
    printf("Default pipe buffer size: %d bytes (%d KiB)\n", sz, sz / 1024);
    printf("PIPE_BUF (atomic write limit): %d bytes\n", (int)PIPE_BUF);

    int new_sz = 131072; /* 128 KiB */

    /* On success F_SETPIPE_SZ returns the capacity actually set, which may
       be larger than the value requested. */
    int result = fcntl(pfd[1], F_SETPIPE_SZ, new_sz);
    if (result == -1)
        perror("F_SETPIPE_SZ");
    else
        printf("New pipe buffer size: %d bytes (%d KiB)\n", result, result / 1024);

    close(pfd[0]);
    close(pfd[1]);
    return 0;
}
/* Output:
Default pipe buffer size: 65536 bytes (64 KiB)
PIPE_BUF (atomic write limit): 4096 bytes
New pipe buffer size: 131072 bytes (128 KiB)
*/
POSIX and Linux guarantee that writes of <= PIPE_BUF bytes to a pipe are atomic: the kernel will not interleave them with writes from other processes. Writes larger than PIPE_BUF may be split and interleaved.
Process A and Process B each write 64 bytes
Reader gets:
[64 bytes from A] — complete
[64 bytes from B] — complete
Guaranteed: no interleaving.
Process A and Process B each write 8000 bytes (more than PIPE_BUF)
Reader may get:
[4096 from A] then [4096 from B]
then [3904 from A] then [3904 from B]
Data interleaved — message corruption!
Example 2: Verify atomicity with concurrent writers
/* pipe_atomicity.c */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <limits.h>
#define NUM_WRITERS 4
#define MSG_LEN 64 /* <= PIPE_BUF: guaranteed atomic */
/*
 * pipe_atomicity demo: fork NUM_WRITERS children that each write one
 * MSG_LEN-byte message (a run of a single letter followed by '\n') to a
 * shared pipe, then read the messages back in the parent and report whether
 * any message contains bytes from more than one writer.
 *
 * Returns 0 normally, EXIT_FAILURE if the pipe cannot be created.
 */
int main(void)
{
    int pfd[2];

    if (pipe(pfd) == -1) {
        perror("pipe");
        return EXIT_FAILURE;
    }

    printf("PIPE_BUF=%d, MSG_LEN=%d -- writes are atomic\n",
           (int)PIPE_BUF, MSG_LEN);

    for (int i = 0; i < NUM_WRITERS; i++) {
        pid_t pid = fork();

        if (pid == -1) {
            /* Report it and carry on with the writers that did start. */
            perror("fork");
            continue;
        }
        if (pid == 0) {
            close(pfd[0]);

            char msg[MSG_LEN];
            memset(msg, 'A' + i, MSG_LEN - 1);
            msg[MSG_LEN - 1] = '\n';

            if (write(pfd[1], msg, MSG_LEN) != MSG_LEN) {
                perror("write");
                _exit(1);
            }
            close(pfd[1]);
            _exit(0);
        }
    }

    /* The parent must drop its write end, otherwise read() never sees EOF. */
    close(pfd[1]);

    char buf[MSG_LEN];
    int msg_num = 0;

    while (read(pfd[0], buf, MSG_LEN) == MSG_LEN) {
        char first = buf[0];
        int mixed = 0;

        /* The last byte is the '\n' terminator, so compare only the payload. */
        for (int i = 0; i < MSG_LEN - 1; i++) {
            if (buf[i] != first) {
                mixed = 1;
                break;
            }
        }
        printf("Msg %d: all '%c'? %s\n",
               ++msg_num, first, mixed ? "INTERLEAVED!" : "YES atomic");
    }
    close(pfd[0]);

    /* Reap every child; wait() returns -1 once none are left. */
    while (wait(NULL) != -1)
        ;
    return 0;
}
/* All 4 messages show "YES atomic" because MSG_LEN <= PIPE_BUF */
| Condition | Blocking pipe | O_NONBLOCK pipe |
|---|---|---|
| write <= PIPE_BUF, space available | Writes all bytes atomically | Same: atomic, returns n |
| write <= PIPE_BUF, pipe full | Blocks until all bytes written atomically | Returns -1, errno=EAGAIN (nothing written) |
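| write > PIPE_BUF, space available | Writes all bytes, but not atomically: data may interleave with other writers; may block until everything is written | Writes what fits (possibly a partial write), returns count |
| write > PIPE_BUF, pipe full | Blocks until all bytes are written (non-atomic; data may interleave) | Returns -1, errno=EAGAIN |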
| No reader (broken pipe) | SIGPIPE + errno=EPIPE | Same: SIGPIPE + EPIPE |
Example 3: Blocking write when pipe is full
/* pipe_full_block.c */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>
/*
 * pipe_full_block demo: the child fills the pipe to its reported capacity
 * and then issues one more write, which blocks until the parent (after a
 * one-second pause) drains part of the pipe.
 *
 * Returns 0 on success, 1 if the pipe or the child cannot be created.
 */
int main(void)
{
    int pfd[2];

    if (pipe(pfd) == -1) {
        perror("pipe");
        return 1;
    }

    int capacity = fcntl(pfd[1], F_GETPIPE_SZ);
    if (capacity == -1) {
        perror("F_GETPIPE_SZ");
        close(pfd[0]);
        close(pfd[1]);
        return 1;
    }
    printf("Pipe capacity: %d bytes\n", capacity);
    /* Flush before fork() so the child does not inherit buffered output and
       emit it a second time when it flushes its own messages. */
    fflush(stdout);

    pid_t pid = fork();
    if (pid == -1) {
        perror("fork");
        close(pfd[0]);
        close(pfd[1]);
        return 1;
    }

    if (pid == 0) {
        close(pfd[0]);

        char buf[4096];
        memset(buf, 'X', sizeof(buf));

        int total = 0;
        while (total < capacity) {
            int want = (capacity - total < 4096) ? capacity - total : 4096;
            ssize_t n = write(pfd[1], buf, want);
            if (n == -1) {
                perror("write");
                _exit(1);
            }
            total += (int)n;    /* count only what was really written */
        }

        printf("[Child] Pipe full (%d bytes). Next write BLOCKS...\n", total);
        /* _exit() does not flush stdio, so flush explicitly or the message
           is lost whenever stdout is not a terminal. */
        fflush(stdout);

        if (write(pfd[1], "overflow!", 9) == -1) { /* BLOCKS here */
            perror("write");
            _exit(1);
        }
        printf("[Child] Unblocked after parent read.\n");
        fflush(stdout);

        close(pfd[1]);
        _exit(0);
    }

    /* Parent: drop the unused write end so read() returns EOF instead of
       hanging if the child dies before writing anything. */
    close(pfd[1]);

    sleep(1);
    printf("[Parent] Reading to unblock child...\n");
    fflush(stdout);

    char drain[4096];
    if (read(pfd[0], drain, sizeof(drain)) == -1)
        perror("read");

    wait(NULL);
    close(pfd[0]);
    return 0;
}
When writing large data, always use a write loop to guarantee all bytes are sent:
/* safe_write.c */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
/*
 * Write exactly `count` bytes from `buf` to `fd`, reissuing write() after
 * partial writes and after interruption by a signal (EINTR).
 *
 * Returns `count` once everything has been written, or -1 on any other
 * write() error (errno is left as set by write()).
 */
ssize_t write_all(int fd, const void *buf, size_t count)
{
    const char *bytes = buf;
    size_t sent = 0;

    while (sent < count) {
        ssize_t rc = write(fd, bytes + sent, count - sent);

        if (rc != -1) {
            sent += (size_t)rc;
            continue;
        }
        if (errno != EINTR) {
            return -1;
        }
        /* Interrupted before any byte was transferred: just try again. */
    }

    return (ssize_t)count;
}
/*
 * Read up to `count` bytes from `fd` into `buf`, reissuing read() after
 * short reads and after interruption by a signal (EINTR), and stopping
 * early at end-of-file.
 *
 * Returns the number of bytes stored in `buf` (less than `count` only if
 * EOF was reached), or -1 on any other read() error.
 */
ssize_t read_all(int fd, void *buf, size_t count)
{
    char *dest = buf;
    size_t got = 0;

    while (got < count) {
        ssize_t rc = read(fd, dest + got, count - got);

        if (rc == 0) {
            break;              /* EOF: report what we have so far */
        }
        if (rc == -1) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        got += (size_t)rc;
    }

    return (ssize_t)got;
}
/*
 * safe_write demo: the parent pushes 200000 bytes through a pipe with
 * write_all() while the child collects them with read_all(); each side
 * reports how many bytes it handled.
 *
 * Returns 0 on success, 1 if the pipe or child cannot be created or the
 * parent's write fails.
 */
int main(void)
{
    int pfd[2];

    if (pipe(pfd) == -1) {
        perror("pipe");
        return 1;
    }

    char big_data[200000];
    memset(big_data, 'Z', sizeof(big_data));

    pid_t pid = fork();
    if (pid == -1) {
        /* Without a reader, writing would only raise SIGPIPE. */
        perror("fork");
        close(pfd[0]);
        close(pfd[1]);
        return 1;
    }

    if (pid == 0) {
        close(pfd[1]);

        char buf[200000];
        ssize_t n = read_all(pfd[0], buf, sizeof(buf));
        if (n == -1) {
            perror("read_all");
            _exit(1);
        }
        printf("[Child] Received %zd bytes\n", n);
        /* _exit() skips stdio cleanup; flush so the line is not lost when
           stdout is a file or pipe. */
        fflush(stdout);

        close(pfd[0]);
        _exit(0);
    }

    close(pfd[0]);

    int status = 0;
    ssize_t n = write_all(pfd[1], big_data, sizeof(big_data));
    if (n == -1) {
        perror("write_all");
        status = 1;
    } else {
        printf("[Parent] Wrote %zd bytes\n", n);
        fflush(stdout);
    }

    /* Closing the write end lets the child see EOF if it is still reading. */
    close(pfd[1]);
    wait(NULL);
    return status;
}
/* Output:
[Parent] Wrote 200000 bytes
[Child] Received 200000 bytes
*/
/* pipe_size_tuning.c */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
/*
 * Print the value stored in /proc/sys/fs/pipe-max-size.
 *
 * Best effort: prints nothing if the file cannot be opened or does not
 * start with an integer.
 */
void show_proc_settings(void)
{
    FILE *f = fopen("/proc/sys/fs/pipe-max-size", "r");
    if (f == NULL)
        return;

    int max = 0;
    int parsed = fscanf(f, "%d", &max);
    fclose(f);

    /* Only print when a value was really read; otherwise `max` would be
       reported without ever having been set from the file. */
    if (parsed == 1)
        printf("/proc/sys/fs/pipe-max-size = %d bytes (%d KiB)\n", max, max/1024);
}
/*
 * pipe_size_tuning demo: show the system pipe-max-size setting and the
 * current capacity of a new pipe, then shrink the pipe (requesting 1024
 * bytes) and grow it (requesting 512 KiB), printing the size the kernel
 * reports after each request.
 *
 * Returns 0 on success, 1 if the pipe cannot be created.
 */
int main(void)
{
    int pfd[2];

    if (pipe(pfd) == -1) {
        perror("pipe");
        return 1;
    }

    show_proc_settings();

    int cur = fcntl(pfd[1], F_GETPIPE_SZ);
    if (cur == -1)
        perror("F_GETPIPE_SZ");
    else
        printf("Current pipe buffer: %d bytes\n", cur);

    /* Shrink -- kernel rounds up to page size minimum */
    int small = fcntl(pfd[1], F_SETPIPE_SZ, 1024);
    if (small == -1)
        perror("F_SETPIPE_SZ");
    else
        printf("After F_SETPIPE_SZ(1024): actual = %d bytes\n", small);

    /* Grow */
    int large = fcntl(pfd[1], F_SETPIPE_SZ, 512 * 1024);
    if (large == -1) {
        /* errno is inspected straight after the failing call, before any
           other library call can overwrite it. */
        if (errno == EPERM)
            printf("Need CAP_SYS_RESOURCE for sizes above pipe-max-size\n");
        else
            perror("F_SETPIPE_SZ");
    } else {
        printf("After F_SETPIPE_SZ(512K): actual = %d bytes\n", large);
    }

    close(pfd[0]);
    close(pfd[1]);
    return 0;
}
/* Output:
/proc/sys/fs/pipe-max-size = 1048576 bytes (1024 KiB)
Current pipe buffer: 65536 bytes
After F_SETPIPE_SZ(1024): actual = 4096 bytes
After F_SETPIPE_SZ(512K): actual = 524288 bytes
*/
pipe() syscall, fd[0]/fd[1], byte stream, pipe vs FIFO
Parent-child IPC, closing unused ends, bidirectional, siblings
Closing FDs, SIGPIPE, EPIPE, SIG_IGN, reference count
O_CLOEXEC, O_NONBLOCK, atomic flags, race conditions
dup2(), shell pipelines, 3-stage pipeline, popen/pclose
mkfifo(), filesystem entry, blocking open(), unlink()
open() rules, O_NONBLOCK ENXIO, EOF, O_RDWR trick
Well-known FIFO, per-client FIFO, request struct, PIPE_BUF
PIPE_BUF, 65536 default, F_SETPIPE_SZ, partial write loop
A: 65536 bytes (64 KiB) since kernel 2.6.11. Prior to that it was 4096 bytes. The system maximum is controlled by /proc/sys/fs/pipe-max-size (default 1 MiB).
A: PIPE_BUF is the maximum byte count for which a write() to a pipe is guaranteed to be atomic (not interleaved). On Linux it is 4096 bytes. Writes at or below this size will never be interleaved with writes from other processes.
A: The write may be split into multiple kernel operations, each of which can be interleaved with writes from other processes. In blocking mode the write eventually completes. In nonblocking mode it writes as many bytes as fit and returns the count.
A: Use fcntl(fd, F_SETPIPE_SZ, size) (Linux 2.6.35+). The kernel may round the request up; requests smaller than one page are rounded up to the page size. Unprivileged processes can set sizes up to /proc/sys/fs/pipe-max-size; exceeding that limit requires CAP_SYS_RESOURCE.
A: Use a write loop: keep calling write() until all bytes are sent, handling partial returns and EINTR. A concurrent reader must be consuming data from the pipe so the writer is never permanently blocked.
A: PIPE_BUF (4096 bytes on Linux) is the atomicity guarantee — the max size of a single write that will not be interleaved. The pipe buffer capacity (65536 bytes) is the total data that can be buffered before write() blocks. They are completely independent values.
Chapter 44 Complete!
All 9 sections of TLPI Chapter 44 — Pipes and FIFOs — covered.
