Shell Pipelines & popen()
dup2(), Connecting Filters, popen/pclose
Part 5 of 9
Shell Pipelines
Shell Pipelines
Topic
dup2 + popen
dup2 + popen
Level
Intermediate
Intermediate
Connecting Commands with Pipes
The shell’s | operator creates pipes between commands. Understanding how the shell does this helps you write your own programs that chain subprocesses together — exactly like a shell pipeline.
The key system call is dup2() — it connects a pipe file descriptor to the standard input or standard output of a child process before exec().
Key Concepts
dup2() STDIN_FILENO STDOUT_FILENO execvp() popen() pclose() filter pipeline
How the Shell Implements “ls | grep .c”
Shell Pipeline: ls | grep .c
Step 1
Shell calls pipe(pfd) — creates the pipe (pfd[0] = read end, pfd[1] = write end).
Step 2
Shell forks Child 1 (for ls): closes pfd[0], calls dup2(pfd[1], STDOUT_FILENO), closes pfd[1], then exec(“ls”).
Step 3
Shell forks Child 2 (for grep): closes pfd[1], calls dup2(pfd[0], STDIN_FILENO), closes pfd[0], then exec(“grep”, “.c”).
Result
ls writes to stdout → the data goes into the pipe → grep reads it from stdin, which now comes from the pipe.
ls
stdout → pfd[1] (pipe write end)
↓
PIPE
kernel buffer
↓
grep .c
stdin ← pfd[0] (pipe read end)
dup2() — Redirect a File Descriptor
#include <unistd.h>
int dup2(int oldfd, int newfd);
/* Makes newfd be a copy of oldfd.
* If newfd was already open, it is closed first.
* Returns newfd on success, -1 on error.
*/
The classic use: dup2(pfd[1], STDOUT_FILENO) makes stdout (fd 1) point to the pipe’s write end. After this, any printf() or write(1, …) goes into the pipe.
Example 1: Implement “ls | wc -l” in C
/* ls_wc_pipeline.c - implement: ls | wc -l */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
/*
 * Runs the equivalent of the shell pipeline "ls | wc -l".
 *
 * One pipe is created. Child 1 execs "ls" with its stdout redirected to the
 * pipe's write end; child 2 execs "wc -l" with its stdin redirected to the
 * pipe's read end. The parent closes both pipe ends and waits for both
 * children.
 *
 * Returns 0 after both children have been waited for, or 1 if pipe() or
 * fork() fails. A child that cannot redirect or exec reports the error and
 * exits with status 1.
 */
int main(void)
{
    int pfd[2];

    if (pipe(pfd) == -1) {
        perror("pipe");
        return 1;
    }

    /* ===== Fork Child 1: runs "ls" ===== */
    pid_t pid1 = fork();
    if (pid1 == -1) {
        perror("fork1");
        close(pfd[0]);
        close(pfd[1]);
        return 1;
    }
    if (pid1 == 0) {
        /* Child 1: ls - stdout goes into the pipe */
        close(pfd[0]);                      /* don't need read end */
        if (dup2(pfd[1], STDOUT_FILENO) == -1) {
            /* Without the redirect, ls would write to the terminal. */
            perror("dup2 ls");
            _exit(1);
        }
        close(pfd[1]);                      /* original fd no longer needed */
        /* The variadic terminator must be a null pointer of type char *. */
        execlp("ls", "ls", (char *)NULL);
        perror("execlp ls");                /* only reached if exec failed */
        _exit(1);
    }

    /* ===== Fork Child 2: runs "wc -l" ===== */
    pid_t pid2 = fork();
    if (pid2 == -1) {
        perror("fork2");
        /*
         * Close the pipe so that ls is not left writing to a pipe that
         * nobody will ever read, then reap it before giving up.
         */
        close(pfd[0]);
        close(pfd[1]);
        waitpid(pid1, NULL, 0);
        return 1;
    }
    if (pid2 == 0) {
        /* Child 2: wc -l - stdin comes from the pipe */
        close(pfd[1]);                      /* don't need write end */
        if (dup2(pfd[0], STDIN_FILENO) == -1) {
            perror("dup2 wc");
            _exit(1);
        }
        close(pfd[0]);                      /* original fd no longer needed */
        execlp("wc", "wc", "-l", (char *)NULL);
        perror("execlp wc");                /* only reached if exec failed */
        _exit(1);
    }

    /*
     * ===== Parent: close both ends and wait =====
     * The parent must drop its copy of the write end, otherwise wc never
     * sees EOF on its stdin.
     */
    close(pfd[0]);
    close(pfd[1]);
    waitpid(pid1, NULL, 0);
    waitpid(pid2, NULL, 0);
    return 0;
}
/* Same output as: ls | wc -l */
Critical: The parent must close both pipe ends after forking both children. Otherwise, wc -l will never see EOF because the parent still holds pfd[1] open.
Example 2: Three-Stage Pipeline — ls | grep .c | sort
/* three_stage_pipeline.c - ls | grep .c | sort */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
/* Closes both ends of a pipe created with pipe(). */
static void close_pair(int fds[2])
{
    close(fds[0]);
    close(fds[1]);
}

/*
 * Child-side helper: makes `target` (stdin or stdout) refer to the pipe end
 * `fd`, then closes the now-redundant `fd`. On dup2() failure it reports the
 * error under `label` and terminates the child with status 1.
 */
static void redirect_or_exit(int fd, int target, const char *label)
{
    if (fd == target) {
        return;                 /* already in place; closing would undo it */
    }
    if (dup2(fd, target) == -1) {
        perror(label);
        _exit(1);
    }
    close(fd);
}

/*
 * Runs the equivalent of the shell pipeline "ls | grep .c | sort".
 *
 * pipe1 carries ls -> grep, pipe2 carries grep -> sort. Every process closes
 * each pipe end it does not use, so that each reader sees EOF as soon as its
 * writer exits.
 *
 * Returns 0 after all three children have been waited for, or 1 if pipe() or
 * fork() fails (children already started are still reaped).
 */
int main(void)
{
    int pipe1[2];               /* pipe1: ls   -> grep */
    int pipe2[2];               /* pipe2: grep -> sort */
    pid_t started[3];
    int nstarted = 0;
    int rc = 1;
    pid_t pid;

    if (pipe(pipe1) == -1) {
        perror("pipe");
        return 1;
    }
    if (pipe(pipe2) == -1) {
        perror("pipe");
        close_pair(pipe1);
        return 1;
    }

    /* Child 1: ls */
    pid = fork();
    if (pid == -1) {
        perror("fork ls");
        goto cleanup;
    }
    if (pid == 0) {
        close(pipe1[0]);
        close_pair(pipe2);
        redirect_or_exit(pipe1[1], STDOUT_FILENO, "dup2 ls");
        execlp("ls", "ls", (char *)NULL);
        perror("execlp ls");
        _exit(1);
    }
    started[nstarted++] = pid;

    /* Child 2: grep .c */
    pid = fork();
    if (pid == -1) {
        perror("fork grep");
        goto cleanup;
    }
    if (pid == 0) {
        close(pipe1[1]);
        close(pipe2[0]);
        redirect_or_exit(pipe1[0], STDIN_FILENO, "dup2 grep stdin");   /* stdin  <- pipe1 */
        redirect_or_exit(pipe2[1], STDOUT_FILENO, "dup2 grep stdout"); /* stdout -> pipe2 */
        execlp("grep", "grep", ".c", (char *)NULL);
        perror("execlp grep");
        _exit(1);
    }
    started[nstarted++] = pid;

    /* Child 3: sort */
    pid = fork();
    if (pid == -1) {
        perror("fork sort");
        goto cleanup;
    }
    if (pid == 0) {
        close_pair(pipe1);
        close(pipe2[1]);
        redirect_or_exit(pipe2[0], STDIN_FILENO, "dup2 sort");         /* stdin <- pipe2 */
        execlp("sort", "sort", (char *)NULL);
        perror("execlp sort");
        _exit(1);
    }
    started[nstarted++] = pid;

    rc = 0;

cleanup:
    /*
     * Parent: close all ends before waiting. Holding a write end open would
     * keep the downstream reader from ever seeing EOF.
     */
    close_pair(pipe1);
    close_pair(pipe2);
    for (int i = 0; i < nstarted; i++) {
        waitpid(started[i], NULL, 0);
    }
    return rc;
}
/* Equivalent to: ls | grep .c | sort */
popen() and pclose() — The Easy Way
popen() is a higher-level function that combines pipe + fork + exec into one call. It gives you a FILE* stream connected to a shell command’s stdin or stdout.
#include <stdio.h>
FILE *popen(const char *command, const char *mode);
/* mode: "r" -> read from command's stdout
* "w" -> write to command's stdin
* Returns FILE* on success, NULL on error */
int pclose(FILE *stream);
/* Closes the stream, waits for child to finish
* Returns the child's termination status in waitpid() format
* (decode with WIFEXITED/WEXITSTATUS), or -1 on error */
Example 3: Read output of a shell command with popen(“r”)
/* popen_read.c - read output of "ls -la /tmp" */
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
/*
 * Runs "ls -la /tmp" through popen() and echoes each line of its output,
 * then reports how the command ended.
 *
 * pclose() returns a wait status in waitpid() format, not a plain exit code,
 * so it is decoded with the <sys/wait.h> macros before being printed.
 *
 * Returns 0 on success, 1 if popen() or pclose() fails.
 */
int main(void)
{
    char line[256];

    /* "r" = we read the command's stdout */
    FILE *fp = popen("ls -la /tmp", "r");
    if (fp == NULL) {
        perror("popen");
        return 1;
    }

    printf("Output of 'ls -la /tmp':\n");
    printf("%-50s\n", "---------------------------------------------------");
    while (fgets(line, sizeof(line), fp) != NULL) {
        printf(" %s", line);
    }

    int status = pclose(fp);
    if (status == -1) {
        perror("pclose");
        return 1;
    }
    if (WIFEXITED(status)) {
        printf("\nCommand exit status: %d\n", WEXITSTATUS(status));
    } else if (WIFSIGNALED(status)) {
        printf("\nCommand terminated by signal %d\n", WTERMSIG(status));
    } else {
        printf("\nCommand ended with wait status %d\n", status);
    }
    return 0;
}
Example 4: Write to a shell command with popen(“w”)
/* popen_write.c - pipe data into "sort" */
#include <stdio.h>
#include <stdlib.h>
/*
 * Feeds four unsorted lines to "sort" through popen() in write mode; sort
 * prints the sorted result on this process's stdout.
 *
 * Returns 0 if every line was written and sort finished with status 0,
 * 1 if popen(), a write, or pclose() fails, or sort reports a non-zero
 * status.
 */
int main(void)
{
    static const char *const unsorted[] = {
        "banana\n",
        "apple\n",
        "cherry\n",
        "date\n",
    };
    const size_t count = sizeof unsorted / sizeof unsorted[0];

    /* "w" = we write to the command's stdin */
    FILE *fp = popen("sort", "w");
    if (fp == NULL) {
        perror("popen");
        return 1;
    }

    /* Feed unsorted lines to sort */
    for (size_t i = 0; i < count; i++) {
        if (fputs(unsorted[i], fp) == EOF) {
            perror("fputs");
            pclose(fp);
            return 1;
        }
    }

    /* The stream is buffered, so write errors may only show up on flush. */
    if (fflush(fp) == EOF) {
        perror("fflush");
        pclose(fp);
        return 1;
    }

    /* pclose() closes the stream and waits for sort to finish */
    int status = pclose(fp);
    if (status == -1) {
        perror("pclose");
        return 1;
    }
    if (status != 0) {
        fprintf(stderr, "sort did not finish cleanly (wait status %d)\n", status);
        return 1;
    }
    return 0;
}
/* Output:
apple
banana
cherry
date
*/
⚠️ popen() security warning: popen() passes the command to /bin/sh -c. Never pass user input directly to popen() without sanitization — it’s vulnerable to shell injection. Use pipe + fork + exec manually for untrusted input.
pipe+dup2+exec vs popen() Comparison
| Feature | pipe + dup2 + exec | popen() |
|---|---|---|
| Control | Full — choose any program + args | Runs via /bin/sh -c |
| Security | ✅ Safe from shell injection | ⚠️ Shell injection risk |
| Complexity | High — more code | Low — one call |
| Bidirectional | ✅ Yes (two pipes) | ❌ No (read or write only) |
| Multiple processes | ✅ Yes | ❌ No (single command) |
| Best for | Production, security-critical code | Quick scripts, trusted commands |
Interview Questions — dup2, Pipelines, popen
Q1. What does dup2(oldfd, newfd) do?
A: dup2() makes newfd a copy of oldfd. If newfd was already open, it’s closed first. Both descriptors then refer to the same file/pipe. Used to redirect stdin/stdout to a pipe.
A: dup2() makes newfd a copy of oldfd. If newfd was already open, it’s closed first. Both descriptors then refer to the same file/pipe. Used to redirect stdin/stdout to a pipe.
Q2. How does a shell implement “cmd1 | cmd2”?
A: The shell: (1) calls pipe() to create a pipe, (2) forks two children, (3) in child1: dup2(pfd[1], STDOUT_FILENO) then exec cmd1, (4) in child2: dup2(pfd[0], STDIN_FILENO) then exec cmd2, (5) parent closes both pipe ends and waits.
A: The shell: (1) calls pipe() to create a pipe, (2) forks two children, (3) in child1: dup2(pfd[1], STDOUT_FILENO) then exec cmd1, (4) in child2: dup2(pfd[0], STDIN_FILENO) then exec cmd2, (5) parent closes both pipe ends and waits.
Q3. Why must you close the original pipe fds after dup2()?
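A: After dup2(pfd[1], STDOUT_FILENO), stdout and pfd[1] both refer to the pipe’s write end. Close pfd[1] so that stdout is the only remaining reference. A reader sees EOF only when every descriptor for the write end has been closed, so a stray copy of pfd[1] left open in any process keeps the reader blocked forever; it also wastes a file descriptor.
A: After dup2(pfd[1], STDOUT_FILENO), stdout and pfd[1] both refer to the pipe’s write end. Close pfd[1] so that stdout is the only remaining reference. A reader sees EOF only when every descriptor for the write end has been closed, so a stray copy of pfd[1] left open in any process keeps the reader blocked forever; it also wastes a file descriptor.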
Q4. What is the difference between popen() mode “r” and “w”?
A: Mode “r” opens a pipe to read the command’s standard output. Mode “w” opens a pipe to write to the command’s standard input.
A: Mode “r” opens a pipe to read the command’s standard output. Mode “w” opens a pipe to write to the command’s standard input.
Q5. Why is popen() dangerous with user-provided input?
A: popen() passes the command string to /bin/sh -c. If the string contains shell metacharacters (;, |, &, $, etc.) from user input, an attacker can inject arbitrary shell commands. For untrusted input, use pipe + fork + execv() directly, which doesn’t invoke a shell.
A: popen() passes the command string to /bin/sh -c. If the string contains shell metacharacters (;, |, &, $, etc.) from user input, an attacker can inject arbitrary shell commands. For untrusted input, use pipe + fork + execv() directly, which doesn’t invoke a shell.
