Process states can be manipulated using various kernel functions and system calls:
- set_task_state(task, state) - Set process state
- __set_task_state(task, state) - Directly set without barriers
- set_current_state(state) - Set current process state
Process states:
- TASK_RUNNING - Process ready to run
- TASK_INTERRUPTIBLE - Sleeping, can be awakened by signal
- TASK_UNINTERRUPTIBLE - Sleeping, cannot be awakened by signal
- TASK_STOPPED - Process stopped (debugger/job control)
- TASK_TRACED - Process traced by debugger
Example:
set_current_state(TASK_INTERRUPTIBLE);
schedule(); // Go to sleep
Kernel Threads: Threads created and managed by the kernel
- Created with kthread_create()
User Threads: Threads created in user space
Creating kernel thread:
// Ask the kernel to create a thread named "mythread" for threadfunc, passing arg.
struct task_struct *t = kthread_create(threadfunc,
arg, "mythread");
// Wake the new thread only if creation did not return an error pointer.
if (!IS_ERR(t))
wake_up_process(t);
fork(): Creates complete copy of parent process
vfork(): Creates process with shared memory
Code comparison:
// fork() - parent continues
// fork() returns 0 in the child, the child's PID in the parent, and -1 on
// failure, so the failure case must be separated from the parent branch.
pid_t pid = fork();
if (pid < 0) {
    // fork() failed: no child exists; errno holds the reason
} else if (pid == 0) {
    // Child code
} else {
    // Parent continues
}
// vfork() - parent waits
// The child borrows the parent's address space until it execs or terminates,
// so it must do nothing except call an exec function or _exit().
pid_t pid = vfork();
if (pid < 0) {
    // vfork() failed: no child exists; errno holds the reason
} else if (pid == 0) {
    // Child MUST exec() or _exit() (not exit(), which would run the
    // parent's atexit handlers and flush its stdio buffers)
    execv(...);
    _exit(127); // Reached only if execv() failed
} else {
    // Parent blocked until child execs or exits
}
The task_struct is the fundamental process descriptor in Linux kernel. It contains:
- volatile long state
- pid_t pid, pid_t tgid (thread group ID)
- int prio, int static_prio, struct list_head run_list
- struct mm_struct *mm, struct mm_struct *active_mm
- struct files_struct *files
- struct signal_struct *signal
- void *stack
- u64 se.vruntime
- struct task_struct *real_parent, struct list_head children
- unsigned int flags (PF_EXITING, PF_EXITPIDONE, etc.)
Accessing current process:
struct task_struct *current; // Always points to current process
current->pid; // Get PID
current->state; // Get state
current->comm; // Get command name
current_thread_info(); // Get thread_info
Process Context: Execution state when kernel code runs on behalf of a process
When it occurs:
Characteristics:
- current macro points to current task
- User memory is accessed with copy_from_user() and copy_to_user()
Example:
// In syscall - process context
// Illustrative skeleton of the read() system call entry point. The body is
// comments only; it lists what code running in process context may do.
// NOTE(review): no return statement is shown although the function returns long.
asmlinkage long sys_read(unsigned int fd, char __user *buf,
size_t count) {
// current points to the process making the call
// can sleep: schedule();
// can copy: copy_to_user(buf, kbuf, count);
}
Zombie Process: A child process that has exited but parent hasn't reaped it with wait()
Why it exists:
How kernel handles orphaned children:
- Relevant kernel functions: exit_notify() and forget_original_parent()
Code example:
// Parent process
pid_t pid = fork();
if (pid == 0) {
    // Child
    exit(5);
} else {
    // Parent - without wait, child becomes zombie!
    int status; // Receives the child's termination status from wait()

    sleep(10); // Child is zombie during this time
    wait(&status); // Reaps zombie, collects exit status
    // WIFEXITED(status) is now true and WEXITSTATUS(status) is 5
}
CFS (Completely Fair Scheduler): Default Linux process scheduler since 2.6.23
Key concepts:
- vruntime tracks how much CPU time a process has received
How it selects next process:
Advantages over previous O(1) scheduler:
Nice Value: Priority hint for process scheduling (range: -20 to +19)
Values:
How it affects CFS:
Linux commands:
nice -n 10 myapp # Start with nice 10
renice -n 5 -p 1234 # Change PID 1234 to nice 5
ps aux # View NI (nice) column
Context Switching: Saving one process state and loading another
What gets saved:
Process:
- context_switch() macro
- switch_mm()
- switch_to()
Code flow:
// In scheduler
// Simplified sketch, not compilable C: the return type and parameter types
// are omitted. prev is the outgoing task, next the incoming one.
context_switch(rq, prev, next) {
struct mm_struct *mm, *oldmm;
mm = next->mm; // mm of the incoming task
oldmm = prev->active_mm; // mm the outgoing task was running on
if (!mm) // Kernel thread
next->active_mm = oldmm; // No mm of its own: keep using the previous one
else
switch_mm(oldmm, mm, next);
switch_to(prev, next, prev); // Register switch
}
Top Half (ISR - Interrupt Service Routine): Immediate interrupt handler
Bottom Half: Deferred processing
Example:
// Top half - very fast
// Acknowledges the device, defers the remaining work to my_tasklet, and
// reports the interrupt as handled. irq and dev_id are not used by the
// body shown here.
irqreturn_t interrupt_handler(int irq, void *dev_id) {
// Acknowledge hardware
hardware_ack();
// Schedule bottom half
tasklet_schedule(&my_tasklet);
return IRQ_HANDLED;
}
// Bottom half - can do more work
// Tasklet body for the work deferred by the top half. data is not used by
// the body shown here.
void tasklet_handler(unsigned long data) {
// Process received data
// Still runs in softirq context: must not sleep or block
}
Tasklets: Simple deferred execution mechanism
- Scheduled with tasklet_schedule(&tasklet)
Softirqs: More lightweight than tasklets
Workqueues: Heavy deferred work
Comparison table:
| Feature | Tasklet | Softirq | Workqueue |
| Context | Softirq | Softirq | Process |
| Can sleep | No | No | Yes |
| Can block | No | No | Yes |
| Speed | Fast | Fastest | Slower |
| Use case | Medium work | Critical | I/O, long work |
Memory Zones: Different regions of physical memory
Memory allocation functions:
- alloc_pages(gfp_mask, order) - Allocate pages (returns struct page *)
- __get_free_pages(gfp_mask, order) - Allocate pages (returns address)
- kmalloc(size, gfp_mask) - Allocate physically contiguous memory, typically smaller than a page
- vmalloc(size) - Allocate non-contiguous virtual memory
- get_zeroed_page() - Allocate zero-filled page
GFP (Get Free Pages) flags:
- GFP_KERNEL - Normal allocation, can sleep (for process context)
- GFP_ATOMIC - Atomic context, cannot sleep (for ISR/softirq)
- GFP_DMA - Allocate from DMA zone
- __GFP_HIGHMEM - Can use high memory (32-bit)
Example:
// Allocate 4 pages (16 KB)
// order 2 requests 2^2 = 4 pages; the 16 KB figure assumes 4 KB pages
struct page *pages = alloc_pages(GFP_KERNEL, 2);
// Allocate memory for structure
// sizeof(*data) keeps the size tied to the pointer's own type
struct mydata *data = kmalloc(sizeof(*data), GFP_KERNEL);
// In interrupt - must not sleep
unsigned long *buf = kmalloc(1024, GFP_ATOMIC);
// Allocate virtual memory (non-contiguous)
void *vbuf = vmalloc(10000);
// NOTE(review): none of these results is checked for NULL or freed in this snippet
kmalloc: Physically and virtually contiguous memory
vmalloc: Virtually contiguous, physically scattered
Comparison:
| Feature | kmalloc | vmalloc |
| Physical contiguity | Yes | No |
| Speed | Fast | Slower |
| DMA safe | Yes | No |
| Max size | 128-256 KB | Larger |
| Use | Small, DMA | Large buffers |
Spinlock: Busy-waiting lock for short critical sections
When to use:
Usage:
DEFINE_SPINLOCK(mylock);
// Acquire lock
spin_lock(&mylock);
// Critical section - must be very short!
spin_unlock(&mylock);
// In interrupt context
spin_lock_irqsave(&mylock, flags);
// Critical section
spin_unlock_irqrestore(&mylock, flags);
// Nested locks - always lock in same order
spin_lock(&lock1);
spin_lock(&lock2);
// ...
spin_unlock(&lock2);
spin_unlock(&lock1);
Types:
- spin_lock() - Basic spinlock
- spin_lock_irq() - Disables interrupts
- spin_lock_irqsave() - Saves and restores interrupt state
- spin_lock_bh() - Disables bottom halves
Atomic Operations: Operations that complete without interruption
Common atomic operations:
atomic_t count;
atomic_set(&count, 5); // Set to 5
atomic_read(&count); // Read value
atomic_inc(&count); // Increment
atomic_dec(&count); // Decrement
atomic_add(3, &count); // Add value
atomic_sub(2, &count); // Subtract
atomic_inc_and_test(&count); // Inc, test if zero
atomic_dec_and_test(&count); // Dec, test if zero
atomic_xchg(&count, 10); // Exchange
atomic_cmpxchg(&count, 5, 10); // Compare and exchange
When to use:
Example - reference counting:
// Object whose lifetime is governed by an atomic reference count
struct myobj {
atomic_t refcount; // Count of references, adjusted by get_obj()/put_obj()
};
// Get reference
// Takes one more reference on obj by atomically incrementing its count.
void get_obj(struct myobj *obj) {
atomic_inc(&obj->refcount);
}
// Release reference
// Drops one reference on obj. Only the call that brings the count to zero
// frees the object; every other call returns with obj still allocated.
void put_obj(struct myobj *obj) {
    if (!atomic_dec_and_test(&obj->refcount))
        return; // Other references remain

    kfree(obj); // Free when count reaches 0
}
Character Device Driver Components:
Complete example:
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/uaccess.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Simple character device");
static dev_t devno; // Device number
static struct cdev my_cdev; // cdev structure
static struct class *my_class; // Device class
// File operations
// open() handler: logs that the device was opened and always reports success.
// Neither argument is used.
static int my_open(struct inode *inode, struct file *filp) {
    pr_info("Device opened\n");

    return 0;
}
// read() handler: serves the fixed string "Hello" (including its NUL
// terminator) as the device's entire content.
//
// buf - user-space destination buffer
// len - maximum number of bytes the caller can accept
// off - file position; advanced by the number of bytes copied
//
// Returns the number of bytes copied, 0 once *off has reached the end of the
// data (EOF), or a negative errno such as -EFAULT if buf is not writable.
static ssize_t my_read(struct file *f, char __user *buf,
                       size_t len, loff_t *off) {
    static const char data[] = "Hello";

    // Clamps the copy to both len and the bytes remaining after *off,
    // checks the user copy, and updates *off.
    return simple_read_from_buffer(buf, len, off, data, sizeof(data));
}
// release() handler: logs that the device was closed and always reports
// success. Neither argument is used.
static int my_release(struct inode *inode, struct file *filp) {
    pr_info("Device closed\n");

    return 0;
}
// File operations table connecting open/read/release on the device to the
// handlers defined above; owner is set to this module.
static const struct file_operations my_fops = {
.owner = THIS_MODULE,
.open = my_open,
.read = my_read,
.release = my_release,
};
// Module initialization
// Registers the character device in four steps: device number, cdev, class,
// device node. Each step is checked; on failure the steps already completed
// are undone in reverse order and the error code is returned.
//
// Returns 0 on success or a negative errno from the failing step.
// NOTE(review): the two-argument class_create() matches older kernels; newer
// kernels take only the class name - confirm against the target version.
static int __init my_init(void) {
    struct device *dev;
    int ret;

    // Allocate device number
    ret = alloc_chrdev_region(&devno, 0, 1, "mydev");
    if (ret < 0)
        return ret;

    // Initialize and add cdev
    cdev_init(&my_cdev, &my_fops);
    ret = cdev_add(&my_cdev, devno, 1);
    if (ret < 0)
        goto err_unregister;

    // Create device class
    my_class = class_create(THIS_MODULE, "mydev_class");
    if (IS_ERR(my_class)) {
        ret = PTR_ERR(my_class);
        goto err_cdev_del;
    }

    dev = device_create(my_class, NULL, devno, NULL, "mydev");
    if (IS_ERR(dev)) {
        ret = PTR_ERR(dev);
        goto err_class_destroy;
    }

    pr_info("Device created\n");
    return 0;

err_class_destroy:
    class_destroy(my_class);
err_cdev_del:
    cdev_del(&my_cdev);
err_unregister:
    unregister_chrdev_region(devno, 1);
    return ret;
}
// Module cleanup
// Releases what my_init() set up, in the reverse order of creation:
// device node, class, cdev, then the device number region.
static void __exit my_exit(void) {
device_destroy(my_class, devno);
class_destroy(my_class);
cdev_del(&my_cdev);
unregister_chrdev_region(devno, 1);
pr_info("Device destroyed\n");
}
module_init(my_init);
module_exit(my_exit);
IOCTL (Input/Output Control): Device-specific commands beyond read/write
IOCTL command format:
_IOC(direction, type, nr, size)
Macros:
_IO(type, nr) // No argument
_IOR(type, nr, size) // Read from device
_IOW(type, nr, size) // Write to device
_IOWR(type, nr, size) // Read/Write
Example:
#define IOCTL_SET_VALUE _IOW('k', 1, int)
#define IOCTL_GET_VALUE _IOR('k', 2, int)
Driver implementation:
// User space
int val = 42;
ioctl(fd, IOCTL_SET_VALUE, &val);
ioctl(fd, IOCTL_GET_VALUE, &val);
// Kernel space
static long my_ioctl(struct file *f, unsigned int cmd,
unsigned long arg) {
int value;
switch(cmd) {
case IOCTL_SET_VALUE:
copy_from_user(&value, (int __user *)arg, sizeof(int));
pr_info("Set value: %d\n", value);
break;
case IOCTL_GET_VALUE:
value = 100;
copy_to_user((int __user *)arg, &value, sizeof(int));
break;
default:
return -EINVAL;
}
return 0;
}
// File operations table for the ioctl example: routes unlocked_ioctl to my_ioctl
static const struct file_operations my_fops = {
.unlocked_ioctl = my_ioctl,
};