Loading PDF…
Your browser or server settings are blocking the inline PDF viewer. Open it in a new tab or download it — it's the same guide!
From Linux fundamentals to kernel modules, character drivers, IOCTL, interrupts, workqueues and beyond.
Linux is a free and open-source OS based on the Unix-like system. It powers PCs, servers, embedded systems, and IoT devices.
Unix Philosophy:
ls | grep "error" | wc -l
Linux is divided into two primary layers:
| Aspect | Kernel Space | User Space |
|---|---|---|
| Description | Privileged space where the kernel runs | Where user applications run |
| Hardware Access | Direct hardware interaction | Cannot directly access hardware |
| Mode | Privileged (Ring 0) | Unprivileged (Ring 3) |
| Communication | Manages process, memory, I/O | Uses system calls to request kernel services |
The interface between user space and kernel space is the System Call Interface (syscall). Example: write(), read(), fork().
Example: When LibreOffice prints a document, it makes a write() system call → kernel invokes the printer device driver → driver translates to hardware-specific commands → printer executes.
An LKM is a piece of code that can be dynamically loaded/unloaded into the kernel at runtime without rebooting. LKMs extend kernel functionality — adding device drivers, new filesystems, or custom system calls.
Two methods to add LKMs:
/driver directory).# Load a module sudo insmod mymodule.ko # Check kernel logs dmesg | tail # Unload a module sudo rmmod mymodule # List loaded modules lsmod
LKM vs Base Kernel: Base kernel is at /boot and loaded at boot time. LKMs are loaded after the base kernel and communicate with it to complete their functions.
| Feature | Kernel Module | User Program |
|---|---|---|
| Address Space | Kernel space (shared with kernel) | User space (isolated) |
| Hardware Access | Direct (reads/writes I/O ports) | Via system calls only |
| Execution | Event-driven (interrupt handling) | Sequential, start to finish |
| Headers | <linux/module.h> | <stdio.h> |
| Entry Point | module_init() | main() |
| Privileges | Full access to all hardware & kernel resources | Restricted — must use system calls |
Module metadata is stored as macros from <linux/module.h> and is visible via the modinfo command.
#include <linux/module.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name <[email protected]>");
MODULE_DESCRIPTION("A sample Linux kernel module.");
MODULE_VERSION("1.0");License types:
| License | Rule | Use Case |
|---|---|---|
| GPL | Must share modifications | Preferred for Linux — kernel is GPL |
| Dual BSD/GPL | No need to share changes | Embedded systems, proprietary products |
| MIT | Completely permissive | Widespread adoption |
| Proprietary | Cannot see/modify/redistribute | Closed-source (e.g. NVIDIA driver) |
# View module info modinfo my_module.ko # Example output: description: A sample driver for learning purposes. author: Your Name <[email protected]> version: 1.0 license: GPL
Instead of main(), kernel modules use init and exit functions as entry/exit points.
#include <linux/module.h>
#include <linux/init.h>
/**
 * hello_init() - module entry point, run on "sudo insmod module.ko".
 *
 * Writes a load message to the kernel log at KERN_INFO level.
 *
 * Return: always 0, which reports a successful load.
 */
static int __init hello_init(void)
{
	printk(KERN_INFO "Module loaded!\n");

	return 0;
}
/**
 * hello_exit() - module exit point, run on "sudo rmmod module".
 *
 * Writes an unload message to the kernel log at KERN_INFO level.
 */
static void __exit hello_exit(void)
{
	printk(KERN_INFO "Module removed!\n");
}
module_init(hello_init); // Register init function
module_exit(hello_exit); // Register exit function
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Hello World Kernel Module");__init — marks the function as initialization code; freed after init to save memory.
__exit — marks the function as cleanup code.
printk() is the kernel-level logging function — like printf() but for kernel space. Messages are stored in the kernel log buffer and viewed with dmesg.
| Feature | printk() | printf() |
|---|---|---|
| Space | Kernel space | User space |
| Output | Kernel ring buffer (dmesg) | stdout (terminal) |
| Log levels | Yes (KERN_INFO, KERN_ERR, etc.) | No |
// Log level macros (in order of severity)
KERN_EMERG // 0 - System is unusable
KERN_ALERT // 1 - Action must be taken immediately
KERN_CRIT // 2 - Critical condition
KERN_ERR // 3 - Error condition
KERN_WARNING // 4 - Warning condition
KERN_NOTICE // 5 - Normal but significant
KERN_INFO // 6 - Informational
KERN_DEBUG // 7 - Debug-level messages
// Usage
printk(KERN_INFO "Device initialized, major = %d\n", major);
pr_info("Shorthand for KERN_INFO\n");
pr_err("Shorthand for KERN_ERR\n");Use module_param() to accept parameters at load time via insmod. Parameters appear in /sys/module/<name>/parameters/.
// Syntax module_param(name, type, permissions); // Supported types: bool, int, uint, long, ulong, short, ushort, charp // Example static int my_value = 10; module_param(my_value, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(my_value, "An integer parameter"); // Load with custom value sudo insmod mymodule.ko my_value=42 // Read current value cat /sys/module/mymodule/parameters/my_value
Pass an array:
// module_param_array(name, type, &count, permissions) static int my_array[5]; static int array_size; module_param_array(my_array, int, &array_size, 0444); // Load with array values sudo insmod mymodule.ko my_array=10,20,30
Callback on parameter change:
/**
 * param_set_cb() - module_param_cb() setter, called when the parameter is updated.
 * @val: new value as text, as written by the user.
 * @kp:  descriptor of the parameter being set (unused here).
 *
 * Parses @val as a base-10 integer into valueETX. The "updated" message is
 * logged only when parsing succeeded, so the log never announces a value
 * that was not actually stored.
 *
 * Return: 0 on success, or the negative errno returned by kstrtoint().
 */
static int param_set_cb(const char *val, const struct kernel_param *kp)
{
	int res = kstrtoint(val, 10, &valueETX);

	if (res)
		return res;

	pr_info("Parameter updated to: %d\n", valueETX);
	return 0;
}
static const struct kernel_param_ops param_ops = {
.set = param_set_cb,
.get = param_get_standard_int,
};
module_param_cb(valueETX, &param_ops, &valueETX, 0644);
A device driver is a translator between the OS/user applications and hardware. In Linux, everything is a file — even hardware is accessed through device files in /dev.
| Type | Data Transfer | Examples | /dev entry |
|---|---|---|---|
| Character Device | Byte by byte (sequential stream) | Keyboard, serial port, audio | /dev/tty0, /dev/ttyS0 |
| Block Device | Fixed-size blocks (random access) | HDD, SSD, USB drive, CD-ROM | /dev/sda, /dev/sdb |
| Network Device | Packets | Ethernet card, Wi-Fi adapter, loopback | Not in /dev (use ip link) |
# Identify device types using ls -l /dev ls -l /dev # Output example: brw-rw---- 1 root disk 8, 0 /dev/sda # b = block device crw-rw---- 1 root tty 4, 0 /dev/tty0 # c = character device # b = block device, c = character device # The two numbers are Major and Minor numbers
Major Number: Identifies the device driver (which driver handles this device).
Minor Number: Identifies the specific device instance managed by that driver.
ls -l /dev/ttyS0 # crw-rw---- 1 root dialout 4, 64 /dev/ttyS0 # Major: 4 → Serial driver # Minor: 64 → Specific port ttyS0 # View all registered drivers cat /proc/devices
Analogy: Major Number = Phone number of the driver. Device Node = Phone you pick up to call. User program = Caller.
| Feature | Static | Dynamic (Preferred) |
|---|---|---|
| Major number | Manually set by developer | Kernel assigns automatically |
| Conflicts | Prone to conflicts | No conflicts |
| Function | register_chrdev_region() | alloc_chrdev_region() |
Static Allocation:
dev_t dev = MKDEV(202, 0); // Major=202, Minor=0
register_chrdev_region(dev, 1, "my_device");
// Retrieve numbers
printk("Major=%d Minor=%d\n", MAJOR(dev), MINOR(dev));
// Free on exit
unregister_chrdev_region(dev, 1);Dynamic Allocation:
dev_t dev = 0;
alloc_chrdev_region(&dev, 0, 1, "my_device");
printk("Major=%d Minor=%d\n", MAJOR(dev), MINOR(dev));
// Free on exit
unregister_chrdev_region(dev, 1);A device node is a special file in /dev that acts as the interface between user-space applications and the kernel driver.
Manual creation using mknod:
# mknod -m <permissions> <path> <type> <major> <minor> sudo mknod -m 666 /dev/my_device c 202 0 # c = character device, 202 = major, 0 = minor # Set permissions separately sudo chmod 666 /dev/my_device # Verify ls -l /dev/my_device # crw-rw-rw- 1 root root 202, 0 /dev/my_device # Remove when done sudo rm /dev/my_device
Automatic creation using udev:
// In driver init function: // 1. Allocate major/minor alloc_chrdev_region(&dev, 0, 1, "etx_Dev"); // 2. Create a class (visible in /sys/class/) dev_class = class_create(THIS_MODULE, "etx_class"); // 3. Create the device — udev creates /dev/etx_device automatically device_create(dev_class, NULL, dev, NULL, "etx_device"); // In driver exit function — cleanup: device_destroy(dev_class, dev); class_destroy(dev_class);
struct cdev represents a character device in the kernel. It links the device number (dev_t) to the driver's file operations (file_operations).
// Static allocation (compile-time)
static struct cdev my_cdev;
cdev_init(&my_cdev, &my_fops); // Link with file ops
// Dynamic allocation (runtime)
struct cdev *my_cdev = cdev_alloc();
my_cdev->ops = &my_fops;
// Register with the kernel (after init)
int cdev_add(struct cdev *cdev, dev_t dev, unsigned int count);
// Example
cdev_init(&etx_cdev, &fops);
if (cdev_add(&etx_cdev, dev, 1) < 0) {
pr_err("Cannot add device to system\n");
}
// Unregister on exit
void cdev_del(struct cdev *cdev);struct file_operations defines the functions the driver provides to handle system calls from user space.
#include <linux/fs.h>
// Define the file operations
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = etx_open,
.release = etx_release,
.read = etx_read,
.write = etx_write,
};
/**
 * etx_open() - open handler, called when an application opens /dev/etx_device.
 * @inode: inode of the device file (unused).
 * @file:  open file instance (unused).
 *
 * Return: always 0.
 */
static int etx_open(struct inode *inode, struct file *file)
{
	pr_info("Driver Open Called\n");

	return 0;
}
/**
 * etx_release() - release handler, called when an application closes the device.
 * @inode: inode of the device file (unused).
 * @file:  open file instance (unused).
 *
 * Return: always 0.
 */
static int etx_release(struct inode *inode, struct file *file)
{
	pr_info("Driver Release Called\n");

	return 0;
}
/**
 * etx_read() - copy data from the kernel buffer to user space.
 * @filp: open file instance (unused).
 * @buf:  user-space destination buffer.
 * @len:  number of bytes the caller asked for.
 * @off:  file position; advanced by the number of bytes copied.
 *
 * simple_read_from_buffer() copies at most @len bytes starting at *@off,
 * so the user buffer is never overrun, and returns 0 once *@off reaches the
 * end of the data so that readers such as cat see end-of-file.
 *
 * NOTE(review): mem_size is defined outside this snippet; it is assumed to
 * be the number of valid bytes in kernel_buffer — confirm.
 *
 * Return: bytes copied, 0 at end of data, or a negative errno
 *         (-EFAULT if the user buffer is not writable).
 */
static ssize_t etx_read(struct file *filp, char __user *buf,
			size_t len, loff_t *off)
{
	return simple_read_from_buffer(buf, len, off, kernel_buffer, mem_size);
}
/**
 * etx_write() - copy data from user space into the kernel buffer.
 * @filp: open file instance (unused).
 * @buf:  user-space source buffer.
 * @len:  number of bytes the caller wants to write.
 * @off:  file position (unused; every write starts at the buffer start).
 *
 * The copy is clamped to mem_size so an oversized write cannot run past the
 * end of kernel_buffer, and a faulting user pointer is reported instead of
 * being silently ignored.
 *
 * NOTE(review): mem_size is defined outside this snippet; it is assumed to
 * be the capacity of kernel_buffer in bytes — confirm.
 *
 * Return: number of bytes stored (may be less than @len), or -EFAULT.
 */
static ssize_t etx_write(struct file *filp, const char __user *buf,
			 size_t len, loff_t *off)
{
	if (len > mem_size)
		len = mem_size;

	if (copy_from_user(kernel_buffer, buf, len))
		return -EFAULT;

	return len;
}
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/err.h> // IS_ERR, PTR_ERR, errno codes
#include <linux/slab.h> // kmalloc
#include <linux/uaccess.h> // copy_to/from_user
#define MEM_SIZE 1024
dev_t dev = 0;
static struct class *dev_class;
static struct cdev etx_cdev;
static char *kernel_buffer;
/* File Operations */
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = etx_open,
.release = etx_release,
.read = etx_read,
.write = etx_write,
};
/**
 * etx_driver_init() - module entry point: set up the character device.
 *
 * Every step is checked, and on failure everything acquired so far is
 * released in reverse order. The data buffer is allocated before the device
 * is registered, so the file operations can never run with kernel_buffer
 * unset.
 *
 * Return: 0 on success, or a negative errno from the failing step.
 */
static int __init etx_driver_init(void)
{
	struct device *device;
	int ret;

	/* 1. Allocate the kernel buffer before anything can reach it */
	kernel_buffer = kmalloc(MEM_SIZE, GFP_KERNEL);
	if (!kernel_buffer) {
		pr_err("Cannot allocate kernel buffer\n");
		return -ENOMEM;
	}
	strcpy(kernel_buffer, "Hello_World");

	/* 2. Allocate major/minor numbers */
	ret = alloc_chrdev_region(&dev, 0, 1, "etx_Dev");
	if (ret < 0) {
		pr_err("Cannot allocate major number\n");
		goto err_free;
	}
	pr_info("Major=%d Minor=%d\n", MAJOR(dev), MINOR(dev));

	/* 3. Init and add cdev */
	cdev_init(&etx_cdev, &fops);
	ret = cdev_add(&etx_cdev, dev, 1);
	if (ret < 0) {
		pr_err("Cannot add device to system\n");
		goto err_unregister;
	}

	/* 4. Create class and device node */
	dev_class = class_create(THIS_MODULE, "etx_class");
	if (IS_ERR(dev_class)) {
		pr_err("Cannot create device class\n");
		ret = PTR_ERR(dev_class);
		goto err_cdev;
	}

	device = device_create(dev_class, NULL, dev, NULL, "etx_device");
	if (IS_ERR(device)) {
		pr_err("Cannot create device node\n");
		ret = PTR_ERR(device);
		goto err_class;
	}

	pr_info("Driver Inserted Successfully\n");
	return 0;

err_class:
	class_destroy(dev_class);
err_cdev:
	cdev_del(&etx_cdev);
err_unregister:
	unregister_chrdev_region(dev, 1);
err_free:
	kfree(kernel_buffer);
	return ret;
}
/*
 * etx_driver_exit() - module exit point: release what etx_driver_init() set up.
 *
 * Frees the data buffer, destroys the device and its class, deletes the
 * cdev, gives back the device number region, then logs a removal message.
 */
static void __exit etx_driver_exit(void)
{
/* Release the buffer allocated with kmalloc() in init */
kfree(kernel_buffer);
/* Destroy the device before the class it was created on */
device_destroy(dev_class, dev);
class_destroy(dev_class);
/* Delete the cdev that was added in init */
cdev_del(&etx_cdev);
/* Give back the single device number allocated in init */
unregister_chrdev_region(dev, 1);
pr_info("Driver Removed Successfully\n");
}
module_init(etx_driver_init);
module_exit(etx_driver_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("EmbeTronicX");
MODULE_DESCRIPTION("Simple Linux Character Device Driver");Testing:
sudo insmod driver.ko ls /dev/etx_device sudo cat /dev/etx_device # Read sudo echo "Hello" > /dev/etx_device # Write dmesg | tail # Check logs sudo rmmod driver
kmalloc() — Allocates physically contiguous memory in kernel space (like malloc for kernel).
#include <linux/slab.h>
// Syntax: kmalloc(size, flags)
char *buffer = kmalloc(1024, GFP_KERNEL);
if (!buffer) { pr_err("Memory allocation failed\n"); }
// Clear the memory (kmalloc does NOT zero it)
memset(buffer, 0, 1024);
// Free when done
kfree(buffer);
// Common GFP flags:
// GFP_KERNEL — Normal allocation, may sleep (use in process context)
// GFP_ATOMIC — No sleep (use in interrupt handlers)
// GFP_DMA — DMA-compatible memorycopy_from_user() — Copy data from user space → kernel space.
// unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) // Returns: 0 on success, non-zero = bytes NOT copied copy_from_user(kernel_buffer, user_buf, len);
copy_to_user() — Copy data from kernel space → user space.
// unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) copy_to_user(user_buf, kernel_buffer, mem_size);
IOCTL (Input/Output Control) is a system call for device-specific operations that can't be handled by standard read()/write(). Used for: changing baud rate, ejecting CD, adjusting volume, toggling LEDs, controlling fan speed.
4 IOCTL command types:
| Macro | Direction | Description |
|---|---|---|
| _IO | None | No data transfer |
| _IOW | User → Kernel | Write data to driver (copy_from_user) |
| _IOR | Kernel → User | Read data from driver (copy_to_user) |
| _IOWR | Both | Read and Write |
Kernel driver side:
#include <linux/ioctl.h>
// Define IOCTL commands
#define WR_VALUE _IOW('a', 'a', int32_t*)
#define RD_VALUE _IOR('a', 'b', int32_t*)
int32_t value = 0;
// IOCTL handler function
static long etx_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case WR_VALUE:
copy_from_user(&value, (int32_t*)arg, sizeof(value));
pr_info("Value Written: %d\n", value);
break;
case RD_VALUE:
copy_to_user((int32_t*)arg, &value, sizeof(value));
pr_info("Value Read: %d\n", value);
break;
default:
pr_info("Invalid IOCTL command\n");
}
return 0;
}
// Register in file_operations
static struct file_operations fops = {
.unlocked_ioctl = etx_ioctl,
// ... other ops
};User space application:
#include <sys/ioctl.h>
#define WR_VALUE _IOW('a','a',int32_t*)
#define RD_VALUE _IOR('a','b',int32_t*)
int fd = open("/dev/my_device", O_RDWR);
int32_t number = 42, value;
ioctl(fd, WR_VALUE, &number); // Write 42 to driver
ioctl(fd, RD_VALUE, &value); // Read back from driver
printf("Value from device: %d\n", value);
close(fd);procfs is a virtual filesystem mounted at /proc. It provides a runtime interface to kernel data structures. Not stored on disk — created in memory at boot.
Useful /proc entries:
cat /proc/devices # All registered char/block major numbers cat /proc/iomem # Physical RAM and bus addresses cat /proc/ioports # I/O port addresses (x86) cat /proc/interrupts # Registered IRQ numbers cat /proc/softirqs # Registered soft IRQs cat /proc/kallsyms # Running kernel symbols lsmod # All loaded kernel modules
Creating a proc file:
#include <linux/proc_fs.h>
// Create proc entry
static struct proc_dir_entry *proc_entry;
/**
 * read_proc() - read handler for the proc entry.
 * @filp: open file instance (unused).
 * @buf:  user-space destination buffer.
 * @len:  number of bytes requested.
 * @off:  file position; advanced by the number of bytes copied.
 *
 * Copies at most the bytes remaining in etx_array from *@off, so a large
 * @len cannot read past the array, and returns 0 once the end is reached so
 * readers see end-of-file.
 *
 * NOTE(review): etx_array is declared outside this snippet; sizeof() below
 * assumes it is a real array, not a pointer — confirm.
 *
 * Return: bytes copied, 0 at end of data, or -EFAULT.
 */
static ssize_t read_proc(struct file *filp, char __user *buf,
			 size_t len, loff_t *off)
{
	size_t avail = sizeof(etx_array);

	if (*off >= avail)
		return 0;

	if (len > avail - *off)
		len = avail - *off;

	if (copy_to_user(buf, etx_array + *off, len))
		return -EFAULT;

	*off += len;
	return len;
}
static ssize_t write_proc(struct file *filp, const char *buf,
size_t len, loff_t *off) {
copy_from_user(etx_array, buf, len);
return len;
}
static struct proc_ops proc_fops = {
.proc_read = read_proc,
.proc_write = write_proc,
};
// In init function
proc_entry = proc_create("my_proc_file", 0666, NULL, &proc_fops);
// In exit function
proc_remove(proc_entry);sysfs is a special filesystem mounted at /sys. It exposes kernel objects (devices, drivers, subsystems) as files for user-space interaction.
#include <linux/kobject.h>
#include <linux/sysfs.h>
int etx_value = 0;
struct kobject *kobj_ref;
/**
 * sysfs_show() - show callback, called when user space reads the sysfs file.
 * @kobj: kobject the attribute belongs to (unused).
 * @attr: attribute being read (unused).
 * @buf:  output buffer provided by sysfs.
 *
 * Formats etx_value as a decimal number followed by a newline. sysfs_emit()
 * is the sysfs-aware replacement for sprintf() in show callbacks.
 *
 * Return: number of bytes written to @buf.
 */
static ssize_t sysfs_show(struct kobject *kobj,
			  struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", etx_value);
}
/**
 * sysfs_store() - store callback, called when user space writes the sysfs file.
 * @kobj:  kobject the attribute belongs to (unused).
 * @attr:  attribute being written (unused).
 * @buf:   text written by the user.
 * @count: number of bytes in @buf.
 *
 * Parses @buf as a base-10 integer into etx_value. Malformed input is
 * rejected with an error instead of being silently accepted.
 *
 * Return: @count on success, or the negative errno from kstrtoint().
 */
static ssize_t sysfs_store(struct kobject *kobj,
			   struct kobj_attribute *attr,
			   const char *buf, size_t count)
{
	int ret = kstrtoint(buf, 10, &etx_value);

	if (ret)
		return ret;

	return count;
}
// Define attribute (file name = "etx_value", permissions = 0660)
struct kobj_attribute etx_attr = __ATTR(etx_value, 0660,
sysfs_show, sysfs_store);
// In init function
kobj_ref = kobject_create_and_add("etx_sysfs", kernel_kobj);
// Creates: /sys/kernel/etx_sysfs/
sysfs_create_file(kobj_ref, &etx_attr.attr);
// In exit function
sysfs_remove_file(kernel_kobj, &etx_attr.attr);
kobject_put(kobj_ref);
// Usage from user space
cat /sys/kernel/etx_sysfs/etx_value # Read
echo 42 > /sys/kernel/etx_sysfs/etx_value # WriteAn interrupt is a signal to the processor to temporarily halt current execution and handle an event (keyboard press, network packet, timer overflow).
| Feature | Interrupt | Exception |
|---|---|---|
| Timing | Asynchronous — anytime | Synchronous — during instruction |
| Source | External hardware | Processor itself |
| Examples | Keyboard, NIC, timer | Page fault, divide-by-zero |
Top Half vs Bottom Half:
| Aspect | Top Half (ISR) | Bottom Half (Deferred) |
|---|---|---|
| Timing | Immediate — runs when interrupt fires | Delayed — runs later in process context |
| Purpose | Acknowledge interrupt, minimal work | Heavy/slow processing |
| Context | Interrupt context — cannot sleep | Process context — can sleep |
| Mechanisms | request_irq() | Softirq, Tasklet, Workqueue, Threaded IRQ |
#include <linux/interrupt.h>
// Register ISR
// int request_irq(irq, handler, flags, name, dev_id)
int request_irq(unsigned int irq,
irq_handler_t handler,
unsigned long flags,
const char *name,
void *dev_id);
// Common flags
IRQF_SHARED // Share IRQ with other devices
IRQF_DISABLED // Obsolete: removed in Linux 4.1 (handlers always run with local interrupts disabled) — do not use in new code
/**
 * my_irq_handler() - example interrupt service routine.
 * @irq:    number of the interrupt that fired.
 * @dev_id: cookie that was passed to request_irq() (unused).
 *
 * Return: IRQ_HANDLED. A handler returns IRQ_NONE instead when the
 *         interrupt did not come from its device.
 */
static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	pr_info("Interrupt received! IRQ = %d\n", irq);

	/* Device-specific interrupt handling goes here. */

	return IRQ_HANDLED;
}
// Register in init
request_irq(IRQ_NUM, my_irq_handler, IRQF_SHARED, "my_device", &dev_id);
// Unregister in exit
free_irq(IRQ_NUM, &dev_id);
// Enable/Disable IRQ
enable_irq(IRQ_NUM);
disable_irq(IRQ_NUM);
disable_irq_nosync(IRQ_NUM); // Non-blocking disableWait queues put a process to sleep until a condition becomes true. They prevent busy-waiting and free up the CPU for other tasks.
#include <linux/wait.h>
// 1. Declare and initialize (static)
DECLARE_WAIT_QUEUE_HEAD(my_wq);
// 1. Declare and initialize (dynamic)
wait_queue_head_t my_wq;
init_waitqueue_head(&my_wq);
// 2. Put process to sleep (various macros):
// Uninterruptible sleep until condition is true
wait_event(my_wq, condition);
// Interruptible sleep (can be woken by signals)
int ret = wait_event_interruptible(my_wq, condition);
if (ret == -ERESTARTSYS) pr_info("Interrupted by signal\n");
// Sleep with timeout (returns remaining jiffies or 0)
wait_event_timeout(my_wq, condition, HZ * 5); // 5 second timeout
// Killable sleep (woken only by fatal signals)
wait_event_killable(my_wq, condition);
// 3. Wake up sleeping processes:
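wake_up(&my_wq); // Wake all non-exclusive waiters plus one exclusive waiter (interruptible or uninterruptible)
wake_up_all(&my_wq); // Wake every waiter on the queue (interruptible or uninterruptible)
wake_up_interruptible(&my_wq); // Like wake_up(), but only wakes waiters in interruptible sleep
A workqueue defers work from an interrupt handler to a kernel thread running in process context, allowing it to sleep. Note that a worker thread has no user address space of its own, so it cannot access the user memory of the process that triggered the work.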
#include <linux/workqueue.h>
// 1. Define and declare work (static)
/**
 * my_work_fn() - work handler executed when the queued work item runs.
 * @work: the work item being executed (unused).
 */
void my_work_fn(struct work_struct *work)
{
	pr_info("Deferred work executing!\n");
}
DECLARE_WORK(my_work, my_work_fn);
// 2. Dynamic initialization
struct work_struct my_work;
INIT_WORK(&my_work, my_work_fn);
// 3. Schedule work (adds to global workqueue)
schedule_work(&my_work);
// 4. Delayed work (execute after delay)
DECLARE_DELAYED_WORK(my_delayed_work, my_work_fn);
schedule_delayed_work(&my_delayed_work, HZ * 2); // 2 sec delay
// 5. Wait for work to complete
flush_work(&my_work);
flush_scheduled_work();
// 6. Cancel pending work
cancel_work_sync(&my_work);
cancel_delayed_work_sync(&my_delayed_work);
// 7. Check if work is pending
if (work_pending(&my_work))
pr_info("Work still pending\n");Bottom Half Comparison:
| Mechanism | Context | Can Sleep? | Use Case |
|---|---|---|---|
| Softirq | Interrupt | No | High-priority (networking, block) |
| Tasklet | Interrupt | No | Simple deferred work, simpler than softirq |
| Workqueue | Process (kernel thread) | Yes | Heavy/blocking work after interrupt |
| Threaded IRQ | Process (kernel thread) | Yes | Preemptible IRQ handling |
| Feature | Process Context | Interrupt Context |
|---|---|---|
| Definition | Kernel code handling system calls | Kernel code handling hardware interrupts |
| Preemptibility | Preemptible | Non-preemptible |
| Can sleep? | Yes (can block, use mutexes) | No (must complete quickly) |
| User memory access | Yes | No |
| Locking | Mutexes, semaphores | Spinlocks only |
| Example | read(), write() system calls | Keyboard interrupt, timer ISR |
1. Makefile for the module:
obj-m += driver.o all: make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules clean: make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean # Cross-compilation for ARM # make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf-
2. Build, Insert, Test, Remove:
# Build make # Insert module sudo insmod driver.ko # Check logs dmesg | tail -20 # Verify device created ls -l /dev/etx_device cat /proc/devices | grep etx # Test read/write sudo cat /dev/etx_device echo "test" | sudo tee /dev/etx_device # List loaded modules lsmod | grep driver # Remove module sudo rmmod driver # Check cleanup logs dmesg | tail -5
Kernel Module: A piece of code that can be dynamically loaded/unloaded into the Linux kernel at runtime without recompiling the entire kernel. It extends kernel functionality broadly.
Device Driver: A specific type of kernel module that manages communication between the OS and hardware devices.
| Aspect | Kernel Module | Device Driver |
|---|---|---|
| Scope | General — filesystems, protocols, etc. | Specifically manages hardware |
| Relation | Superset | A subset of kernel modules |
| Hardware | May or may not interact | Always manages a hardware device |
| Device types | Not applicable | char, block, network |
| Model | Generic module model | Follows Linux driver model & device tree |
Key takeaway: All device drivers are kernel modules, but not all kernel modules are device drivers.
sudo insmod module_name.ko # Load sudo rmmod module_name # Unload lsmod # List all loaded modules modinfo module_name.ko # Show module metadata
The Device Tree is a data structure that describes the hardware platform to the Linux kernel — replacing hardcoded board-specific definitions in the kernel source.
Why it matters:
File extensions:
.dts — Device Tree Source (human-readable).dtb — Device Tree Blob (compiled binary loaded by bootloader).dtsi — Device Tree Source Include (shared/common definitions)Example .dts file:
/ {
compatible = "my-board";
uart0: serial@e1000000 {
compatible = "ns16550a";
reg = <0xe1000000 0x1000>;
clock-frequency = <115200>;
interrupts = <10>;
};
gpio0: gpio@e1001000 {
compatible = "gpio-controller";
reg = <0xe1001000 0x1000>;
#gpio-cells = <2>;
};
};Driver binding: When the kernel boots, it matches compatible strings in the device tree with drivers registered via of_match_table.
static const struct of_device_id my_driver_ids[] = {
{ .compatible = "ns16550a" },
{ }
};
MODULE_DEVICE_TABLE(of, my_driver_ids);
static struct platform_driver my_driver = {
.driver = {
.name = "my_driver",
.of_match_table = my_driver_ids,
},
.probe = my_probe,
.remove = my_remove,
};The file_operations structure is the heart of a character driver — it contains function pointers that the kernel calls in response to system calls on the device file.
struct file_operations {
int (*open) (struct inode *, struct file *);
int (*release)(struct inode *, struct file *);
ssize_t (*read) (struct file *, char __user *,
size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *,
size_t, loff_t *);
long (*unlocked_ioctl)(struct file *, unsigned int,
unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
loff_t (*llseek) (struct file *, loff_t, int);
int (*fsync) (struct file *, loff_t, loff_t, int);
};
| Callback | Triggered by | Purpose |
|---|---|---|
| open | open() | Device opened, initialize resources |
| release | close() | Device closed, free resources |
| read | read() | Send data from kernel → user |
| write | write() | Receive data from user → kernel |
| unlocked_ioctl | ioctl() | Device-specific control commands |
| mmap | mmap() | Map device memory into user space |
| poll | select()/poll() | Non-blocking I/O readiness check |
| llseek | lseek() | Change file position (offset) |
// Registration example
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = my_open,
.release = my_release,
.read = my_read,
.write = my_write,
.unlocked_ioctl = my_ioctl,
.mmap = my_mmap,
.poll = my_poll,
};Any callback left as NULL uses the kernel's default behavior for that operation.
Mutex (Mutual Exclusion): Binary lock. Only one task holds it at a time. The holder sleeps if unavailable.
Semaphore: Counter-based synchronization. Allows multiple concurrent holders.
Spinlock: Busy-waiting lock. Task spins in a loop until the lock is free.
| Feature | Mutex | Semaphore | Spinlock |
|---|---|---|---|
| Blocking | Yes (sleeps) | Yes (sleeps) | No (busy-waits) |
| Speed | Fast | Medium | Fastest |
| ISR Safe | ❌ No | ❌ No | ✅ Yes |
| Who unlocks | Same task only | Any task | Same task only |
| Best for | Long critical sections | Resource counting | Short CS, interrupt context |
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/semaphore.h>
/* ── Mutex ── */
DEFINE_MUTEX(my_mutex);
mutex_lock(&my_mutex);
// critical section — can sleep here
mutex_unlock(&my_mutex);
/* ── Spinlock ── */
DEFINE_SPINLOCK(my_spinlock);
spin_lock(&my_spinlock);
// critical section — must be fast, no sleeping
spin_unlock(&my_spinlock);
// ISR-safe variant (saves/restores IRQ state)
unsigned long flags;
spin_lock_irqsave(&my_spinlock, flags);
// safe to use inside/outside interrupt context
spin_unlock_irqrestore(&my_spinlock, flags);
/* ── Semaphore ── */
struct semaphore my_sem;
sema_init(&my_sem, 1); // binary semaphore (count = 1)
down(&my_sem); // acquire (decrements count; sleeps if 0)
// critical section
up(&my_sem); // release (increments count)Tasklets are a softirq-based bottom-half mechanism for simple deferred work. They run in interrupt context and cannot sleep.
/**
 * my_tasklet_func() - tasklet body, run after being scheduled from the ISR.
 * @data: value given when the tasklet was declared; cast here to the
 *        driver's device structure.
 *
 * Must not sleep: no sleep calls, mutex_lock(), kmalloc(GFP_KERNEL), etc.
 */
static void my_tasklet_func(unsigned long data)
{
	struct my_device *dev = (struct my_device *)data;

	pr_info("Tasklet running — processing interrupt data\n");
}
// Declare tasklet (static)
static DECLARE_TASKLET(my_tasklet, my_tasklet_func, 0);
/**
 * my_isr() - top half: do the minimum and defer the rest to the tasklet.
 * @irq:    number of the interrupt that fired (unused).
 * @dev_id: cookie that was passed at registration (unused).
 *
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t my_isr(int irq, void *dev_id)
{
	/* Quick work only: acknowledge the hardware interrupt here. */

	/* Hand the remaining processing to the bottom half. */
	tasklet_schedule(&my_tasklet);

	return IRQ_HANDLED;
}
// Disable/Kill tasklet on exit
tasklet_disable(&my_tasklet);
tasklet_kill(&my_tasklet);| Feature | Tasklet | Workqueue |
|---|---|---|
| Context | Interrupt (softirq) | Process (kernel thread) |
| Can sleep? | ❌ No | ✅ Yes |
| Can block? | ❌ No | ✅ Yes (mutex, I/O) |
| Complexity | Simple | More flexible |
| Scheduling | ASAP on same CPU | Queued in kernel thread |
| Use when | Fast non-blocking deferred work | Heavy/blocking deferred work |
Process states are controlled using kernel macros and state constants defined in <linux/sched.h>:
| State | Meaning |
|---|---|
| TASK_RUNNING | Process is running or ready to run |
| TASK_INTERRUPTIBLE | Sleeping — can be woken by a signal |
| TASK_UNINTERRUPTIBLE | Sleeping — cannot be woken by signal (e.g. waiting for I/O) |
| TASK_STOPPED | Process stopped by debugger or job control |
| TASK_TRACED | Process being traced by a debugger |
// Set state of current process set_current_state(TASK_INTERRUPTIBLE); schedule(); // Yield CPU — go to sleep // Set another task's state set_task_state(task, TASK_RUNNING); // Direct set (no memory barriers — use carefully) __set_task_state(task, TASK_INTERRUPTIBLE);
task_struct is the fundamental process descriptor in the Linux kernel — every process/thread has one. It stores everything the kernel needs to manage a process.
| Field | Type | Purpose |
|---|---|---|
| state | volatile long | Current process state |
| pid | pid_t | Process ID |
| tgid | pid_t | Thread group ID |
| prio / static_prio | int | Dynamic / static priority |
| mm | struct mm_struct* | Memory map (NULL for kernel threads) |
| files | struct files_struct* | Open file descriptors |
| signal | struct signal_struct* | Signal handlers |
| se.vruntime | u64 | CFS virtual runtime |
| real_parent | struct task_struct* | Parent process pointer |
| comm | char[] | Command name (executable) |
// 'current' always points to the running process's task_struct
#include <linux/sched.h>
pr_info("PID: %d\n", current->pid);
pr_info("Name: %s\n", current->comm);
pr_info("State: %ld\n", current->state);
// Get thread_info (stack bottom)
current_thread_info();| Feature | Kernel Thread | User Thread |
|---|---|---|
| Address space (mm) | NULL — no user address space | Has user address space |
| Memory access | Kernel memory only | User + kernel (via syscalls) |
| Creation | kthread_create() | pthread_create() / clone() |
| Examples | kswapd, kworker, ksoftirqd | App threads |
| Run mode | Kernel mode only | User + kernel mode |
#include <linux/kthread.h>
/**
 * my_thread_fn() - body of the example kernel thread.
 * @data: argument supplied at thread creation (unused).
 *
 * Logs a message and sleeps for 1000 ms per iteration until
 * kthread_should_stop() reports that the thread has been asked to stop.
 *
 * Return: always 0.
 */
static int my_thread_fn(void *data)
{
	for (;;) {
		if (kthread_should_stop())
			break;

		pr_info("Kernel thread running\n");
		msleep(1000);
	}

	return 0;
}
// Create and start thread
struct task_struct *t = kthread_create(my_thread_fn, NULL, "my_kthread");
if (!IS_ERR(t))
wake_up_process(t);
// Stop thread on exit
kthread_stop(t);| Feature | fork() | vfork() |
|---|---|---|
| Memory | Copy-on-write — separate address spaces | Shares parent's address space |
| Parent | Continues running immediately | Blocked until child calls exec() or exit() |
| Speed | Slower (page table setup) | Faster (no copying) |
| Safety | Safe — changes isolated | Dangerous — child must not modify shared memory |
| Use case | General process creation | When child immediately calls exec() |
// fork() — parent and child run independently
pid_t pid = fork();
if (pid == 0) {
// Child code — has own copy of memory (copy-on-write)
exit(0);
} else if (pid > 0) {
// Parent continues — pid = child's PID
wait(NULL); // Reap child to avoid zombie
}
// vfork() — parent blocks until child exec()/exit()
pid_t pid = vfork();
if (pid == 0) {
	// Child MUST call exec() or _exit() immediately
	execv("/bin/ls", args);
	// Only reached if execv() failed: leave with _exit() so the child
	// never falls through into the parent's code
	_exit(127);
} else if (pid > 0) {
	// Parent resumes here after child calls exec/exit
} else {
	// vfork() failed: no child was created
}
A zombie process is a child that has exited but whose exit status hasn't been collected by the parent (via wait()). Its task_struct remains in the kernel occupying a process table slot.
How the kernel handles orphaned zombies:
init (PID 1) or nearest subreaperinit periodically calls wait() to reap all zombiesexit_notify() and forget_original_parent() internallypid_t pid = fork();
if (pid == 0) {
exit(5); // Child exits immediately
} else {
sleep(10); // Child is ZOMBIE during this gap!
wait(NULL); // Parent reaps zombie — frees task_struct
}
// To detect zombies
// ps aux | grep 'Z' or cat /proc/<pid>/status | grep State
CFS (the Completely Fair Scheduler) is the default Linux process scheduler since kernel 2.6.23. It aims to give every process a fair share of CPU time proportional to its weight (nice value).
Key Concepts:
| Concept | Description |
|---|---|
| vruntime | Virtual runtime — tracks how much CPU each process has used. Lower = more starved. |
| Red-Black Tree | All runnable tasks stored in an RB-tree ordered by vruntime. O(log n) insert/delete. |
| Next to run | Always the leftmost node (smallest vruntime = most CPU-starved process). |
| No fixed time slice | Run time is dynamic, based on number of runnable processes and their weights. |
Scheduling loop:
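vruntime += actual_runtime × (1024 / weight)   — 1024 is the weight of nice 0, so a default-priority task's vruntime advances at wall-clock rate
Advantages over the old O(1) scheduler: Better fairness, no starvation, improved interactive response, scales well on multi-CPU systems.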
Nice value is a priority hint ranging from -20 (highest priority) to +19 (lowest priority). Default is 0.
In CFS, nice maps to a weight. Lower nice → higher weight → vruntime grows slower → process gets more CPU time.
| Nice | Weight | Effect |
|---|---|---|
| -20 | ~88761 | Gets the most CPU — near real-time |
| 0 | 1024 | Default — balanced share |
| +19 | ~15 | Gets the least CPU — background tasks |
nice -n 10 my_app # Start app with nice +10 (lower priority) renice -n -5 -p 1234 # Change running PID 1234 to nice -5 ps axo pid,ni,comm # View nice values of all processes # In kernel — set scheduling priority programmatically set_user_nice(current, 10);
A context switch is the process of saving the running process's state and loading another process's state so it can execute on the CPU.
What gets saved/restored:
// Kernel context_switch() flow (simplified)
context_switch(rq, prev, next) {
struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm;
if (!mm) // Next is kernel thread
next->active_mm = oldmm; // Borrow prev's mm
else
switch_mm(oldmm, mm, next); // Switch page tables + TLB
switch_to(prev, next, prev); // Save/restore registers, stack
}
// Cost of context switch: typically 1–10 µs
// Minimise by: reducing lock contention, using per-CPU data
Linux divides physical memory into zones based on hardware constraints:
| Zone | Range (32-bit) | Purpose |
|---|---|---|
| ZONE_DMA | 0 – 16 MB | Legacy ISA DMA devices with limited address range |
| ZONE_NORMAL | 16 MB – 896 MB | Directly mapped kernel memory — most common |
| ZONE_HIGHMEM | > 896 MB | Not directly mapped (32-bit only) — pages must be mapped temporarily with kmap() / kmap_local_page() before the kernel can touch them |
| ZONE_MOVABLE | Varies | Movable pages for memory hot-plug |
Memory allocation functions:
// Allocate pages directly (returns struct page *) struct page *p = alloc_pages(GFP_KERNEL, order); // 2^order pages // Allocate and get virtual address unsigned long addr = __get_free_pages(GFP_KERNEL, 0); // 1 page // Small allocations — slab allocator struct mydata *d = kmalloc(sizeof(*d), GFP_KERNEL); // Large non-contiguous virtual allocation void *buf = vmalloc(1024 * 1024); // 1 MB // Zero-filled single page unsigned long zp = get_zeroed_page(GFP_KERNEL); // Free functions kfree(d); vfree(buf); free_pages(addr, 0);
| Feature | kmalloc() | vmalloc() |
|---|---|---|
| Physical memory | Contiguous (required) | Scattered — not contiguous |
| Virtual memory | Contiguous (kernel direct/linear mapping) | Contiguous (via page tables) |
| Speed | Fast (slab cache) | Slower (page table setup) |
| DMA safe | ✅ Yes | ❌ No (physical addresses scattered) |
| Max size | ~128–256 KB typically | Much larger (limited by virtual space) |
| Best for | Small allocs, DMA buffers, ISR | Large buffers, module data |
| ISR safe flag | GFP_ATOMIC (no sleep) | Cannot use in ISR context |
// kmalloc — physically contiguous, fast char *buf = kmalloc(512, GFP_KERNEL); // Process context — can sleep char *isr_buf = kmalloc(64, GFP_ATOMIC); // Interrupt context — no sleep kfree(buf); // vmalloc — virtually contiguous, large void *large = vmalloc(4 * 1024 * 1024); // 4 MB vfree(large); // kzalloc — kmalloc + zero fill struct mydev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); // kcalloc — kmalloc array + zero fill int *arr = kcalloc(100, sizeof(int), GFP_KERNEL);
Spinlocks busy-wait (spin in a loop) rather than sleeping. They are the only lock safe in interrupt context. Keep the critical section as short as possible.
| Variant | What it does | Use when |
|---|---|---|
| spin_lock() | Basic lock, disables preemption | No interrupt contention |
| spin_lock_irq() | Lock + disable local IRQs | IRQ handler and process share the lock |
| spin_lock_irqsave() | Lock + save/restore IRQ state | Safe even if IRQs already disabled |
| spin_lock_bh() | Lock + disable bottom halves | Tasklet/softirq and process share lock |
DEFINE_SPINLOCK(my_lock);
/* Basic */
spin_lock(&my_lock);
// critical section
spin_unlock(&my_lock);
/* IRQ-safe (preferred in drivers) */
unsigned long flags;
spin_lock_irqsave(&my_lock, flags);
// safe from both process and interrupt context
spin_unlock_irqrestore(&my_lock, flags);
/* Bottom-half safe */
spin_lock_bh(&my_lock);
// safe from tasklets/softirqs
spin_unlock_bh(&my_lock);
/* Nested locks — ALWAYS acquire in the same order to avoid deadlock */
spin_lock(&lock_a);
spin_lock(&lock_b);
// work
spin_unlock(&lock_b);
spin_unlock(&lock_a);
Atomic operations complete as indivisible CPU instructions — no spinlock required. Fast and safe across CPUs for simple integer operations.
#include <linux/atomic.h>

atomic_t count;

atomic_set(&count, 5);                        // Set to 5
int val = atomic_read(&count);                // Read value
atomic_inc(&count);                           // Increment
atomic_dec(&count);                           // Decrement
atomic_add(3, &count);                        // Add 3
atomic_sub(2, &count);                        // Subtract 2
bool inc_hit_zero = atomic_inc_and_test(&count);  // Inc, returns true if now 0
bool dec_hit_zero = atomic_dec_and_test(&count);  // Dec, returns true if now 0
atomic_xchg(&count, 10);                      // Swap, returns old value
atomic_cmpxchg(&count, 5, 10);                // If val==5, set to 10
Reference counting pattern (common in drivers):
struct my_device {
atomic_t refcount;
// ... other fields
};
/*
 * Take one more reference on @dev by atomically incrementing
 * dev->refcount.
 */
void get_device(struct my_device *dev)
{
	atomic_inc(&dev->refcount);
}
/*
 * Drop one reference on @dev.
 *
 * The count is decremented atomically; whichever caller brings it to
 * zero frees the structure with kfree(). @dev must not be touched by
 * that caller afterwards.
 */
void put_device(struct my_device *dev)
{
	if (!atomic_dec_and_test(&dev->refcount))
		return;		/* count is still non-zero */

	kfree(dev);
}
// Preferred modern API: kref
#include <linux/kref.h>
struct my_device { struct kref kref; };
kref_init(&dev->kref);
kref_get(&dev->kref);
kref_put(&dev->kref, my_release_fn);
When to use atomic vs spinlock: Use atomics for simple counters/flags. Use spinlocks for compound operations that must be atomic as a group.
| Feature | Softirq | Tasklet | Workqueue |
|---|---|---|---|
| Context | Softirq (interrupt) | Softirq (interrupt) | Process (kernel thread) |
| Can sleep? | ❌ No | ❌ No | ✅ Yes |
| Can block? | ❌ No | ❌ No | ✅ Yes (mutex, I/O) |
| Speed | Fastest | Fast | Slower |
| Concurrency | Multiple CPUs simultaneously | One instance at a time | Multiple worker threads |
| Limited count | Yes — only 10 vectors | No | No |
| Best for | Critical kernel subsystems (networking, block) | Simple fast deferred work | I/O, long-running, blocking work |
/* ── Tasklet ── */
/*
 * Tasklet handler: logs one line each time the tasklet runs.
 *
 * @data: unused here.
 */
void my_tasklet_fn(unsigned long data)
{
	pr_info("Tasklet running\n");
}
DECLARE_TASKLET(my_tasklet, my_tasklet_fn, 0);
tasklet_schedule(&my_tasklet); // From ISR or anywhere
tasklet_kill(&my_tasklet); // On driver exit
/* ── Workqueue ── */
/*
 * Workqueue handler.
 *
 * Demonstrates what deferred work may do that a tasklet may not:
 * sleep (msleep) and take a sleeping lock (mutex).
 *
 * @work: the work item being executed; unused here.
 */
void my_work_fn(struct work_struct *work)
{
	pr_info("Work running in process context\n");
	msleep(100);			/* Can sleep! */

	mutex_lock(&my_mutex);		/* Can use mutexes! */
	/* ... work protected by my_mutex goes here ... */
	mutex_unlock(&my_mutex);	/* release, or the next run blocks forever */
}
DECLARE_WORK(my_work, my_work_fn);
schedule_work(&my_work); // Add to global workqueue
flush_work(&my_work); // Wait for completion
cancel_work_sync(&my_work); // Cancel + wait
Device files in /dev are special files that serve as interfaces between user-space applications and device drivers. They are not real files on disk — they are virtual entry points into the kernel driver.
Each device file has two key identifiers:
| Number | Purpose | Example |
|---|---|---|
| Major | Identifies which driver handles the device | /dev/sda → major 8 (SCSI disk driver) |
| Minor | Identifies the specific device instance | /dev/sda1 → minor 1 (first partition) |
$ ls -l /dev/ttyS0
crw-rw---- 1 root dialout 4, 64 /dev/ttyS0
│                         │  └─ minor number (64)
│                         └──── major number (4)
└──────────────────────────────── 'c' = character device
$ ls -l /dev/sda
brw-rw---- 1 root disk 8, 0 /dev/sda
└─ 'b' = block device
# Device file types:
# c = character device (serial port, keyboard, GPIO)
# b = block device (HDD, SSD, SD card)
# l = symbolic link
# s = socket
# p = named pipe (FIFO)
Common /dev files:
- /dev/null — discards all written data, reads return EOF
- /dev/zero — infinite stream of zero bytes
- /dev/random — cryptographically secure random bytes
- /dev/mem — direct access to physical memory
- /dev/ttyS0 — first serial (UART) port
| Aspect | Static Allocation | Dynamic Allocation |
|---|---|---|
| Function | register_chrdev_region() | alloc_chrdev_region() |
| Major number | You choose (risk of conflict) | Kernel assigns available one |
| Predictability | Always same number | Changes each load |
| Conflict risk | High | None |
| Recommended? | Only for legacy/known numbers | ✅ Yes — always preferred |
#include <linux/fs.h>
/* ── Static allocation ── */
dev_t dev = MKDEV(240, 0);
if (register_chrdev_region(dev, 1, "my_device") < 0) {
pr_err("Static alloc failed\n");
return -1;
}
/* ── Dynamic allocation (RECOMMENDED) ── */
dev_t dev;
if (alloc_chrdev_region(&dev, 0, 1, "my_device") < 0) {
pr_err("Dynamic alloc failed\n");
return -1;
}
int major = MAJOR(dev);
int minor = MINOR(dev);
pr_info("Got major=%d minor=%d\n", major, minor);
/* ── Always free in module exit ── */
static void __exit my_exit(void) {
cdev_del(&my_cdev);
unregister_chrdev_region(dev, 1); // MUST call this!
}
Check /proc/devices after loading to see your assigned major number.
Modern Linux uses udev to automatically create /dev entries when a driver loads. You trigger this from the driver using class_create() and device_create().
#include <linux/device.h>
#include <linux/fs.h>
static dev_t dev;
static struct class *dev_class;
static struct cdev my_cdev;
/*
 * Module init: register a character device and have udev create its
 * /dev node.
 *
 * Every step is checked; on failure the steps that already succeeded
 * are undone in reverse order and the kernel's own error code is
 * returned (instead of a bare -1).
 *
 * Return: 0 on success, negative errno on failure.
 */
static int __init my_init(void)
{
	struct device *device;
	int ret;

	/* 1. Allocate device number */
	ret = alloc_chrdev_region(&dev, 0, 1, "my_device");
	if (ret < 0)
		return ret;

	/* 2. Initialize and register cdev */
	cdev_init(&my_cdev, &my_fops);
	my_cdev.owner = THIS_MODULE;
	ret = cdev_add(&my_cdev, dev, 1);
	if (ret < 0)
		goto err_unregister;

	/*
	 * 3. Create device class → appears in /sys/class/
	 * NOTE(review): kernels >= 6.4 take only the name argument,
	 * i.e. class_create("my_class") — adjust for the target kernel.
	 */
	dev_class = class_create(THIS_MODULE, "my_class");
	if (IS_ERR(dev_class)) {
		ret = PTR_ERR(dev_class);
		goto err_cdev;
	}

	/* 4. Create device → udev creates /dev/my_device automatically */
	device = device_create(dev_class, NULL, dev, NULL, "my_device");
	if (IS_ERR(device)) {
		ret = PTR_ERR(device);
		goto err_class;
	}

	pr_info("/dev/my_device created automatically!\n");
	return 0;

err_class:
	class_destroy(dev_class);
err_cdev:
	cdev_del(&my_cdev);
err_unregister:
	unregister_chrdev_region(dev, 1);
	return ret;
}
static void __exit my_exit(void)
{
device_destroy(dev_class, dev); /* Remove /dev node */
class_destroy(dev_class); /* Remove /sys/class entry */
cdev_del(&my_cdev);
unregister_chrdev_region(dev, 1);
}
Without device_create(): must manually run sudo mknod /dev/my_device c 240 0 every time.
With device_create(): udev creates it automatically on insmod, removes on rmmod. ✅
The Linux kernel uses an intrusive doubly-linked list — the list_head node is embedded inside your data structure rather than wrapping it. This avoids extra allocation and gives O(1) insert/delete.
#include <linux/list.h>
/* Embed list_head inside your struct */
/* One entry of device_list; linked in through the embedded 'list' member. */
struct my_device {
int id; /* key compared by find_device() */
char name[32]; /* printed with %s by print_all() */
struct list_head list; /* ← kernel list node */
};
/* Declare and initialize the list head */
static LIST_HEAD(device_list);
static DEFINE_SPINLOCK(list_lock);
/*
 * Append @dev to the end of device_list.
 * The insertion is done with list_lock held.
 */
void add_device(struct my_device *dev)
{
	struct list_head *node = &dev->list;

	spin_lock(&list_lock);
	list_add_tail(node, &device_list);
	spin_unlock(&list_lock);
}
/*
 * Unlink @dev from the list it is on, with list_lock held.
 * The structure itself is not freed here.
 */
void remove_device(struct my_device *dev)
{
	struct list_head *node = &dev->list;

	spin_lock(&list_lock);
	list_del(node);
	spin_unlock(&list_lock);
}
/*
 * Look up the first entry of device_list whose id equals @id.
 *
 * The walk happens under list_lock, but the lock is released before
 * returning, so nothing in this function keeps the returned entry on
 * the list (or alive) afterwards.
 *
 * Return: the matching entry, or NULL if none matches.
 */
struct my_device *find_device(int id)
{
	struct my_device *cur;
	struct my_device *match = NULL;

	spin_lock(&list_lock);
	list_for_each_entry(cur, &device_list, list) {
		if (cur->id != id)
			continue;

		match = cur;
		break;
	}
	spin_unlock(&list_lock);

	return match;
}
/* ── Print all ── */
void print_all(void) {
struct my_device *dev;
list_for_each_entry(dev, &device_list, list)
pr_info("Device: %s (id=%d)\n", dev->name, dev->id);
}| Operation | Function | Description |
|---|---|---|
| Init head | LIST_HEAD(name) | Declare and initialize list head |
| Add front | list_add(new, head) | Add after head |
| Add tail | list_add_tail(new, head) | Add before head |
| Remove | list_del(entry) | Unlink from list |
| Iterate | list_for_each_entry(pos, head, member) | Type-safe iteration |
| Empty check | list_empty(head) | Returns true if empty |
Standard kernel timers schedule a callback function to run after a delay. They are based on jiffies (system ticks) and provide millisecond granularity.
Key concepts:
- HZ — ticks per second (typically 100–1000 depending on kernel config)
- jiffies — current tick count since boot
- jiffies + HZ — 1 second from now
- msecs_to_jiffies(ms) — convert ms to jiffies
#include <linux/timer.h>
static struct timer_list heartbeat_timer;
static int count = 0;
/*
 * Heartbeat timer handler — runs in softirq context, so it must not sleep.
 *
 * Bumps the global beat counter, logs it, and re-arms heartbeat_timer
 * to expire 5 * HZ jiffies (5 seconds) from now.
 *
 * @t: the expired timer; unused, the global heartbeat_timer is re-armed.
 */
void timer_callback(struct timer_list *t)
{
	pr_info("Heartbeat #%d\n", ++count);

	mod_timer(&heartbeat_timer, jiffies + 5 * HZ);
}
/*
 * Module init: bind timer_callback to heartbeat_timer and arm it so the
 * first expiry is HZ jiffies (one second) from now.
 *
 * Return: always 0.
 */
static int __init driver_init(void)
{
	unsigned long first_expiry;

	timer_setup(&heartbeat_timer, timer_callback, 0);

	first_expiry = jiffies + HZ;
	mod_timer(&heartbeat_timer, first_expiry);

	return 0;
}
/*
 * Module exit: cancel heartbeat_timer, using the _sync variant so a
 * callback that is currently running has finished before we return.
 */
static void __exit driver_exit(void)
{
	del_timer_sync(&heartbeat_timer);
}
/* ── Timer operations ── */
// mod_timer(&t, jiffies + HZ) — start or reset timer
// del_timer(&t) — cancel timer (may already be running)
// del_timer_sync(&t) — cancel and wait for completion ✅
// timer_pending(&t) — returns 1 if timer is scheduled
Limitation: Jiffies granularity is ~1–10ms. For finer timing, use High-Resolution Timers (HRT).
High-Resolution Timers (HRT) provide nanosecond precision using a hardware clock, unlike standard timers which are limited to jiffies (milliseconds).
| Feature | Standard Timer | HRT |
|---|---|---|
| Resolution | Jiffies (~1–10ms) | Nanosecond |
| Timestamp width | 32-bit jiffies | 64-bit ktime_t |
| Data structure | Timer wheel (hashed buckets, O(1) insert) | Red-black tree (O(log n)) |
| Best for | Polling, watchdogs | Audio, multimedia, real-time |
#include <linux/hrtimer.h>
#include <linux/ktime.h>
static struct hrtimer my_hrtimer;
/* Callback — return value controls rescheduling */
enum hrtimer_restart hrt_callback(struct hrtimer *timer)
{
pr_info("HRT fired — nanosecond precision!\n");
/* Reschedule every 100ms */
hrtimer_forward_now(timer, ktime_set(0, 100000000)); /* 100ms */
return HRTIMER_RESTART; /* Keep repeating */
// return HRTIMER_NORESTART; /* One-shot */
}
/*
 * Module init: set up my_hrtimer on CLOCK_MONOTONIC in relative mode,
 * attach hrt_callback, and start it with a first expiry one second out.
 *
 * Return: always 0.
 */
static int __init my_init(void)
{
	const ktime_t first_expiry = ktime_set(1, 0);	/* 1 s, 0 ns */

	hrtimer_init(&my_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	my_hrtimer.function = hrt_callback;

	hrtimer_start(&my_hrtimer, first_expiry, HRTIMER_MODE_REL);

	return 0;
}
/* Module exit: cancel my_hrtimer (hrtimer_cancel() cancels and waits). */
static void __exit my_exit(void)
{
	hrtimer_cancel(&my_hrtimer);
}
/* Time creation helpers */
ktime_t t1 = ktime_set(1, 500000000); /* 1.5 seconds */
ktime_t t2 = ktime_set(0, 1000000); /* 1 millisecond */
ktime_t t3 = ktime_set(0, 1000); /* 1 microsecond */
A read-write spinlock (rwlock) distinguishes between read access and write access. Multiple readers can hold the lock simultaneously; a writer requires exclusive access.
| Operation | Concurrent? | Use when |
|---|---|---|
| Multiple readers | ✅ Yes — fully parallel | Read-only access to shared data |
| Reader + Writer | ❌ No — blocked | Writer must wait for all readers to exit |
| Multiple writers | ❌ No — exclusive | Only one writer at a time |
#include <linux/spinlock.h>
static DEFINE_RWLOCK(my_rwlock);
static int shared_data = 0;
/*
 * Return the current value of shared_data.
 *
 * The value is copied out under the read side of my_rwlock, which
 * several readers may hold at the same time.
 */
int read_data(void)
{
	int snapshot;

	read_lock(&my_rwlock);
	snapshot = shared_data;
	read_unlock(&my_rwlock);

	return snapshot;
}
/*
 * Store @new_val into shared_data.
 *
 * The store is done under the write side of my_rwlock, which grants
 * exclusive access (one writer, no readers).
 */
void write_data(int new_val)
{
	write_lock(&my_rwlock);

	shared_data = new_val;

	write_unlock(&my_rwlock);
}
/* ── IRQ-safe variants ── */
unsigned long flags;
read_lock_irqsave(&my_rwlock, flags);
// read
read_unlock_irqrestore(&my_rwlock, flags);
write_lock_irqsave(&my_rwlock, flags);
// write
write_unlock_irqrestore(&my_rwlock, flags);
When to use rwlock over spinlock: When reads happen much more frequently than writes (e.g., routing tables, device registry, configuration data). If writes are as frequent as reads, a regular spinlock is simpler and has lower overhead.
Kernel threads are processes that run entirely in kernel space — no user-space memory context. Used for background tasks like flushing buffers, polling hardware, and driver housekeeping.
| Feature | Kernel Thread | User Thread |
|---|---|---|
| Memory space | Kernel only (no user VM) | User + kernel |
| Context switch | No user/kernel transition | Full mode switch |
| Can sleep? | ✅ Yes | ✅ Yes |
| Visible in ps? | ✅ Yes (inside [brackets]) | Yes |
| Create with | kthread_create() | pthread_create() |
#include <linux/kthread.h>
#include <linux/delay.h>
static struct task_struct *my_thread;
/*
 * Kernel thread body.
 *
 * Logs a start message, then ticks once per msleep(1000) until
 * kthread_should_stop() returns true, logs a stop message and exits.
 * Sleeping is allowed here.
 *
 * @data: unused.
 * Return: always 0.
 */
static int thread_fn(void *data)
{
	pr_info("Kernel thread started\n");

	for (;;) {
		if (kthread_should_stop())
			break;

		pr_info("Thread tick\n");
		msleep(1000);
	}

	pr_info("Kernel thread stopping\n");
	return 0;
}
/*
 * Module init: create the kernel thread running thread_fn and start it.
 *
 * kthread_create() only creates the thread; it begins running once
 * wake_up_process() is called on it.
 *
 * Return: 0 on success, or the error encoded in the pointer returned by
 * kthread_create().
 */
static int __init my_init(void)
{
	my_thread = kthread_create(thread_fn, NULL, "my_kthread_%d", 0);

	if (!IS_ERR(my_thread)) {
		wake_up_process(my_thread);
		return 0;
	}

	return PTR_ERR(my_thread);
}
/*
 * Module exit: if a thread was recorded in my_thread, ask it to stop
 * (kthread_stop() signals the stop and waits for the thread to exit)
 * and clear the pointer.
 */
static void __exit my_exit(void)
{
	if (!my_thread)
		return;

	kthread_stop(my_thread);
	my_thread = NULL;
}
/* Bind thread to specific CPU */
// kthread_bind(my_thread, 0); /* Run only on CPU 0 — call after kthread_create(), before wake_up_process() */
The Linux GPIO subsystem provides a unified API for controlling digital I/O pins across all hardware platforms. The same API works on Raspberry Pi, BeagleBone, STM32MP1, etc.
#include <linux/gpio.h>
#include <linux/interrupt.h>
#define GPIO_OUTPUT_PIN 17 /* LED */
#define GPIO_INPUT_PIN 27 /* Button */
static int irq_number;
/*
 * Interrupt handler for the button GPIO.
 *
 * Reads the current level of GPIO_INPUT_PIN and logs it.
 *
 * @irq:    interrupt number; unused.
 * @dev_id: cookie given to request_irq(); unused.
 * Return: IRQ_HANDLED.
 */
static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
{
	int level = gpio_get_value(GPIO_INPUT_PIN);

	pr_info("GPIO interrupt! Value=%d\n", level);

	return IRQ_HANDLED;
}
/*
 * Module init: claim the LED and button GPIOs, configure them, hook the
 * button's rising-edge interrupt and export the LED pin to sysfs.
 *
 * Each step that can fail is checked. On failure, everything acquired
 * so far is released in reverse order and the error is returned, so a
 * failed load leaves no GPIO or IRQ claimed. The sysfs export is
 * treated as optional: a failure there is logged but does not abort
 * the load.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int __init gpio_driver_init(void)
{
	int btn;
	int ret;

	/* ── Validate ── */
	if (!gpio_is_valid(GPIO_OUTPUT_PIN) || !gpio_is_valid(GPIO_INPUT_PIN)) {
		pr_err("Invalid GPIO number\n");
		return -EINVAL;
	}

	/* ── Request ownership ── */
	ret = gpio_request(GPIO_OUTPUT_PIN, "led_gpio");
	if (ret) {
		pr_err("Cannot request GPIO %d\n", GPIO_OUTPUT_PIN);
		return ret;
	}
	ret = gpio_request(GPIO_INPUT_PIN, "btn_gpio");
	if (ret) {
		pr_err("Cannot request GPIO %d\n", GPIO_INPUT_PIN);
		goto err_free_output;
	}

	/* ── Configure direction ── */
	ret = gpio_direction_output(GPIO_OUTPUT_PIN, 0);	/* Output, init LOW */
	if (ret)
		goto err_free_input;
	ret = gpio_direction_input(GPIO_INPUT_PIN);		/* Input */
	if (ret)
		goto err_free_input;

	/* ── Drive and read ── */
	gpio_set_value(GPIO_OUTPUT_PIN, 1);			/* LED ON */
	btn = gpio_get_value(GPIO_INPUT_PIN);			/* Read button */
	pr_info("Button state: %d\n", btn);

	/* ── GPIO as interrupt ── */
	ret = gpio_to_irq(GPIO_INPUT_PIN);
	if (ret < 0)
		goto err_led_off;
	irq_number = ret;

	ret = request_irq(irq_number, gpio_irq_handler,
			  IRQF_TRIGGER_RISING, "gpio_int", NULL);
	if (ret)
		goto err_led_off;

	/* ── Export to user space via /sys/class/gpio/ ── */
	if (gpio_export(GPIO_OUTPUT_PIN, false))	/* false = no direction change */
		pr_warn("GPIO %d not exported to sysfs\n", GPIO_OUTPUT_PIN);

	return 0;

err_led_off:
	gpio_set_value(GPIO_OUTPUT_PIN, 0);
err_free_input:
	gpio_free(GPIO_INPUT_PIN);
err_free_output:
	gpio_free(GPIO_OUTPUT_PIN);
	return ret;
}
static void __exit gpio_driver_exit(void)
{
gpio_set_value(GPIO_OUTPUT_PIN, 0);
gpio_unexport(GPIO_OUTPUT_PIN);
free_irq(irq_number, NULL);
gpio_free(GPIO_OUTPUT_PIN);
gpio_free(GPIO_INPUT_PIN);
pr_info("GPIO driver removed\n");
}
GPIO from user space (via sysfs):
# Export GPIO 17
echo 17 > /sys/class/gpio/export
# Set as output
echo "out" > /sys/class/gpio/gpio17/direction
# Drive high / low
echo 1 > /sys/class/gpio/gpio17/value
echo 0 > /sys/class/gpio/gpio17/value
# Set as input and read
echo "in" > /sys/class/gpio/gpio17/direction
cat /sys/class/gpio/gpio17/value
# Unexport when done
echo 17 > /sys/class/gpio/unexport
Signals are software interrupts sent to a process to notify it of an event. A kernel driver can send signals (like SIGIO) to user-space processes to notify them of hardware events — an alternative to polling or blocking reads.
| Signal | Default Action | Common Use |
|---|---|---|
| SIGIO | Terminate | Async I/O notification from driver (install a handler before enabling O_ASYNC) |
| SIGKILL | Terminate | Force-kill process (uncatchable) |
| SIGTERM | Terminate | Graceful shutdown request |
| SIGUSR1/2 | Terminate | User-defined events |
| SIGSEGV | Core dump | Segmentation fault (invalid memory) |
/* ── Kernel driver: send SIGIO to user process ── */
#include <linux/signal.h>
#include <linux/sched/signal.h>
static struct fasync_struct *async_queue;
/*
 * file_operations .fasync hook.
 *
 * Hands the request straight to fasync_helper(), which maintains
 * async_queue for this file, and returns its result unchanged.
 */
static int my_fasync(int fd, struct file *filp, int mode)
{
	int rc = fasync_helper(fd, filp, mode, &async_queue);

	return rc;
}
/*
 * Notify user space that data is ready (intended to be called from an
 * ISR or timer).
 *
 * Does nothing when async_queue is empty; otherwise raises SIGIO with
 * POLL_IN on the queue.
 */
void notify_user(void)
{
	if (!async_queue)
		return;

	kill_fasync(&async_queue, SIGIO, POLL_IN);
}
/* Add to file_operations */
/* Only the async-notification hook is shown; the remaining members are elided. */
static struct file_operations my_fops = {
.fasync = my_fasync, /* forwards to fasync_helper() on async_queue */
/* ... other ops ... */
};
/* ── User space: receive async notification ── */
/*
#include <signal.h>
#include <fcntl.h>
void sigio_handler(int sig) {
printf("Async data ready from driver!\n");
}
int main() {
int fd = open("/dev/my_device", O_RDWR);
signal(SIGIO, sigio_handler); // Install handler
fcntl(fd, F_SETOWN, getpid()); // This process owns the signal
fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK); // Enable async
pause(); // Wait for signal
}
*/
This shows the complete, production-quality skeleton of a character driver combining all the key pieces: dynamic allocation, udev, mutex, wait queue, and proper cleanup.
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/wait.h>
#include <linux/uaccess.h>
#define DEVICE_NAME "mydev"
#define BUF_SIZE 1024
MODULE_LICENSE("GPL");
MODULE_AUTHOR("EmbeddedLinuxCareer");
static dev_t dev_num;
static struct cdev my_cdev;
static struct class *my_class;
static DEFINE_MUTEX(dev_mutex);
static DECLARE_WAIT_QUEUE_HEAD(read_queue);
static char kernel_buf[BUF_SIZE];
static int data_ready = 0;
/*
 * open() handler: logs that the device was opened.
 * Neither argument is used. Return: always 0.
 */
static int my_open(struct inode *i, struct file *f)
{
	pr_info("%s: opened\n", DEVICE_NAME);

	return 0;
}
/*
 * release() handler: logs that the device was closed.
 * Neither argument is used. Return: always 0.
 */
static int my_release(struct inode *i, struct file *f)
{
	pr_info("%s: closed\n", DEVICE_NAME);

	return 0;
}
/*
 * read() handler: block until a writer has stored data, then copy it out.
 *
 * data_ready is re-checked after dev_mutex is taken. The wait condition
 * is evaluated without the mutex, so when several readers are woken by
 * one write, only the first to get the mutex may consume the data; the
 * others go back to sleep instead of re-reading the same buffer.
 *
 * NOTE: my_write() does not record how many bytes it stored, so up to
 * BUF_SIZE bytes of kernel_buf are returned regardless of the size of
 * the last write.
 *
 * Return: number of bytes copied (min(count, BUF_SIZE)), -ERESTARTSYS if
 * the wait was interrupted, or -EFAULT if the user buffer is bad.
 */
static ssize_t my_read(struct file *f, char __user *buf,
		       size_t count, loff_t *off)
{
	size_t len = min(count, (size_t)BUF_SIZE);

	for (;;) {
		/* Block until data is available */
		if (wait_event_interruptible(read_queue, data_ready))
			return -ERESTARTSYS;

		mutex_lock(&dev_mutex);
		if (data_ready)
			break;		/* data is ours; mutex stays held */

		/* Another reader consumed it between wake-up and lock. */
		mutex_unlock(&dev_mutex);
	}

	if (copy_to_user(buf, kernel_buf, len)) {
		mutex_unlock(&dev_mutex);
		return -EFAULT;
	}
	data_ready = 0;
	mutex_unlock(&dev_mutex);

	return len;
}
/*
 * write() handler: copy up to BUF_SIZE bytes from user space into
 * kernel_buf, mark data as ready and wake any blocked readers.
 *
 * Return: the number of bytes actually stored. For requests larger than
 * BUF_SIZE this is a short write (BUF_SIZE), so the caller can tell that
 * the rest was not accepted instead of being told everything was
 * written. -EFAULT if the user buffer is bad.
 */
static ssize_t my_write(struct file *f, const char __user *buf,
			size_t count, loff_t *off)
{
	size_t len = min(count, (size_t)BUF_SIZE);

	mutex_lock(&dev_mutex);
	if (copy_from_user(kernel_buf, buf, len)) {
		mutex_unlock(&dev_mutex);
		return -EFAULT;
	}
	data_ready = 1;
	mutex_unlock(&dev_mutex);

	wake_up_interruptible(&read_queue);	/* Wake blocked readers */

	return len;
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.open = my_open,
.release = my_release,
.read = my_read,
.write = my_write,
};
/*
 * Module init: allocate a device number, register the cdev, create the
 * class and the device node.
 *
 * Every step is checked. On failure the steps already completed are
 * undone in reverse order and the error code is returned, so a failed
 * load leaves nothing registered.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int __init my_init(void)
{
	struct device *device;
	int ret;

	ret = alloc_chrdev_region(&dev_num, 0, 1, DEVICE_NAME);
	if (ret < 0) {
		pr_err("%s: cannot allocate device number\n", DEVICE_NAME);
		return ret;
	}

	cdev_init(&my_cdev, &fops);
	my_cdev.owner = THIS_MODULE;
	ret = cdev_add(&my_cdev, dev_num, 1);
	if (ret < 0) {
		pr_err("%s: cdev_add failed\n", DEVICE_NAME);
		goto err_unregister;
	}

	/*
	 * NOTE(review): kernels >= 6.4 take only the name argument,
	 * i.e. class_create(DEVICE_NAME) — adjust for the target kernel.
	 */
	my_class = class_create(THIS_MODULE, DEVICE_NAME);
	if (IS_ERR(my_class)) {
		ret = PTR_ERR(my_class);
		pr_err("%s: class_create failed\n", DEVICE_NAME);
		goto err_cdev;
	}

	device = device_create(my_class, NULL, dev_num, NULL, DEVICE_NAME);
	if (IS_ERR(device)) {
		ret = PTR_ERR(device);
		pr_err("%s: device_create failed\n", DEVICE_NAME);
		goto err_class;
	}

	pr_info("/dev/%s ready (major=%d)\n", DEVICE_NAME, MAJOR(dev_num));
	return 0;

err_class:
	class_destroy(my_class);
err_cdev:
	cdev_del(&my_cdev);
err_unregister:
	unregister_chrdev_region(dev_num, 1);
	return ret;
}
/*
 * Module exit: tear down in the reverse order of my_init() —
 * device node, class, cdev, then the device-number region.
 */
static void __exit my_exit(void)
{
	/* sysfs / udev side first */
	device_destroy(my_class, dev_num);
	class_destroy(my_class);

	/* then the character device itself */
	cdev_del(&my_cdev);
	unregister_chrdev_region(dev_num, 1);

	pr_info("/dev/%s removed\n", DEVICE_NAME);
}
module_init(my_init);
module_exit(my_exit);
Deep-dive coverage of C programming, Operating Systems, Linux System Programming, Kernel Development, RTOS, and Advanced ARM — all in one place.
One-time payment · Lifetime access · No subscription