If binary semaphore can achieve the synchronization as provided by mutex, then why do we need mutex at all? Another question was, can we use semaphore / mutex in interrupt handlers? To find the answer to these questions, read on.
Mutex and Binary Semaphore
Below is the simple example using the binary semaphore:
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <asm/uaccess.h>
#include <linux/semaphore.h>
#define FIRST_MINOR 0
#define MINOR_CNT 1
static dev_t dev;
static struct cdev c_dev;
static struct class *cl;
static int my_open(struct inode *i, struct file *f)
{
return 0;
}
static int my_close(struct inode *i, struct file *f)
{
return 0;
}
static char c = 'A';
static struct semaphore my_sem;
static ssize_t my_read(struct file *f, char __user *buf, size_t len, loff_t *off)
{
// Acquire the Semaphore
if (down_interruptible(&my_sem))
{
printk("Unable to acquire Semaphore\n");
return -1;
}
return 0;
}
static ssize_t my_write(struct file *f, const char __user *buf, size_t len,
loff_t *off)
{
// Release the semaphore
up(&my_sem);
if (copy_from_user(&c, buf + len - 1, 1))
{
return -EFAULT;
}
return len;
}
static struct file_operations driver_fops =
{
.owner = THIS_MODULE,
.open = my_open,
.release = my_close,
.read = my_read,
.write = my_write
};
static int __init sem_init(void)
{
int ret;
struct device *dev_ret;
if ((ret = alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, "my_sem")) < 0)
{
return ret;
}
cdev_init(&c_dev, &driver_fops);
if ((ret = cdev_add(&c_dev, dev, MINOR_CNT)) < 0)
{
unregister_chrdev_region(dev, MINOR_CNT);
return ret;
}
if (IS_ERR(cl = class_create(THIS_MODULE, "char")))
{
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
return PTR_ERR(cl);
}
if (IS_ERR(dev_ret = device_create(cl, NULL, dev, NULL, "mysem%d", FIRST_MINOR)))
{
class_destroy(cl);
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
return PTR_ERR(dev_ret);
}
sema_init(&my_sem, 0);
return 0;
}
static void __exit sem_exit(void)
{
device_destroy(cl, dev);
class_destroy(cl);
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
}
module_init(sem_init);
module_exit(sem_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pradeep");
MODULE_DESCRIPTION("Binary Semaphore Demonstration");
In the above example, we initialize the semaphore with the value of 0 with sem_init(). Inmy_read(), we decrement the semaphore and in my_write(), we increment the semaphore. Below is the sample run:
insmod sem.ko
cat /dev/mysem0 - This will block
echo 1 > /dev/mysem0 - Will unblock the cat process
Now, let’s try achieving the same with mutex. Below is the example for the same.
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <asm/uaccess.h>
#include <linux/mutex.h>
#define FIRST_MINOR 0
#define MINOR_CNT 1
DEFINE_MUTEX(my_mutex);
static dev_t dev;
static struct cdev c_dev;
static struct class *cl;
static int my_open(struct inode *i, struct file *f)
{
return 0;
}
static int my_close(struct inode *i, struct file *f)
{
return 0;
}
static char c = 'A';
static ssize_t my_read(struct file *f, char __user *buf, size_t len, loff_t *off)
{
if (mutex_lock_interruptible(&my_mutex))
{
printk("Unable to acquire Semaphore\n");
return -1;
}
return 0;
}
static ssize_t my_write(struct file *f, const char __user *buf, size_t len,
loff_t *off)
{
mutex_unlock(&my_mutex);
if (copy_from_user(&c, buf + len - 1, 1))
{
return -EFAULT;
}
return len;
}
static struct file_operations driver_fops =
{
.owner = THIS_MODULE,
.open = my_open,
.release = my_close,
.read = my_read,
.write = my_write
};
static int __init init_mutex(void)
{
int ret;
struct device *dev_ret;
if ((ret = alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, "my_mutex")) < 0)
{
return ret;
}
cdev_init(&c_dev, &driver_fops);
if ((ret = cdev_add(&c_dev, dev, MINOR_CNT)) < 0)
{
unregister_chrdev_region(dev, MINOR_CNT);
return ret;
}
if (IS_ERR(cl = class_create(THIS_MODULE, "char")))
{
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
return PTR_ERR(cl);
}
if (IS_ERR(dev_ret = device_create(cl, NULL, dev, NULL, "mymutex%d",
FIRST_MINOR)))
{
class_destroy(cl);
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
return PTR_ERR(dev_ret);
}
return 0;
}
static void __exit exit_mutex(void)
{
device_destroy(cl, dev);
class_destroy(cl);
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
}
module_init(init_mutex);
module_exit(exit_mutex);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pradeep");
MODULE_DESCRIPTION("Mutex Demonstration");
In the above example, I have replaced the semaphore with mutex. Below is the sample run:
cat /dev/mymutex0 - This will acquire the mutex
cat /dev/mymutex0 - This will block
echo 1 > /dev/mymutex0
So, what do you get after executing the echo command? I get the warning as below:
DEBUG_LOCKS_WARN_ON(lock->owner != current)
So, what does this warning mean? It warns that the process that is trying to unlock the mutex is not the owner of the same. But same thing worked without any warning with semaphore. What does this mean? This brings us to the important difference between the mutex and semaphore. Mutex have ownership associated with it. The process that acquires the lock is the one that should unlock the mutex. While such ownership didn’t exist with the semaphore. While using the semaphores for synchronization, its completely upto the user to ensure that the down & upare always called in pairs. But, mutex is designed in a way that lock and unlock must always be called in pairs.
Spinlock
Now, let’s come to the second question – can we use the semaphore / mutex in interrupt handlers. The answer is yes and no. I mean you can use the up and unlock, but can’t use downand lock, as these are blocking calls which put the process to sleep and we are not supposed to sleep in interrupt handlers. So, what if I want to achieve the synchronization in interrupt handlers? For this, there is a mechanism called spinlock. Spinlock is a lock which never yields. Similar to mutex, it has two operations – lock and unlock. If the lock is available, process will acquire it and will continue in the critical section and unlock it, once its done. This is pretty much similar to mutex. But, what if lock is not available? Here, comes the interesting difference. With mutex, the process will sleep, until the lock is available. But, in case of spinlock, it goes into the tight loop, where it continuously checks for a lock, until it becomes available. This is the spinning part of the spin lock. This was designed for multiprocessor systems. But, with the preemptible kernel, even a uniprocessor system behaves like an SMP. Below are the data structures associated with the spinlock:
#include <linux/spinlock.h>
// Data structure
struct spinlock_t my_slock
// Initialization
spinlock_init(&my_slock)
// Operations
spin_lock(&my_slock)
spin_unlock(&my_slock)
Now, let’s try to understand the complications associated with the spinlock. Let’s say, thread T1 acquires the spinlock and enters the critical section. Meanwhile, some high priority thread T2 becomes runnable and preempts the thread T1. Now, thread T2 also tries to acquire the spinlock and since the lock is not available, T2 will spin. Now, since T2 has a higher priority, T1 won’t run ever and this in turn will result in deadlock. So, how do we avoid such scenarios? Spinlock code is designed in such a way that any time kernel code holds a spinlock, the preemption is disabled on the local processor. Therefore, its very important to hold a spinlock for minimum possible time. What if the spinlock is shared between the thread T1 and interrupt handler? For this, there is a variant of spinlock, which disables the interrupts on local processor.
Conclusion
One common thing which we observed with mutex and semaphore is that they block the process, irrespective of the operation it wants to perform on the data structure. As you understand, there are two different operations a process can perform on the data structure – read and write. In most of the cases, it is innocuous to allow multiple readers at a time as far as they don’t modify the data structure. Such a parallelism would improve the performance. So, how do we achieve this?