vservices: block: add the block protocol and drivers

Add the Virtual Services block protocol and block I/O drivers. The
protocol and drivers allow one VM to export block devices to another
VM; each exported device appears in the importing VM as a virtual
block device backed by a block device in the exporting VM.

Change-Id: I47f86307c6201cb66d81bd453bb8c6c7ce99480f
Signed-off-by: Carl van Schaik <carl@cog.systems>
Git-commit: 9e55989babab087627e1ae42b2ed8e656c4b3993
Git-repo: https://github.com/CogSystems/linux-msm/commits/msm-4.9-hyp
Signed-off-by: Murali Nalajala <mnalajal@codeaurora.org>
[pheragu@codeaurora: Changing bio APIs]
Signed-off-by: Prakruthi Deepak Heragu <pheragu@codeaurora.org>
Signed-off-by: Murali Nalajala <mnalajal@codeaurora.org>
Carl van Schaik 2018-07-06 22:24:59 +10:00 committed by Murali Nalajala
parent e96bfc1377
commit b523237653
14 changed files with 5244 additions and 0 deletions


@@ -482,3 +482,23 @@ config BLK_DEV_RSXX
module will be called rsxx.
endif # BLK_DEV
config VSERVICES_BLOCK_SERVER
tristate "Virtual Services block server"
depends on BLOCK && VSERVICES_SUPPORT && VSERVICES_SERVER
default y
select VSERVICES_PROTOCOL_BLOCK_SERVER
help
Select this option if you want support for the server side of the
Virtual Services block protocol. This allows any Linux block device
to be virtualized and exported as a virtual service.
config VSERVICES_BLOCK_CLIENT
tristate "Virtual Services Block client device"
depends on BLOCK && VSERVICES_SUPPORT && VSERVICES_CLIENT
default y
select VSERVICES_PROTOCOL_BLOCK_CLIENT
help
Select this option if you want support for client side Virtual
Services block devices. The virtual block devices are typically
named /dev/vblock0, /dev/vblock1, etc.


@@ -44,3 +44,8 @@ null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o
skd-y := skd_main.o
swim_mod-y := swim.o swim_asm.o
obj-$(CONFIG_VSERVICES_BLOCK_SERVER) += vs_block_server.o
CFLAGS_vs_block_server.o += -Werror
obj-$(CONFIG_VSERVICES_BLOCK_CLIENT) += vs_block_client.o
CFLAGS_vs_block_client.o += -Werror


@@ -0,0 +1,956 @@
/*
* drivers/block/vs_block_client.c
*
* Copyright (c) 2012-2018 General Dynamics
* Copyright (c) 2014 Open Kernel Labs, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* block vservice client driver
*
* Function vs_block_client_vs_alloc() is partially derived from
* drivers/block/brd.c (brd_alloc())
*
*/
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/genhd.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/version.h>
#include <linux/idr.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <vservices/buffer.h>
#include <vservices/protocol/block/types.h>
#include <vservices/protocol/block/common.h>
#include <vservices/protocol/block/client.h>
#include <vservices/service.h>
#include <vservices/session.h>
#include <vservices/wait.h>
/*
* BLK_DEF_MAX_SECTORS was replaced with the hard-coded number 1024 in 3.19,
* and restored in 4.3
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)) && \
(LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0))
#define BLK_DEF_MAX_SECTORS 1024
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
#define bio_sector(bio) (bio)->bi_iter.bi_sector
#define bio_size(bio) (bio)->bi_iter.bi_size
#else
#define bio_sector(bio) (bio)->bi_sector
#define bio_size(bio) (bio)->bi_size
#endif
#define CLIENT_BLKDEV_NAME "vblock"
#define PERDEV_MINORS 256
struct block_client;
struct vs_block_device {
/*
* The client that created this block device. A reference is held
* to the client until the block device is released, so this pointer
* should always be valid. However, the client may since have reset;
* so it should only be used if, after locking it, its blkdev pointer
* points back to this block device.
*/
struct block_client *client;
int id;
struct gendisk *disk;
struct request_queue *queue;
struct kref kref;
};
struct block_client {
struct vs_client_block_state client;
struct vs_service_device *service;
/* Tasklet & queue for bouncing buffers out of read acks */
struct tasklet_struct rx_tasklet;
struct list_head rx_queue;
struct spinlock rx_queue_lock;
/*
* The current virtual block device. This gets replaced when we do
* a reset since other parts of the kernel (e.g. vfs) may still
* be accessing the disk.
*/
struct vs_block_device *blkdev;
/* Shared work item for disk creation */
struct work_struct disk_creation_work;
struct kref kref;
};
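/*
 * Illustrative sketch only, not part of this patch: the lifetime rule
 * described in the struct comments above reduces to the check below. A
 * vs_block_device can outlive a client reset, so its client pointer is
 * only trusted while the client state is locked and client->blkdev still
 * points back at this device. vs_block_client_blkdev_open() below is the
 * real implementation of this pattern.
 */
static inline bool example_blkdev_is_current(struct vs_block_device *blkdev)
{
	struct block_client *client = blkdev->client;
	bool is_current = false;

	if (client && vs_state_lock_safe(&client->client)) {
		is_current = (client->blkdev == blkdev);
		vs_state_unlock(&client->client);
	}

	return is_current;
}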
#define state_to_block_client(state) \
container_of(state, struct block_client, client)
static int block_client_major;
/* Unique identifier allocation for virtual block devices */
static DEFINE_IDA(vs_block_ida);
static DEFINE_MUTEX(vs_block_ida_lock);
static int
block_client_vs_to_linux_error(vservice_block_block_io_error_t vs_err)
{
switch (vs_err) {
case VSERVICE_BLOCK_INVALID_INDEX:
return -EILSEQ;
case VSERVICE_BLOCK_MEDIA_FAILURE:
return -EIO;
case VSERVICE_BLOCK_MEDIA_TIMEOUT:
return -ETIMEDOUT;
case VSERVICE_BLOCK_UNSUPPORTED_COMMAND:
return -ENOTSUPP;
case VSERVICE_BLOCK_SERVICE_RESET:
return -ENXIO;
default:
WARN_ON(vs_err);
return 0;
}
return 0;
}
static void vs_block_client_kfree(struct kref *kref)
{
struct block_client *client =
container_of(kref, struct block_client, kref);
vs_put_service(client->service);
kfree(client);
}
static void vs_block_client_put(struct block_client *client)
{
kref_put(&client->kref, vs_block_client_kfree);
}
static void vs_block_device_kfree(struct kref *kref)
{
struct vs_block_device *blkdev =
container_of(kref, struct vs_block_device, kref);
/* Delete the disk and clean up its queue */
del_gendisk(blkdev->disk);
blk_cleanup_queue(blkdev->queue);
put_disk(blkdev->disk);
mutex_lock(&vs_block_ida_lock);
ida_free(&vs_block_ida, blkdev->id);
mutex_unlock(&vs_block_ida_lock);
if (blkdev->client)
vs_block_client_put(blkdev->client);
kfree(blkdev);
}
static void vs_block_device_put(struct vs_block_device *blkdev)
{
kref_put(&blkdev->kref, vs_block_device_kfree);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
static void
#else
static int
#endif
vs_block_client_blkdev_release(struct gendisk *disk, fmode_t mode)
{
struct vs_block_device *blkdev = disk->private_data;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
if (WARN_ON(!blkdev))
return;
#else
if (WARN_ON(!blkdev))
return -ENXIO;
#endif
vs_block_device_put(blkdev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
return 0;
#endif
}
static int vs_block_client_blkdev_open(struct block_device *bdev, fmode_t mode)
{
struct vs_block_device *blkdev = bdev->bd_disk->private_data;
struct block_client *client;
int err = -ENXIO;
if (!blkdev || !kref_get_unless_zero(&blkdev->kref))
goto fail_get_blkdev;
client = blkdev->client;
if (WARN_ON(!client))
goto fail_lock_client;
if (!vs_state_lock_safe(&client->client)) {
err = -ENODEV;
goto fail_lock_client;
}
if (blkdev != client->blkdev) {
/* The client has reset, this blkdev is no longer usable */
err = -ENXIO;
goto fail_check_client;
}
if ((mode & FMODE_WRITE) > 0 && client->client.readonly) {
dev_dbg(&client->service->dev,
"opening a readonly disk as writable\n");
err = -EROFS;
goto fail_check_client;
}
vs_state_unlock(&client->client);
return 0;
fail_check_client:
vs_state_unlock(&client->client);
fail_lock_client:
vs_block_device_put(blkdev);
fail_get_blkdev:
return err;
}
static int vs_block_client_blkdev_getgeo(struct block_device *bdev,
struct hd_geometry *geo)
{
/* These are sane default values for the disk geometry. */
geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
geo->heads = 4;
geo->sectors = 16;
return 0;
}
/*
* Indirectly determine linux block layer sector size and ensure that our
* sector size matches.
*/
static int vs_block_client_check_sector_size(struct block_client *client,
struct bio *bio)
{
unsigned int expected_bytes;
if (unlikely(!bio_sectors(bio))) {
dev_err(&client->service->dev, "zero-length bio\n");
return -EIO;
}
expected_bytes = bio_sectors(bio) * client->client.sector_size;
if (unlikely(bio_size(bio) != expected_bytes)) {
dev_err(&client->service->dev,
"bio has %zd bytes, which is unexpected "
"for %d sectors of %zd bytes each\n",
(size_t)bio_size(bio), bio_sectors(bio),
(size_t)client->client.sector_size);
return -EIO;
}
return 0;
}
static const struct block_device_operations block_client_ops = {
.getgeo = vs_block_client_blkdev_getgeo,
.open = vs_block_client_blkdev_open,
.release = vs_block_client_blkdev_release,
.owner = THIS_MODULE,
};
static int block_client_send_write_req(struct block_client *client,
struct bio *bio)
{
struct vs_client_block_state *state = &client->client;
struct vs_mbuf *mbuf;
struct vs_pbuf pbuf;
struct bio_vec *bvec;
int err;
bool flush, nodelay, commit;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
struct bvec_iter iter;
struct bio_vec bvec_local;
#else
int i;
#endif
err = vs_block_client_check_sector_size(client, bio);
if (err < 0)
goto fail;
do {
/* Wait until it's possible to send a write request */
err = vs_wait_state_nointr(state,
vs_client_block_io_req_write_can_send(state));
if (err == -ECANCELED)
err = -ENXIO;
if (err < 0)
goto fail;
/* Wait for quota, while sending a write remains possible */
mbuf = vs_wait_alloc_nointr(state,
vs_client_block_io_req_write_can_send(state),
vs_client_block_io_alloc_req_write(
state, &pbuf, GFP_KERNEL));
err = PTR_ERR_OR_ZERO(mbuf);
/* Retry if sending is no longer possible */
} while (err == -ECANCELED);
if (err < 0)
goto fail;
vs_pbuf_resize(&pbuf, 0);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
bvec = &bvec_local;
bio_for_each_segment(bvec_local, bio, iter)
#else
bio_for_each_segment(bvec, bio, i)
#endif
{
unsigned long flags;
void *buf = bvec_kmap_irq(bvec, &flags);
flush_kernel_dcache_page(bvec->bv_page);
err = vs_pbuf_append(&pbuf, buf, bvec->bv_len);
bvec_kunmap_irq(buf, &flags);
if (err < 0) {
dev_err(&client->service->dev,
"pbuf copy failed with err %d\n", err);
err = -EIO;
goto fail_free_write;
}
}
if (unlikely(vs_pbuf_size(&pbuf) != bio_size(bio))) {
dev_err(&client->service->dev,
"pbuf size is wrong: %zd, should be %zd\n",
vs_pbuf_size(&pbuf), (size_t)bio_size(bio));
err = -EIO;
goto fail_free_write;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
flush = (bio_flags(bio) & REQ_PREFLUSH);
commit = (bio_flags(bio) & REQ_FUA);
nodelay = (bio_flags(bio) & REQ_SYNC);
#else
flush = (bio->bi_rw & REQ_FLUSH);
commit = (bio->bi_rw & REQ_FUA);
nodelay = (bio->bi_rw & REQ_SYNC);
#endif
err = vs_client_block_io_req_write(state, bio, bio_sector(bio),
bio_sectors(bio), nodelay, flush, commit, pbuf, mbuf);
if (err) {
dev_err(&client->service->dev,
"write req failed with err %d\n", err);
goto fail_free_write;
}
return 0;
fail_free_write:
vs_client_block_io_free_req_write(state, &pbuf, mbuf);
fail:
return err;
}
static int block_client_send_read_req(struct block_client *client,
struct bio *bio)
{
struct vs_client_block_state *state = &client->client;
int err;
bool flush, nodelay;
err = vs_block_client_check_sector_size(client, bio);
if (err < 0)
return err;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)
flush = (bio_flags(bio) & REQ_PREFLUSH);
nodelay = (bio_flags(bio) & REQ_SYNC);
#else
flush = (bio->bi_rw & REQ_FLUSH);
nodelay = (bio->bi_rw & REQ_SYNC);
#endif
do {
/* Wait until it's possible to send a read request */
err = vs_wait_state_nointr(state,
vs_client_block_io_req_read_can_send(state));
if (err == -ECANCELED)
err = -ENXIO;
if (err < 0)
break;
/* Wait for quota, while sending a read remains possible */
err = vs_wait_send_nointr(state,
vs_client_block_io_req_read_can_send(state),
vs_client_block_io_req_read(state, bio,
bio_sector(bio), bio_sectors(bio),
nodelay, flush, GFP_KERNEL));
} while (err == -ECANCELED);
return err;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
static blk_qc_t
#else
static void
#endif
vs_block_client_make_request(struct request_queue *q, struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct vs_block_device *blkdev = bdev->bd_disk->private_data;
struct block_client *client;
int err = 0;
client = blkdev->client;
if (!client || !kref_get_unless_zero(&client->kref)) {
err = -ENODEV;
goto fail_get_client;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
blk_queue_split(q, &bio, q->bio_split);
#endif
if (!vs_state_lock_safe(&client->client)) {
err = -ENODEV;
goto fail_lock_client;
}
if (client->blkdev != blkdev) {
/* Client has reset, this block device is no longer usable */
err = -EIO;
goto fail_check_client;
}
if (bio_data_dir(bio) == WRITE)
err = block_client_send_write_req(client, bio);
else
err = block_client_send_read_req(client, bio);
fail_check_client:
if (err == -ENOLINK)
err = -EIO;
else
vs_state_unlock(&client->client);
fail_lock_client:
vs_block_client_put(client);
fail_get_client:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
if (err < 0) {
bio->bi_error = err;
bio_endio(bio);
}
#else
if (err < 0)
bio_endio(bio, err);
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
return BLK_QC_T_NONE;
#endif
}
static int vs_block_client_get_blkdev_id(struct block_client *client)
{
int id;
/*
* ida_alloc() returns a new id, or a negative errno on failure; the old
* ida_pre_get()/retry pattern is not needed with this API.
*/
mutex_lock(&vs_block_ida_lock);
id = ida_alloc(&vs_block_ida, GFP_KERNEL);
mutex_unlock(&vs_block_ida_lock);
return id;
}
static int vs_block_client_disk_add(struct block_client *client)
{
struct vs_block_device *blkdev;
unsigned int max_hw_sectors;
int err;
dev_dbg(&client->service->dev, "device add\n");
blkdev = kzalloc(sizeof(*blkdev), GFP_KERNEL);
if (!blkdev) {
err = -ENOMEM;
goto fail;
}
kref_init(&blkdev->kref);
blkdev->id = vs_block_client_get_blkdev_id(client);
if (blkdev->id < 0) {
err = blkdev->id;
goto fail_free_blkdev;
}
if ((blkdev->id * PERDEV_MINORS) >> MINORBITS) {
err = -ENODEV;
goto fail_remove_ida;
}
blkdev->queue = blk_alloc_queue(GFP_KERNEL);
if (!blkdev->queue) {
dev_err(&client->service->dev,
"Error initializing blk queue\n");
err = -ENOMEM;
goto fail_remove_ida;
}
blk_queue_make_request(blkdev->queue, vs_block_client_make_request);
blk_queue_bounce_limit(blkdev->queue, BLK_BOUNCE_ANY);
blk_queue_dma_alignment(blkdev->queue, 0);
/*
* Mark this as a paravirtualised device. This is just an alias
* of QUEUE_FLAG_NONROT, which prevents the I/O schedulers trying
* to wait for the disk to spin.
*/
blk_queue_flag_set(QUEUE_FLAG_VIRT, blkdev->queue);
blkdev->queue->queuedata = blkdev;
blkdev->client = client;
kref_get(&client->kref);
max_hw_sectors = min_t(sector_t, BLK_DEF_MAX_SECTORS,
client->client.segment_size /
client->client.sector_size);
blk_queue_max_hw_sectors(blkdev->queue, max_hw_sectors);
blkdev->disk = alloc_disk(PERDEV_MINORS);
if (!blkdev->disk) {
dev_err(&client->service->dev, "Error allocating disk\n");
err = -ENOMEM;
goto fail_free_blk_queue;
}
if (client->client.readonly) {
dev_dbg(&client->service->dev, "set device as readonly\n");
set_disk_ro(blkdev->disk, true);
}
blkdev->disk->major = block_client_major;
blkdev->disk->first_minor = blkdev->id * PERDEV_MINORS;
blkdev->disk->fops = &block_client_ops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
blkdev->disk->driverfs_dev = &client->service->dev;
#endif
blkdev->disk->private_data = blkdev;
blkdev->disk->queue = blkdev->queue;
blkdev->disk->flags |= GENHD_FL_EXT_DEVT;
/*
* The block device name is vblock<x>, where x is a unique
* identifier. Userspace should rename or symlink the device
* appropriately, typically by processing the add uevent.
*
* If a virtual block device is reset then it may re-open with a
* different identifier if something still holds a reference to
* the old device (such as a userspace application having an open
* file handle).
*/
snprintf(blkdev->disk->disk_name, sizeof(blkdev->disk->disk_name),
"%s%d", CLIENT_BLKDEV_NAME, blkdev->id);
set_capacity(blkdev->disk, client->client.device_sectors);
/*
* We need to hold a reference on blkdev across add_disk(), to make
* sure a concurrent reset does not immediately release the blkdev
* and call del_gendisk().
*/
kref_get(&blkdev->kref);
vs_service_state_lock(client->service);
if (!VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base)) {
vs_service_state_unlock(client->service);
err = -ENXIO;
goto fail_free_blk_queue;
}
client->blkdev = blkdev;
vs_service_state_unlock(client->service);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,7,0)
device_add_disk(&client->service->dev, blkdev->disk);
#else
add_disk(blkdev->disk);
#endif
dev_dbg(&client->service->dev, "added block disk '%s'\n",
blkdev->disk->disk_name);
/* Release the reference taken above. */
vs_block_device_put(blkdev);
return 0;
fail_free_blk_queue:
blk_cleanup_queue(blkdev->queue);
fail_remove_ida:
mutex_lock(&vs_block_ida_lock);
ida_free(&vs_block_ida, blkdev->id);
mutex_unlock(&vs_block_ida_lock);
fail_free_blkdev:
kfree(blkdev);
fail:
return err;
}
static void vs_block_client_disk_creation_work(struct work_struct *work)
{
struct block_client *client = container_of(work,
struct block_client, disk_creation_work);
struct vs_block_device *blkdev;
bool running;
vs_service_state_lock(client->service);
blkdev = client->blkdev;
running = VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base);
dev_dbg(&client->service->dev,
"disk changed: blkdev = %pK, running = %d\n",
client->blkdev, running);
if (!blkdev && running) {
dev_dbg(&client->service->dev, "adding block disk\n");
vs_service_state_unlock(client->service);
vs_block_client_disk_add(client);
} else {
vs_service_state_unlock(client->service);
}
}
static void vs_block_client_rx_tasklet(unsigned long data);
static struct vs_client_block_state *
vs_block_client_alloc(struct vs_service_device *service)
{
struct block_client *client;
client = kzalloc(sizeof(*client), GFP_KERNEL);
if (!client) {
dev_err(&service->dev, "Error allocating client struct\n");
return NULL;
}
vs_get_service(service);
client->service = service;
INIT_LIST_HEAD(&client->rx_queue);
spin_lock_init(&client->rx_queue_lock);
tasklet_init(&client->rx_tasklet, vs_block_client_rx_tasklet,
(unsigned long)client);
tasklet_disable(&client->rx_tasklet);
INIT_WORK(&client->disk_creation_work,
vs_block_client_disk_creation_work);
kref_init(&client->kref);
dev_dbg(&service->dev, "New block client %pK\n", client);
return &client->client;
}
static void vs_block_client_release(struct vs_client_block_state *state)
{
struct block_client *client = state_to_block_client(state);
flush_work(&client->disk_creation_work);
vs_block_client_put(client);
}
/* FIXME: Jira ticket SDK-2459 - anjaniv */
static void vs_block_client_closed(struct vs_client_block_state *state)
{
struct block_client *client = state_to_block_client(state);
/*
* Stop the RX bounce tasklet and clean up its queue. We can wait for
* it to stop safely because it doesn't need to acquire the state
* lock, only the RX lock which we acquire after it is disabled.
*/
tasklet_disable(&client->rx_tasklet);
spin_lock(&client->rx_queue_lock);
while (!list_empty(&client->rx_queue)) {
struct vs_mbuf *mbuf = list_first_entry(&client->rx_queue,
struct vs_mbuf, queue);
struct vs_pbuf pbuf;
list_del(&mbuf->queue);
vs_client_block_io_getbufs_ack_read(state, &pbuf, mbuf);
vs_client_block_io_free_ack_read(state, &pbuf, mbuf);
}
spin_unlock(&client->rx_queue_lock);
if (client->blkdev) {
struct vs_block_device *blkdev = client->blkdev;
char service_remove[] = "REMOVING_SERVICE=1";
/* + 9 because "DEVNAME=" is 8 chars plus 1 for '\0' */
char devname[sizeof(blkdev->disk->disk_name) + 9];
char *envp[] = { service_remove, devname, NULL };
dev_dbg(&client->service->dev, "removing block disk\n");
/*
* Send a change event with DEVNAME to allow the block helper
* script to remove any server sessions which use either
* v${SERVICE_NAME} or ${DEVNAME}. The remove event generated
* by the session driver doesn't include DEVNAME so the only
* way for userspace to map SERVICE_NAME to DEVNAME is by the
* symlink added when the client service was created. If that
* symlink has been deleted, there's no other way to connect
* the two names.
*/
snprintf(devname, sizeof(devname), "DEVNAME=%s",
blkdev->disk->disk_name);
kobject_uevent_env(&client->service->dev.kobj, KOBJ_CHANGE,
envp);
/*
* We are done with the device now. The block device will only
* get removed once there are no more users (e.g. userspace
* applications).
*/
client->blkdev = NULL;
vs_block_device_put(blkdev);
}
}
static void vs_block_client_opened(struct vs_client_block_state *state)
{
struct block_client *client = state_to_block_client(state);
#if !defined(CONFIG_LBDAF) && !defined(CONFIG_64BIT)
if (state->device_sectors >> (sizeof(sector_t) * 8)) {
dev_err(&client->service->dev,
"Client doesn't support full capacity large block devices\n");
vs_client_block_close(state);
return;
}
#endif
/* Unblock the RX bounce tasklet. */
tasklet_enable(&client->rx_tasklet);
/*
* The block device allocation needs to sleep, so we defer it to a
* work queue.
*/
queue_work(client->service->work_queue, &client->disk_creation_work);
}
static int vs_block_client_ack_read(struct vs_client_block_state *state,
void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf)
{
struct block_client *client = state_to_block_client(state);
struct bio *bio = tag;
struct bio_vec *bvec;
int err = 0;
size_t bytes_read = 0;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
struct bio_vec bvec_local;
struct bvec_iter iter;
#else
int i;
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 14, 0)
bvec = &bvec_local;
bio_for_each_segment(bvec_local, bio, iter)
#else
bio_for_each_segment(bvec, bio, i)
#endif
{
unsigned long flags;
void *buf;
if (vs_pbuf_size(&pbuf) < bytes_read + bvec->bv_len) {
dev_err(&client->service->dev,
"bio read overrun: %zu into %zu byte response, but need %zd bytes\n",
bytes_read, vs_pbuf_size(&pbuf),
(size_t)bvec->bv_len);
err = -EIO;
break;
}
buf = bvec_kmap_irq(bvec, &flags);
memcpy(buf, vs_pbuf_data(&pbuf) + bytes_read, bvec->bv_len);
flush_kernel_dcache_page(bvec->bv_page);
bvec_kunmap_irq(buf, &flags);
bytes_read += bvec->bv_len;
}
vs_client_block_io_free_ack_read(state, &pbuf, mbuf);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
if (err < 0)
bio->bi_error = err;
bio_endio(bio);
#else
bio_endio(bio, err);
#endif
return 0;
}
static void vs_block_client_rx_tasklet(unsigned long data)
{
struct block_client *client = (struct block_client *)data;
struct vs_mbuf *mbuf;
struct vs_pbuf pbuf;
spin_lock(&client->rx_queue_lock);
/* The list shouldn't be empty. */
if (WARN_ON(list_empty(&client->rx_queue))) {
spin_unlock(&client->rx_queue_lock);
return;
}
/* Get the next mbuf, and reschedule ourselves if there are more. */
mbuf = list_first_entry(&client->rx_queue, struct vs_mbuf, queue);
list_del(&mbuf->queue);
if (!list_empty(&client->rx_queue))
tasklet_schedule(&client->rx_tasklet);
spin_unlock(&client->rx_queue_lock);
/* Process the ack. */
vs_client_block_io_getbufs_ack_read(&client->client, &pbuf, mbuf);
vs_block_client_ack_read(&client->client, mbuf->priv, pbuf, mbuf);
}
static int vs_block_client_queue_ack_read(struct vs_client_block_state *state,
void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf)
{
struct block_client *client = state_to_block_client(state);
spin_lock(&client->rx_queue_lock);
list_add_tail(&mbuf->queue, &client->rx_queue);
mbuf->priv = tag;
spin_unlock(&client->rx_queue_lock);
tasklet_schedule(&client->rx_tasklet);
wake_up(&state->service->quota_wq);
return 0;
}
static int vs_block_client_ack_write(struct vs_client_block_state *state,
void *tag)
{
struct bio *bio = tag;
if (WARN_ON(!bio))
return -EPROTO;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
bio_endio(bio);
#else
bio_endio(bio, 0);
#endif
wake_up(&state->service->quota_wq);
return 0;
}
static int vs_block_client_nack_io(struct vs_client_block_state *state,
void *tag, vservice_block_block_io_error_t err)
{
struct bio *bio = tag;
if (WARN_ON(!bio))
return -EPROTO;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
bio->bi_error = block_client_vs_to_linux_error(err);
bio_endio(bio);
#else
bio_endio(bio, block_client_vs_to_linux_error(err));
#endif
wake_up(&state->service->quota_wq);
return 0;
}
static struct vs_client_block block_client_driver = {
.rx_atomic = true,
.alloc = vs_block_client_alloc,
.release = vs_block_client_release,
.opened = vs_block_client_opened,
.closed = vs_block_client_closed,
.io = {
.ack_read = vs_block_client_queue_ack_read,
.nack_read = vs_block_client_nack_io,
.ack_write = vs_block_client_ack_write,
.nack_write = vs_block_client_nack_io,
}
};
static int __init vs_block_client_init(void)
{
int err;
block_client_major = register_blkdev(0, CLIENT_BLKDEV_NAME);
if (block_client_major < 0) {
pr_err("Err registering blkdev\n");
err = -ENOMEM;
goto fail;
}
err = vservice_block_client_register(&block_client_driver,
"block_client_driver");
if (err)
goto fail_unregister_blkdev;
return 0;
fail_unregister_blkdev:
unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME);
fail:
return err;
}
static void __exit vs_block_client_exit(void)
{
vservice_block_client_unregister(&block_client_driver);
unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME);
}
module_init(vs_block_client_init);
module_exit(vs_block_client_exit);
MODULE_DESCRIPTION("OKL4 Virtual Services Block Client Driver");
MODULE_AUTHOR("Open Kernel Labs, Inc");

File diff suppressed because it is too large


@@ -5,6 +5,23 @@
if VSERVICES_SERVER || VSERVICES_CLIENT
menu "Protocol drivers"
config VSERVICES_PROTOCOL_BLOCK
bool
config VSERVICES_PROTOCOL_BLOCK_SERVER
tristate "Block server protocol"
depends on VSERVICES_SUPPORT && VSERVICES_SERVER
select VSERVICES_PROTOCOL_BLOCK
help
This option adds support for the Virtual Services block protocol server.
config VSERVICES_PROTOCOL_BLOCK_CLIENT
tristate "Block client protocol"
depends on VSERVICES_SUPPORT && VSERVICES_CLIENT
select VSERVICES_PROTOCOL_BLOCK
help
This option adds support for the Virtual Services block protocol client.
config VSERVICES_PROTOCOL_SERIAL
bool


@@ -1,4 +1,5 @@
# This is an autogenerated Makefile for vservice-linux-stacks
obj-$(CONFIG_VSERVICES_SUPPORT) += core/
obj-$(CONFIG_VSERVICES_PROTOCOL_BLOCK) += block/
obj-$(CONFIG_VSERVICES_PROTOCOL_SERIAL) += serial/


@@ -0,0 +1,7 @@
ccflags-y += -Werror
obj-$(CONFIG_VSERVICES_PROTOCOL_BLOCK_SERVER) += vservices_protocol_block_server.o
vservices_protocol_block_server-objs = server.o
obj-$(CONFIG_VSERVICES_PROTOCOL_BLOCK_CLIENT) += vservices_protocol_block_client.o
vservices_protocol_block_client-objs = client.o

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1 @@
header-y += types.h


@@ -0,0 +1,175 @@
/*
* Copyright (c) 2012-2018 General Dynamics
* Copyright (c) 2014 Open Kernel Labs, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#if !defined(__VSERVICES_CLIENT_BLOCK__)
#define __VSERVICES_CLIENT_BLOCK__
struct vs_service_device;
struct vs_client_block_state;
struct vs_client_block {
/*
* If set to false then the receive message handlers are run from
* workqueue context and are allowed to sleep. If set to true the
* message handlers are run from tasklet context and may not sleep.
*/
bool rx_atomic;
/*
* If this is set to true along with rx_atomic, the driver is allowed
* to send messages from softirq contexts other than the receive
* message handlers, after calling vs_service_state_lock_bh. Otherwise,
* messages may only be sent from the receive message handlers, or
* from task context after calling vs_service_state_lock. This must
* not be set to true if rx_atomic is set to false.
*/
bool tx_atomic;
/** session setup **/
struct vs_client_block_state *(*alloc) (struct vs_service_device *
service);
void (*release) (struct vs_client_block_state * _state);
struct vs_service_driver *driver;
/** Opened, reopened and closed functions **/
void (*opened) (struct vs_client_block_state * _state);
void (*reopened) (struct vs_client_block_state * _state);
void (*closed) (struct vs_client_block_state * _state);
/** Send/receive state callbacks **/
int (*tx_ready) (struct vs_client_block_state * _state);
struct {
int (*ack_read) (struct vs_client_block_state * _state,
void *_opaque, struct vs_pbuf data,
struct vs_mbuf * _mbuf);
int (*nack_read) (struct vs_client_block_state * _state,
void *_opaque,
vservice_block_block_io_error_t err);
int (*ack_write) (struct vs_client_block_state * _state,
void *_opaque);
int (*nack_write) (struct vs_client_block_state * _state,
void *_opaque,
vservice_block_block_io_error_t err);
} io;
};
struct vs_client_block_state {
vservice_block_state_t state;
bool readonly;
uint32_t sector_size;
uint32_t segment_size;
uint64_t device_sectors;
bool flushable;
bool committable;
struct {
uint32_t sector_size;
uint32_t segment_size;
} io;
struct vs_service_device *service;
bool released;
};
extern int vs_client_block_reopen(struct vs_client_block_state *_state);
extern int vs_client_block_close(struct vs_client_block_state *_state);
/** interface block_io **/
/* command parallel read */
extern int vs_client_block_io_getbufs_ack_read(struct vs_client_block_state
*_state, struct vs_pbuf *data,
struct vs_mbuf *_mbuf);
extern int vs_client_block_io_free_ack_read(struct vs_client_block_state
*_state, struct vs_pbuf *data,
struct vs_mbuf *_mbuf);
extern int vs_client_block_io_req_read(struct vs_client_block_state *_state,
void *_opaque, uint64_t sector_index,
uint32_t num_sects, bool nodelay,
bool flush, gfp_t flags);
/* command parallel write */
extern struct vs_mbuf *vs_client_block_io_alloc_req_write(struct
vs_client_block_state
*_state,
struct vs_pbuf *data,
gfp_t flags);
extern int vs_client_block_io_free_req_write(struct vs_client_block_state
*_state, struct vs_pbuf *data,
struct vs_mbuf *_mbuf);
extern int vs_client_block_io_req_write(struct vs_client_block_state *_state,
void *_opaque, uint64_t sector_index,
uint32_t num_sects, bool nodelay,
bool flush, bool commit,
struct vs_pbuf data,
struct vs_mbuf *_mbuf);
/* Status APIs for async parallel commands */
static inline bool vs_client_block_io_req_read_can_send(struct
vs_client_block_state
*_state)
{
return !bitmap_full(_state->state.io.read_bitmask,
VSERVICE_BLOCK_IO_READ_MAX_PENDING);
}
static inline bool vs_client_block_io_req_read_is_pending(struct
vs_client_block_state
*_state)
{
return !bitmap_empty(_state->state.io.read_bitmask,
VSERVICE_BLOCK_IO_READ_MAX_PENDING);
}
static inline bool vs_client_block_io_req_write_can_send(struct
vs_client_block_state
*_state)
{
return !bitmap_full(_state->state.io.write_bitmask,
VSERVICE_BLOCK_IO_WRITE_MAX_PENDING);
}
static inline bool vs_client_block_io_req_write_is_pending(struct
vs_client_block_state
*_state)
{
return !bitmap_empty(_state->state.io.write_bitmask,
VSERVICE_BLOCK_IO_WRITE_MAX_PENDING);
}
/** Module registration **/
struct module;
extern int __vservice_block_client_register(struct vs_client_block *client,
const char *name,
struct module *owner);
static inline int vservice_block_client_register(struct vs_client_block *client,
const char *name)
{
#ifdef MODULE
extern struct module __this_module;
struct module *this_module = &__this_module;
#else
struct module *this_module = NULL;
#endif
return __vservice_block_client_register(client, name, this_module);
}
extern int vservice_block_client_unregister(struct vs_client_block *client);
#endif /* ! __VSERVICES_CLIENT_BLOCK__ */
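/*
 * Illustrative skeleton, not part of this patch: a minimal client written
 * against the callbacks and registration helper declared above. Everything
 * prefixed with example_ is hypothetical; vs_block_client.c in this series
 * is the real, complete user of this interface. The .io handlers and the
 * module_exit()/MODULE_LICENSE() boilerplate are omitted for brevity.
 */
#include <linux/module.h>
#include <linux/slab.h>

static struct vs_client_block_state *
example_alloc(struct vs_service_device *service)
{
	/* Per-session state; a real client embeds this in its own struct. */
	return kzalloc(sizeof(struct vs_client_block_state), GFP_KERNEL);
}

static void example_release(struct vs_client_block_state *state)
{
	kfree(state);
}

static void example_opened(struct vs_client_block_state *state)
{
	/* The device parameters are valid once the session is open. */
	pr_info("block service open: %llu sectors of %u bytes%s\n",
			(unsigned long long)state->device_sectors,
			state->sector_size,
			state->readonly ? " (read-only)" : "");
}

static void example_closed(struct vs_client_block_state *state)
{
}

static struct vs_client_block example_client_driver = {
	.rx_atomic = false,	/* run callbacks from workqueue context */
	.alloc = example_alloc,
	.release = example_release,
	.opened = example_opened,
	.closed = example_closed,
};

static int __init example_client_init(void)
{
	return vservice_block_client_register(&example_client_driver,
			"example_block_client");
}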


@@ -0,0 +1,42 @@
/*
* Copyright (c) 2012-2018 General Dynamics
* Copyright (c) 2014 Open Kernel Labs, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#if !defined(__VSERVICES_BLOCK_PROTOCOL_H__)
#define __VSERVICES_BLOCK_PROTOCOL_H__
#define VSERVICE_BLOCK_PROTOCOL_NAME "com.ok-labs.block"
typedef enum {
VSERVICE_BLOCK_BASE_REQ_OPEN,
VSERVICE_BLOCK_BASE_ACK_OPEN,
VSERVICE_BLOCK_BASE_NACK_OPEN,
VSERVICE_BLOCK_BASE_REQ_CLOSE,
VSERVICE_BLOCK_BASE_ACK_CLOSE,
VSERVICE_BLOCK_BASE_NACK_CLOSE,
VSERVICE_BLOCK_BASE_REQ_REOPEN,
VSERVICE_BLOCK_BASE_ACK_REOPEN,
VSERVICE_BLOCK_BASE_NACK_REOPEN,
VSERVICE_BLOCK_BASE_MSG_RESET,
VSERVICE_BLOCK_IO_REQ_READ,
VSERVICE_BLOCK_IO_ACK_READ,
VSERVICE_BLOCK_IO_NACK_READ,
VSERVICE_BLOCK_IO_REQ_WRITE,
VSERVICE_BLOCK_IO_ACK_WRITE,
VSERVICE_BLOCK_IO_NACK_WRITE,
} vservice_block_message_id_t;
typedef enum {
VSERVICE_BLOCK_NBIT_IN__COUNT
} vservice_block_nbit_in_t;
typedef enum {
VSERVICE_BLOCK_NBIT_OUT__COUNT
} vservice_block_nbit_out_t;
/* Notification mask macros */
#endif /* ! __VSERVICES_BLOCK_PROTOCOL_H__ */


@@ -0,0 +1,177 @@
/*
* Copyright (c) 2012-2018 General Dynamics
* Copyright (c) 2014 Open Kernel Labs, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#if !defined(VSERVICES_SERVER_BLOCK)
#define VSERVICES_SERVER_BLOCK
struct vs_service_device;
struct vs_server_block_state;
struct vs_server_block {
/*
* If set to false then the receive message handlers are run from
* workqueue context and are allowed to sleep. If set to true the
* message handlers are run from tasklet context and may not sleep.
*/
bool rx_atomic;
/*
* If this is set to true along with rx_atomic, the driver is allowed
* to send messages from softirq contexts other than the receive
* message handlers, after calling vs_service_state_lock_bh. Otherwise,
* messages may only be sent from the receive message handlers, or
* from task context after calling vs_service_state_lock. This must
* not be set to true if rx_atomic is set to false.
*/
bool tx_atomic;
/*
* These are the driver's recommended message quotas. They are used
* by the core service to select message quotas for services with no
* explicitly configured quotas.
*/
u32 in_quota_best;
u32 out_quota_best;
/** session setup **/
struct vs_server_block_state *(*alloc) (struct vs_service_device *
service);
void (*release) (struct vs_server_block_state * _state);
struct vs_service_driver *driver;
/** Open, reopen, close and closed functions **/
vs_server_response_type_t(*open) (struct vs_server_block_state *
_state);
vs_server_response_type_t(*reopen) (struct vs_server_block_state *
_state);
vs_server_response_type_t(*close) (struct vs_server_block_state *
_state);
void (*closed) (struct vs_server_block_state * _state);
/** Send/receive state callbacks **/
int (*tx_ready) (struct vs_server_block_state * _state);
struct {
int (*req_read) (struct vs_server_block_state * _state,
uint32_t _opaque, uint64_t sector_index,
uint32_t num_sects, bool nodelay, bool flush);
int (*req_write) (struct vs_server_block_state * _state,
uint32_t _opaque, uint64_t sector_index,
uint32_t num_sects, bool nodelay, bool flush,
bool commit, struct vs_pbuf data,
struct vs_mbuf * _mbuf);
} io;
};
struct vs_server_block_state {
vservice_block_state_t state;
bool readonly;
uint32_t sector_size;
uint32_t segment_size;
uint64_t device_sectors;
bool flushable;
bool committable;
struct {
uint32_t sector_size;
uint32_t segment_size;
} io;
struct vs_service_device *service;
bool released;
};
/** Complete calls for server core functions **/
extern int vs_server_block_open_complete(struct vs_server_block_state *_state,
vs_server_response_type_t resp);
extern int vs_server_block_close_complete(struct vs_server_block_state *_state,
vs_server_response_type_t resp);
extern int vs_server_block_reopen_complete(struct vs_server_block_state *_state,
vs_server_response_type_t resp);
/** interface block_io **/
/* command parallel read */
extern struct vs_mbuf *vs_server_block_io_alloc_ack_read(struct
vs_server_block_state
*_state,
struct vs_pbuf *data,
gfp_t flags);
extern int vs_server_block_io_free_ack_read(struct vs_server_block_state
*_state, struct vs_pbuf *data,
struct vs_mbuf *_mbuf);
extern int vs_server_block_io_send_ack_read(struct vs_server_block_state
*_state, uint32_t _opaque,
struct vs_pbuf data,
struct vs_mbuf *_mbuf);
extern int vs_server_block_io_send_nack_read(struct vs_server_block_state
*_state, uint32_t _opaque,
vservice_block_block_io_error_t
err, gfp_t flags);
/* command parallel write */
extern int vs_server_block_io_getbufs_req_write(struct vs_server_block_state
*_state, struct vs_pbuf *data,
struct vs_mbuf *_mbuf);
extern int vs_server_block_io_free_req_write(struct vs_server_block_state
*_state, struct vs_pbuf *data,
struct vs_mbuf *_mbuf);
extern int vs_server_block_io_send_ack_write(struct vs_server_block_state
*_state, uint32_t _opaque,
gfp_t flags);
extern int vs_server_block_io_send_nack_write(struct vs_server_block_state
*_state, uint32_t _opaque,
vservice_block_block_io_error_t
err, gfp_t flags);
static inline bool vs_server_block_io_send_ack_read_is_pending(struct
vs_server_block_state
*_state)
{
return !bitmap_empty(_state->state.io.read_bitmask,
VSERVICE_BLOCK_IO_READ_MAX_PENDING);
}
static inline bool vs_server_block_io_send_ack_write_is_pending(struct
vs_server_block_state
*_state)
{
return !bitmap_empty(_state->state.io.write_bitmask,
VSERVICE_BLOCK_IO_WRITE_MAX_PENDING);
}
/** Module registration **/
struct module;
extern int __vservice_block_server_register(struct vs_server_block *server,
const char *name,
struct module *owner);
static inline int vservice_block_server_register(struct vs_server_block *server,
const char *name)
{
#ifdef MODULE
extern struct module __this_module;
struct module *this_module = &__this_module;
#else
struct module *this_module = NULL;
#endif
return __vservice_block_server_register(server, name, this_module);
}
extern int vservice_block_server_unregister(struct vs_server_block *server);
#endif /* ! VSERVICES_SERVER_BLOCK */
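/*
 * Illustrative fragment, not part of this patch: a server io.req_read
 * handler that rejects every read, using only declarations from this header
 * and the error codes from types.h. It assumes the driver sets rx_atomic to
 * false, so the handler runs from workqueue context and may use GFP_KERNEL.
 */
static int example_req_read(struct vs_server_block_state *state,
		uint32_t opaque, uint64_t sector_index, uint32_t num_sects,
		bool nodelay, bool flush)
{
	/* A real server reads from its backing device and sends ack_read. */
	return vs_server_block_io_send_nack_read(state, opaque,
			VSERVICE_BLOCK_UNSUPPORTED_COMMAND, GFP_KERNEL);
}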


@@ -0,0 +1,106 @@
/*
* Copyright (c) 2012-2018 General Dynamics
* Copyright (c) 2014 Open Kernel Labs, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#if !defined(VSERVICES_BLOCK_TYPES_H)
#define VSERVICES_BLOCK_TYPES_H
#define VSERVICE_BLOCK_IO_READ_MAX_PENDING 1024
#define VSERVICE_BLOCK_IO_WRITE_MAX_PENDING 1024
typedef enum vservice_block_block_io_error {
VSERVICE_BLOCK_INVALID_INDEX,
VSERVICE_BLOCK_MEDIA_FAILURE,
VSERVICE_BLOCK_MEDIA_TIMEOUT,
VSERVICE_BLOCK_UNSUPPORTED_COMMAND,
VSERVICE_BLOCK_SERVICE_RESET
} vservice_block_block_io_error_t;
typedef enum {
/* state closed */
VSERVICE_BASE_STATE_CLOSED = 0,
VSERVICE_BASE_STATE_CLOSED__OPEN,
VSERVICE_BASE_STATE_CLOSED__CLOSE,
VSERVICE_BASE_STATE_CLOSED__REOPEN,
/* state running */
VSERVICE_BASE_STATE_RUNNING,
VSERVICE_BASE_STATE_RUNNING__OPEN,
VSERVICE_BASE_STATE_RUNNING__CLOSE,
VSERVICE_BASE_STATE_RUNNING__REOPEN,
VSERVICE_BASE__RESET = VSERVICE_BASE_STATE_CLOSED
} vservice_base_statenum_t;
typedef struct {
vservice_base_statenum_t statenum;
} vservice_base_state_t;
#define VSERVICE_BASE_RESET_STATE (vservice_base_state_t) { \
.statenum = VSERVICE_BASE__RESET}
#define VSERVICE_BASE_STATE_IS_CLOSED(state) (\
((state).statenum == VSERVICE_BASE_STATE_CLOSED) || \
((state).statenum == VSERVICE_BASE_STATE_CLOSED__OPEN) || \
((state).statenum == VSERVICE_BASE_STATE_CLOSED__CLOSE) || \
((state).statenum == VSERVICE_BASE_STATE_CLOSED__REOPEN))
#define VSERVICE_BASE_STATE_IS_RUNNING(state) (\
((state).statenum == VSERVICE_BASE_STATE_RUNNING) || \
((state).statenum == VSERVICE_BASE_STATE_RUNNING__OPEN) || \
((state).statenum == VSERVICE_BASE_STATE_RUNNING__CLOSE) || \
((state).statenum == VSERVICE_BASE_STATE_RUNNING__REOPEN))
#define VSERVICE_BASE_STATE_VALID(state) ( \
VSERVICE_BASE_STATE_IS_CLOSED(state) ? true : \
VSERVICE_BASE_STATE_IS_RUNNING(state) ? true : \
false)
static inline const char *vservice_base_get_state_string(vservice_base_state_t
state)
{
static const char *names[] =
{ "closed", "closed__open", "closed__close", "closed__reopen",
"running", "running__open", "running__close", "running__reopen"
};
if (!VSERVICE_BASE_STATE_VALID(state)) {
return "INVALID";
}
return names[state.statenum];
}
typedef struct {
DECLARE_BITMAP(read_bitmask, VSERVICE_BLOCK_IO_READ_MAX_PENDING);
void *read_tags[VSERVICE_BLOCK_IO_READ_MAX_PENDING];
DECLARE_BITMAP(write_bitmask, VSERVICE_BLOCK_IO_WRITE_MAX_PENDING);
void *write_tags[VSERVICE_BLOCK_IO_WRITE_MAX_PENDING];
} vservice_block_io_state_t;
#define VSERVICE_BLOCK_IO_RESET_STATE (vservice_block_io_state_t) { \
.read_bitmask = {0}, \
.read_tags = {NULL}, \
.write_bitmask = {0}, \
.write_tags = {NULL}}
#define VSERVICE_BLOCK_IO_STATE_VALID(state) true
typedef struct {
vservice_base_state_t base;
vservice_block_io_state_t io;
} vservice_block_state_t;
#define VSERVICE_BLOCK_RESET_STATE (vservice_block_state_t) {\
.base = VSERVICE_BASE_RESET_STATE,\
.io = VSERVICE_BLOCK_IO_RESET_STATE }
#define VSERVICE_BLOCK_IS_STATE_RESET(state) \
((state).base.statenum == VSERVICE_BASE__RESET)
#endif /* ! VSERVICES_BLOCK_TYPES_H */