diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 19a418a1b63125..f3087846acd48e 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -34,6 +34,20 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from . +config INFINIBAND_EXP_USER_ACCESS + bool "Allow experimental support for Infiniband ABI" + depends on INFINIBAND_USER_ACCESS + ---help--- + IOCTL based ABI support for Infiniband. This allows userspace + to invoke the experimental IOCTL based ABI. + +config INFINIBAND_USE_IOCTL_BACKWARD_COMP + bool "Use IOCTL parsing for write commands" + depends on INFINIBAND_USER_ACCESS + ---help--- + Transform supported write commands to IOCTL commands and + execute them. Serialize the response back to the write schema. + config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index edaae9f9853c73..131ea4b1475816 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -28,4 +28,6 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o -ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ + rdma_core.o uverbs_ioctl_cmd.o uverbs_ioctl.o \ + uverbs_ioctl_merge.o diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 19d499dcab764b..fccc7bccc66a80 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -153,4 +153,18 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, int ib_nl_handle_ip_res_resp(struct sk_buff *skb, struct netlink_callback *cb); +/* Remove ignored fields set in the attribute mask */ +static inline int modify_qp_mask(enum ib_qp_type qp_type, int mask) +{ + switch (qp_type) { + case IB_QPT_XRC_INI: + return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); + case IB_QPT_XRC_TGT: + return mask & ~(IB_QP_MAX_QP_RD_ATOMIC 
| IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY); + default: + return mask; + } +} + #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 760ef603a46840..c3b68f54238676 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -168,11 +168,23 @@ static int alloc_name(char *name) return 0; } +static void ib_device_allocate_idrs(struct ib_device *device) +{ + spin_lock_init(&device->idr_lock); + idr_init(&device->idr); +} + +static void ib_device_destroy_idrs(struct ib_device *device) +{ + idr_destroy(&device->idr); +} + static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); ib_cache_release_one(dev); + ib_device_destroy_idrs(dev); kfree(dev->port_immutable); kfree(dev); } @@ -219,6 +231,8 @@ struct ib_device *ib_alloc_device(size_t size) if (!device) return NULL; + ib_device_allocate_idrs(device); + device->dev.class = &ib_class; device_initialize(&device->dev); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c new file mode 100644 index 00000000000000..a9ea010a5a1355 --- /dev/null +++ b/drivers/infiniband/core/rdma_core.c @@ -0,0 +1,576 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "uverbs.h" +#include "rdma_core.h" + +int uverbs_group_idx(u16 *id, unsigned int ngroups) +{ + int ret = (*id & UVERBS_ID_RESERVED_MASK) >> UVERBS_ID_RESERVED_SHIFT; + + if (ret >= ngroups) + return -EINVAL; + + *id &= ~UVERBS_ID_RESERVED_MASK; + return ret; +} + +const struct uverbs_type *uverbs_get_type(const struct ib_device *ibdev, + uint16_t type) +{ + const struct uverbs_root *groups = ibdev->specs_root; + const struct uverbs_type_group *types; + int ret = uverbs_group_idx(&type, groups->num_groups); + + if (ret < 0) + return NULL; + + types = groups->type_groups[ret]; + + if (type >= types->num_types) + return NULL; + + return types->types[type]; +} + +const struct uverbs_action *uverbs_get_action(const struct uverbs_type *type, + uint16_t action) +{ + const struct uverbs_action_group *action_group; + int ret = uverbs_group_idx(&action, type->num_groups); + + if (ret < 0) + return NULL; + + action_group = type->action_groups[ret]; + if (action >= action_group->num_actions) + return NULL; + + return action_group->actions[action]; +} + +static int uverbs_lock_object(struct ib_uobject *uobj, + enum uverbs_idr_access access) +{ + if (access == UVERBS_ACCESS_READ) + return 
down_read_trylock(&uobj->usecnt) == 1 ? 0 : -EBUSY; + + /* lock is either WRITE or DESTROY - should be exclusive */ + return down_write_trylock(&uobj->usecnt) == 1 ? 0 : -EBUSY; +} + +static struct ib_uobject *get_uobj_rcu(int id, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "uverbs: get_uobj_rcu wasn't called in a rcu_read_lock()!"); + /* object won't be released as we're protected in rcu */ + uobj = idr_find(&context->device->idr, id); + if (uobj) { + if (uobj->context != context) + uobj = NULL; + } + + return uobj; +} + +bool uverbs_is_live(struct ib_uobject *uobj) +{ + return uobj == get_uobj_rcu(uobj->id, uobj->context); +} + +struct ib_ucontext_lock { + struct kref ref; + /* locking the uobjects_list */ + struct mutex lock; +}; + +static void init_uobjects_list_lock(struct ib_ucontext_lock *lock) +{ + mutex_init(&lock->lock); + kref_init(&lock->ref); +} + +static void release_uobjects_list_lock(struct kref *ref) +{ + struct ib_ucontext_lock *lock = container_of(ref, + struct ib_ucontext_lock, + ref); + + kfree(lock); +} + +static void init_uobj(struct ib_uobject *uobj, struct ib_ucontext *context) +{ + init_rwsem(&uobj->usecnt); + uobj->context = context; +} + +static int add_uobj(struct ib_uobject *uobj) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&uobj->context->device->idr_lock); + + /* + * We start with allocating an idr pointing to NULL. This represents an + * object which isn't initialized yet. We'll replace it later on with + * the real object once we commit. + */ + ret = idr_alloc(&uobj->context->device->idr, NULL, 0, 0, GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; + + spin_unlock(&uobj->context->device->idr_lock); + idr_preload_end(); + + return ret < 0 ? 
ret : 0; +} + +static void remove_uobj(struct ib_uobject *uobj) +{ + spin_lock(&uobj->context->device->idr_lock); + idr_remove(&uobj->context->device->idr, uobj->id); + spin_unlock(&uobj->context->device->idr_lock); +} + +static void put_uobj(struct ib_uobject *uobj) +{ + /* + * When we destroy an object, we first just lock it for WRITE and + * actually DESTROY it in the finalize stage. So, the problematic + * scenario is when we just stared the finalize stage of the + * destruction (nothing was executed yet). Now, the other thread + * fetched the object for READ access, but it didn't lock it yet. + * The DESTROY thread continues and starts destroying the object. + * When the other thread continue - without the RCU, it would + * access freed memory. However, the rcu_read_lock delays the free + * until the rcu_read_lock of the READ operation quits. Since the + * write lock of the object is still taken by the DESTROY flow, the + * READ operation will get -EBUSY and it'll just bail out. + */ + kfree_rcu(uobj, rcu); +} + +/* + * Returns the ib_uobject, NULL if the requested object isn't found or an error. + * The caller should check for IS_ERR_OR_NULL. 
+ */ +static struct ib_uobject *get_uobject_from_context(struct ib_ucontext *ucontext, + const struct uverbs_type_alloc_action *type, + u32 idr, + enum uverbs_idr_access access) +{ + struct ib_uobject *uobj; + int ret; + + rcu_read_lock(); + uobj = get_uobj_rcu(idr, ucontext); + if (!uobj) + goto free; + + if (uobj->type != type) { + uobj = NULL; + goto free; + } + + ret = uverbs_lock_object(uobj, access); + if (ret) + uobj = ERR_PTR(ret); +free: + rcu_read_unlock(); + return uobj; +} + +static struct ib_uobject *uverbs_get_uobject_from_idr(const struct uverbs_type_alloc_action *type_alloc, + struct ib_ucontext *ucontext, + enum uverbs_idr_access access, + uint32_t idr) +{ + struct ib_uobject *uobj; + int ret; + + if (access == UVERBS_ACCESS_NEW) { + uobj = kmalloc(type_alloc->obj_size, GFP_KERNEL); + if (!uobj) + return ERR_PTR(-ENOMEM); + + init_uobj(uobj, ucontext); + + uobj->type = type_alloc; + ret = add_uobj(uobj); + if (ret) { + kfree(uobj); + return ERR_PTR(ret); + } + + } else { + uobj = get_uobject_from_context(ucontext, type_alloc, idr, + access); + + if (IS_ERR_OR_NULL(uobj)) + return ERR_PTR(-ENOENT); + } + + return uobj; +} + +static struct ib_uobject *uverbs_priv_fd_to_uobject(void *priv) +{ + return priv - sizeof(struct ib_uobject); +} + +static struct ib_uobject *uverbs_get_uobject_from_fd(const struct uverbs_type_alloc_action *type_alloc, + struct ib_ucontext *ucontext, + enum uverbs_idr_access access, + unsigned int fd) +{ + if (access == UVERBS_ACCESS_NEW) { + int _fd; + struct ib_uobject *uobj = NULL; + struct file *filp; + + _fd = get_unused_fd_flags(O_CLOEXEC); + if (_fd < 0) + return ERR_PTR(_fd); + + uobj = kmalloc(type_alloc->obj_size, GFP_KERNEL); + if (!uobj) { + put_unused_fd(_fd); + return ERR_PTR(-ENOMEM); + } + + init_uobj(uobj, ucontext); + filp = anon_inode_getfile(type_alloc->fd.name, + type_alloc->fd.fops, + uverbs_fd_uobj_to_priv(uobj), + type_alloc->fd.flags); + if (IS_ERR(filp)) { + put_unused_fd(_fd); + kfree(uobj); + return 
(void *)filp; + } + + /* + * user_handle should be filled by the user, + * the list is filled in the commit operation. + */ + uobj->type = type_alloc; + uobj->id = _fd; + uobj->object = filp; + + return uobj; + } else if (access == UVERBS_ACCESS_READ) { + struct file *f = fget(fd); + struct ib_uobject *uobject; + + if (!f) + return ERR_PTR(-EBADF); + + uobject = uverbs_priv_fd_to_uobject(f->private_data); + if (f->f_op != type_alloc->fd.fops || + !uobject->context) { + fput(f); + return ERR_PTR(-EBADF); + } + + /* + * No need to protect it with a ref count, as fget increases + * f_count. + */ + return uobject; + } else { + return ERR_PTR(-EOPNOTSUPP); + } +} + +struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_type_alloc_action *type_alloc, + struct ib_ucontext *ucontext, + enum uverbs_idr_access access, + unsigned int id) +{ + if (type_alloc->type == UVERBS_ATTR_TYPE_IDR) + return uverbs_get_uobject_from_idr(type_alloc, ucontext, access, + id); + else + return uverbs_get_uobject_from_fd(type_alloc, ucontext, access, + id); +} + +static void ib_uverbs_uobject_add(struct ib_uobject *uobject) +{ + mutex_lock(&uobject->context->uobjects_lock->lock); + list_add(&uobject->list, &uobject->context->uobjects); + mutex_unlock(&uobject->context->uobjects_lock->lock); +} + +static void ib_uverbs_uobject_remove(struct ib_uobject *uobject, bool lock) +{ + /* + * Calling remove requires exclusive access, so it's not possible + * another thread will use our object since the function is called + * with exclusive access. 
+ */ + remove_uobj(uobject); + if (lock) + mutex_lock(&uobject->context->uobjects_lock->lock); + list_del(&uobject->list); + if (lock) + mutex_unlock(&uobject->context->uobjects_lock->lock); + put_uobj(uobject); +} + +static void uverbs_finalize_idr(struct ib_uobject *uobj, + enum uverbs_idr_access access, + bool commit) +{ + switch (access) { + case UVERBS_ACCESS_READ: + up_read(&uobj->usecnt); + break; + case UVERBS_ACCESS_NEW: + if (commit) { + ib_uverbs_uobject_add(uobj); + spin_lock(&uobj->context->device->idr_lock); + /* + * We already allocated this IDR with a NULL object, so + * this shouldn't fail. + */ + WARN_ON(idr_replace(&uobj->context->device->idr, + uobj, uobj->id)); + spin_unlock(&uobj->context->device->idr_lock); + } else { + remove_uobj(uobj); + put_uobj(uobj); + } + break; + case UVERBS_ACCESS_WRITE: + up_write(&uobj->usecnt); + break; + case UVERBS_ACCESS_DESTROY: + if (commit) + ib_uverbs_uobject_remove(uobj, true); + else + up_write(&uobj->usecnt); + break; + } +} + +static void uverbs_finalize_fd(struct ib_uobject *uobj, + enum uverbs_idr_access access, + bool commit) +{ + struct file *filp = uobj->object; + + if (access == UVERBS_ACCESS_NEW) { + if (commit) { + uobj->uobjects_lock = uobj->context->uobjects_lock; + kref_get(&uobj->uobjects_lock->ref); + ib_uverbs_uobject_add(uobj); + fd_install(uobj->id, uobj->object); + } else { + /* Unsuccessful NEW */ + fput(filp); + put_unused_fd(uobj->id); + kfree(uobj); + } + } else { + fput(filp); + } +} + +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_idr_access access, + bool commit) +{ + if (uobj->type->type == UVERBS_ATTR_TYPE_IDR) + uverbs_finalize_idr(uobj, access, commit); + else if (uobj->type->type == UVERBS_ATTR_TYPE_FD) + uverbs_finalize_fd(uobj, access, commit); + else + WARN_ON(true); +} + +static void ib_uverbs_remove_fd(struct ib_uobject *uobject) +{ + /* + * user should release the uobject in the release + * callback. 
+ */ + if (uobject->context) { + list_del(&uobject->list); + uobject->context = NULL; + } +} + +void ib_uverbs_close_fd(struct file *f) +{ + struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(f->private_data); + + mutex_lock(&uobject->uobjects_lock->lock); + ib_uverbs_remove_fd(uobject); + mutex_unlock(&uobject->uobjects_lock->lock); + kref_put(&uobject->uobjects_lock->ref, release_uobjects_list_lock); +} + +void ib_uverbs_cleanup_fd(void *private_data) +{ + struct ib_uobject *uobject = uverbs_priv_fd_to_uobject(private_data); + + kfree(uobject); +} + +void uverbs_finalize_objects(struct uverbs_attr_array *attr_array, + size_t num, + const struct uverbs_action *action, + bool commit) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + struct uverbs_attr_array *attr_spec_array = &attr_array[i]; + const struct uverbs_attr_spec_group *attr_spec_group = + action->attr_groups[i]; + unsigned int j; + + for (j = 0; j < attr_spec_array->num_attrs; j++) { + struct uverbs_attr *attr = &attr_spec_array->attrs[j]; + struct uverbs_attr_spec *spec = &attr_spec_group->attrs[j]; + + if (!uverbs_is_valid(attr_spec_array, j)) + continue; + + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) + /* + * refcounts should be handled at the object + * level and not at the uobject level. Refcounts + * of the objects themselves are done in + * handlers. + */ + uverbs_finalize_object(attr->obj_attr.uobject, + spec->obj.access, + commit); + } + } +} + +static unsigned int get_max_type_orders(const struct uverbs_root *root) +{ + unsigned int i; + unsigned int max = 0; + + for (i = 0; i < root->num_groups; i++) { + unsigned int j; + const struct uverbs_type_group *types = root->type_groups[i]; + + for (j = 0; j < types->num_types; j++) { + /* + * Either this type isn't supported by this ib_device + * (as the group is an array of pointers to types + * indexed by the type or this type is supported, but + * we can't instantiate objects from this type + * (e.g. 
you can't instantiate objects of + * UVERBS_DEVICE). + */ + if (!types->types[j] || !types->types[j]->alloc) + continue; + if (types->types[j]->alloc->order > max) + max = types->types[j]->alloc->order; + } + } + + return max; +} + +void ib_uverbs_uobject_type_cleanup_ucontext(struct ib_ucontext *ucontext, + const struct uverbs_root *root) +{ + unsigned int num_orders = get_max_type_orders(root); + unsigned int i; + + for (i = 0; i <= num_orders; i++) { + struct ib_uobject *obj, *next_obj; + + /* + * The context is locked here, so we're protected from other + * concurrent commands running. The only thing we should take + * care of is releasing a FD while traversing this list. The FD + * could be closed and released from the _release fop of this + * FD. In order to mitigate this, we add a lock. + */ + mutex_lock(&ucontext->uobjects_lock->lock); + list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, + list) + if (obj->type->order == i) { + obj->type->free_fn(obj->type, obj); + if (obj->type->type == UVERBS_ATTR_TYPE_IDR) + ib_uverbs_uobject_remove(obj, false); + else + ib_uverbs_remove_fd(obj); + } + mutex_unlock(&ucontext->uobjects_lock->lock); + } + /* + * Since FD objects could outlive their context, we use a kref'ed + * lock. This lock is referenced when a context and FD objects are + * created. This lock protects concurrent context release from FD + * objects release. Therefore, we need to put this lock object in + * the context and every FD object release. 
+ */ + kref_put(&ucontext->uobjects_lock->ref, release_uobjects_list_lock); +} + +int ib_uverbs_uobject_type_initialize_ucontext(struct ib_ucontext *ucontext) +{ + ucontext->uobjects_lock = kmalloc(sizeof(*ucontext->uobjects_lock), + GFP_KERNEL); + if (!ucontext->uobjects_lock) + return -ENOMEM; + + init_uobjects_list_lock(ucontext->uobjects_lock); + INIT_LIST_HEAD(&ucontext->uobjects); + + return 0; +} + +void ib_uverbs_uobject_type_release_ucontext(struct ib_ucontext *ucontext) +{ + kfree(ucontext->uobjects_lock); +} + diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h new file mode 100644 index 00000000000000..3f405c80a31c92 --- /dev/null +++ b/drivers/infiniband/core/rdma_core.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. + * Copyright (c) 2005-2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_CORE_H +#define RDMA_CORE_H + +#include +#include +#include +#include + +int uverbs_group_idx(u16 *id, unsigned int ngroups); +const struct uverbs_type *uverbs_get_type(const struct ib_device *ibdev, + uint16_t type); +const struct uverbs_action *uverbs_get_action(const struct uverbs_type *type, + uint16_t action); +/* + * Get an ib_uobject that corresponds to the given id from ucontext, assuming + * the object is from the given type. Lock it to the required access. + * This function could create (access == NEW) or destroy (access == DESTROY) + * objects if required. The action will be finalized only when + * uverbs_finalize_object or uverbs_finalize_objects is called. + */ +struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_type_alloc_action *type_alloc, + struct ib_ucontext *ucontext, + enum uverbs_idr_access access, + unsigned int id); + +/* Check if the object is still alive. This must be either called within RCU */ +bool uverbs_is_live(struct ib_uobject *uobj); +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_idr_access access, + bool success); +void uverbs_finalize_objects(struct uverbs_attr_array *attr_array, + size_t num, + const struct uverbs_action *action, + bool success); + +/* + * These functions initialize and destroy the context. 
The context has a + * list of objects which is protected by a kref-ed lock, whose purpose is + * to protect concurrent FDs (e.g completion channel FDs) release while + * traversing the context and releasing its objects. initialize_ucontext + * should be called when we create a context. cleanup_ucontext removes all + * objects created in the ucontext. release_ucontext drops the reference from + * the lock. + */ +void ib_uverbs_uobject_type_cleanup_ucontext(struct ib_ucontext *ucontext, + const struct uverbs_root *root); +int ib_uverbs_uobject_type_initialize_ucontext(struct ib_ucontext *ucontext); +void ib_uverbs_uobject_type_release_ucontext(struct ib_ucontext *ucontext); + +/* + * Indicate this fd is no longer used by this consumer, but its memory isn't + * released yet. The memory is released only when ib_uverbs_cleanup_fd is + * called. + */ +void ib_uverbs_close_fd(struct file *f); +void ib_uverbs_cleanup_fd(void *private_data); + +static inline void *uverbs_fd_uobj_to_priv(struct ib_uobject *uobj) +{ + return uobj + 1; +} + +#endif /* RDMA_CORE_H */ diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index df26a741cda659..2e26de3347ab77 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -38,10 +38,10 @@ #define UVERBS_H #include -#include #include #include #include +#include #include #include @@ -84,6 +84,14 @@ * released when the CQ is destroyed. 
*/ +struct ib_uverbs_ioctl_hdr; +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct ib_uverbs_ioctl_hdr *hdr, + void __user *buf, + bool w_legacy); + struct ib_uverbs_device { atomic_t refcount; int num_comp_vectors; @@ -176,26 +184,11 @@ struct ib_ucq_object { u32 async_events_reported; }; -extern spinlock_t ib_uverbs_idr_lock; -extern struct idr ib_uverbs_pd_idr; -extern struct idr ib_uverbs_mr_idr; -extern struct idr ib_uverbs_mw_idr; -extern struct idr ib_uverbs_ah_idr; -extern struct idr ib_uverbs_cq_idr; -extern struct idr ib_uverbs_qp_idr; -extern struct idr ib_uverbs_srq_idr; -extern struct idr ib_uverbs_xrcd_idr; -extern struct idr ib_uverbs_rule_idr; -extern struct idr ib_uverbs_wq_idr; -extern struct idr ib_uverbs_rwq_ind_tbl_idr; - -void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); +extern const struct file_operations uverbs_event_fops; -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, - struct ib_device *ib_dev, - int is_async); +struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev); void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, @@ -213,6 +206,12 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); int uverbs_dealloc_mw(struct ib_mw *mw); +void uverbs_copy_query_dev_fields(struct ib_device *ib_dev, + struct ib_uverbs_query_device_resp *resp, + struct ib_device_attr *attr); + +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj); struct ib_uverbs_flow_spec { union { diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c index cb3f515a2285df..d514a646a94de9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -37,275 +37,172 @@ #include #include #include +#include +#include #include +#include +#include +#include "rdma_core.h" + #include "uverbs.h" #include "core_priv.h" -struct uverbs_lock_class { - struct lock_class_key key; - char name[16]; -}; - -static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; -static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; -static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; -static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; -static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; -static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; -static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; -static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; -static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; -static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; -static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; +#define idr_get_xxxx(_type, _access, _handle, _context) ({ \ + const struct uverbs_type * const type = &uverbs_type_## _type; \ + struct ib_uobject *uobj = uverbs_get_uobject_from_context( \ + type->alloc, \ + _context, _access, _handle); \ + \ + IS_ERR(uobj) ? NULL : uobj->object; }) -/* - * The ib_uobject locking scheme is as follows: - * - * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it - * needs to be held during all idr write operations. When an object is - * looked up, a reference must be taken on the object's kref before - * dropping this lock. For read operations, the rcu_read_lock() - * and rcu_write_lock() but similarly the kref reference is grabbed - * before the rcu_read_unlock(). - * - * - Each object also has an rwsem. 
This rwsem must be held for - * reading while an operation that uses the object is performed. - * For example, while registering an MR, the associated PD's - * uobject.mutex must be held for reading. The rwsem must be held - * for writing while initializing or destroying an object. - * - * - In addition, each object has a "live" flag. If this flag is not - * set, then lookups of the object will fail even if it is found in - * the idr. This handles a reader that blocks and does not acquire - * the rwsem until after the object is destroyed. The destroy - * operation will set the live flag to 0 and then drop the rwsem; - * this will allow the reader to acquire the rwsem, see that the - * live flag is 0, and then drop the rwsem and its reference to - * object. The underlying storage will not be freed until the last - * reference to the object is dropped. - */ - -static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct uverbs_lock_class *c) -{ - uobj->user_handle = user_handle; - uobj->context = context; - kref_init(&uobj->ref); - init_rwsem(&uobj->mutex); - lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); - uobj->live = 0; -} - -static void release_uobj(struct kref *kref) -{ - kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); -} - -static void put_uobj(struct ib_uobject *uobj) -{ - kref_put(&uobj->ref, release_uobj); -} - -static void put_uobj_read(struct ib_uobject *uobj) -{ - up_read(&uobj->mutex); - put_uobj(uobj); -} - -static void put_uobj_write(struct ib_uobject *uobj) -{ - up_write(&uobj->mutex); - put_uobj(uobj); -} - -static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) -{ - int ret; - - idr_preload(GFP_KERNEL); - spin_lock(&ib_uverbs_idr_lock); - - ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); - if (ret >= 0) - uobj->id = ret; - - spin_unlock(&ib_uverbs_idr_lock); - idr_preload_end(); - - return ret < 0 ? 
ret : 0; -} - -void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) -{ - spin_lock(&ib_uverbs_idr_lock); - idr_remove(idr, uobj->id); - spin_unlock(&ib_uverbs_idr_lock); -} - -static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, - struct ib_ucontext *context) -{ - struct ib_uobject *uobj; - - rcu_read_lock(); - uobj = idr_find(idr, id); - if (uobj) { - if (uobj->context == context) - kref_get(&uobj->ref); - else - uobj = NULL; - } - rcu_read_unlock(); - - return uobj; -} - -static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, - struct ib_ucontext *context, int nested) +static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; - - if (nested) - down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); - else - down_read(&uobj->mutex); - if (!uobj->live) { - put_uobj_read(uobj); - return NULL; - } - - return uobj; + return idr_get_xxxx(pd, UVERBS_ACCESS_READ, pd_handle, context); } -static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, - struct ib_ucontext *context) +static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) { - struct ib_uobject *uobj; - - uobj = __idr_get_uobj(idr, id, context); - if (!uobj) - return NULL; - - down_write(&uobj->mutex); - if (!uobj->live) { - put_uobj_write(uobj); - return NULL; - } - - return uobj; + return idr_get_xxxx(cq, UVERBS_ACCESS_READ, cq_handle, context); } -static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, - int nested) +static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { - struct ib_uobject *uobj; - - uobj = idr_read_uobj(idr, id, context, nested); - return uobj ? 
uobj->object : NULL; + return idr_get_xxxx(ah, UVERBS_ACCESS_READ, ah_handle, context); } -static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) +static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); + return idr_get_xxxx(qp, UVERBS_ACCESS_READ, qp_handle, context); } -static void put_pd_read(struct ib_pd *pd) +static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) { - put_uobj_read(pd->uobject); + return idr_get_xxxx(wq, UVERBS_ACCESS_READ, wq_handle, context); } -static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) +static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, + struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); + return idr_get_xxxx(rwq_ind_table, UVERBS_ACCESS_READ, + ind_table_handle, context); } -static void put_cq_read(struct ib_cq *cq) +static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { - put_uobj_read(cq->uobject); + return idr_get_xxxx(qp, UVERBS_ACCESS_WRITE, qp_handle, context); } -static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) +static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { - return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); + return idr_get_xxxx(srq, UVERBS_ACCESS_READ, srq_handle, context); } -static void put_ah_read(struct ib_ah *ah) +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, + struct ib_uobject **uobj) { - put_uobj_read(ah->uobject); + *uobj = uverbs_get_uobject_from_context(uverbs_type_xrcd.alloc, + context, UVERBS_ACCESS_READ, + xrcd_handle); + return *uobj ? 
(*uobj)->object : NULL; } -static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) +#if IS_ENABLED(CONFIG_INFINIBAND_USE_IOCTL_BACKWARD_COMP) +static int get_vendor_num_attrs(size_t cmd, size_t resp, int in_len, + int out_len) { - return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); + return !!(cmd != in_len) + !!(resp != out_len); } -static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) +static void init_ioctl_hdr(struct ib_uverbs_ioctl_hdr *hdr, + struct ib_device *ib_dev, + size_t num_attrs, + u16 object_type, + u16 action) { - return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); + hdr->length = sizeof(*hdr) + num_attrs * sizeof(hdr->attrs[0]); + hdr->flags = 0; + hdr->reserved = 0; + hdr->object_type = object_type; + hdr->action = action; + hdr->num_attrs = num_attrs; } -static void put_wq_read(struct ib_wq *wq) +static void fill_attr_ptr(struct ib_uverbs_attr *attr, u16 attr_id, u16 len, + const void * __user source) { - put_uobj_read(wq->uobject); + attr->attr_id = attr_id; + attr->len = len; + attr->reserved = 0; + attr->data = (__u64)source; } -static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, - struct ib_ucontext *context) +static void fill_hw_attrs(struct ib_uverbs_attr *hw_attrs, + const void __user *in_buf, + const void __user *out_buf, + size_t cmd_size, size_t resp_size, + int in_len, int out_len) { - return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); -} + if (in_len > cmd_size) + fill_attr_ptr(&hw_attrs[UVERBS_UHW_IN], + UVERBS_UHW_IN | UVERBS_UDATA_DRIVER_DATA_FLAG, + in_len - cmd_size, + in_buf + cmd_size); -static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) -{ - put_uobj_read(ind_table->uobject); + if (out_len > resp_size) + fill_attr_ptr(&hw_attrs[UVERBS_UHW_OUT], + UVERBS_UHW_OUT | UVERBS_UDATA_DRIVER_DATA_FLAG, + out_len - resp_size, + out_buf + resp_size); } -static struct ib_qp 
*idr_write_qp(int qp_handle, struct ib_ucontext *context) +ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, + int in_len, int out_len) { - struct ib_uobject *uobj; + struct ib_uverbs_get_context cmd; + struct ib_uverbs_get_context_resp resp; + struct { + struct ib_uverbs_ioctl_hdr hdr; + struct ib_uverbs_attr cmd_attrs[GET_CONTEXT_RESP + 1]; + struct ib_uverbs_attr hw_attrs[UVERBS_UHW_OUT + 1]; + } ioctl_cmd; + long err; - uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); - return uobj ? uobj->object : NULL; -} + if (out_len < sizeof(resp)) + return -ENOSPC; -static void put_qp_read(struct ib_qp *qp) -{ - put_uobj_read(qp->uobject); -} + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; -static void put_qp_write(struct ib_qp *qp) -{ - put_uobj_write(qp->uobject); -} + init_ioctl_hdr(&ioctl_cmd.hdr, ib_dev, ARRAY_SIZE(ioctl_cmd.cmd_attrs) + + get_vendor_num_attrs(sizeof(cmd), sizeof(resp), in_len, + out_len), + UVERBS_TYPE_DEVICE, UVERBS_DEVICE_ALLOC_CONTEXT); -static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) -{ - return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); -} + /* + * We have to have a direct mapping between the new format and the old + * format. It's easily achievable with new attributes. + */ + fill_attr_ptr(&ioctl_cmd.cmd_attrs[GET_CONTEXT_RESP], + GET_CONTEXT_RESP, sizeof(resp), + (const void * __user)cmd.response); + fill_hw_attrs(ioctl_cmd.hw_attrs, buf, + (const void * __user)cmd.response, sizeof(cmd), + sizeof(resp), in_len, out_len); -static void put_srq_read(struct ib_srq *srq) -{ - put_uobj_read(srq->uobject); -} + err = ib_uverbs_cmd_verbs(ib_dev, file, &ioctl_cmd.hdr, + ioctl_cmd.cmd_attrs, true); -static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, - struct ib_uobject **uobj) -{ - *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); - return *uobj ? 
(*uobj)->object : NULL; -} + if (err < 0) + goto err; -static void put_xrcd_read(struct ib_uobject *uobj) -{ - put_uobj_read(uobj); +err: + return err == 0 ? in_len : err; } - +#else ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, @@ -342,17 +239,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, } ucontext->device = ib_dev; - INIT_LIST_HEAD(&ucontext->pd_list); - INIT_LIST_HEAD(&ucontext->mr_list); - INIT_LIST_HEAD(&ucontext->mw_list); - INIT_LIST_HEAD(&ucontext->cq_list); - INIT_LIST_HEAD(&ucontext->qp_list); - INIT_LIST_HEAD(&ucontext->srq_list); - INIT_LIST_HEAD(&ucontext->ah_list); - INIT_LIST_HEAD(&ucontext->wq_list); - INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); - INIT_LIST_HEAD(&ucontext->xrcd_list); - INIT_LIST_HEAD(&ucontext->rule_list); + /* ufile is required when some objects are released */ + ucontext->ufile = file; + ret = ib_uverbs_uobject_type_initialize_ucontext(ucontext); + if (ret) + goto err_ctx; + rcu_read_lock(); ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); rcu_read_unlock(); @@ -376,7 +268,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err_free; resp.async_fd = ret; - filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); + filp = ib_uverbs_alloc_async_event_file(file, ib_dev); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_fd; @@ -405,15 +297,17 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, err_free: put_pid(ucontext->tgid); + ib_uverbs_uobject_type_release_ucontext(ucontext); +err_ctx: ib_dev->dealloc_ucontext(ucontext); err: mutex_unlock(&file->mutex); return ret; } +#endif -static void copy_query_dev_fields(struct ib_uverbs_file *file, - struct ib_device *ib_dev, +void uverbs_copy_query_dev_fields(struct ib_device *ib_dev, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { @@ -474,7 +368,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, return -EFAULT; memset(&resp, 0, 
sizeof resp); - copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); + uverbs_copy_query_dev_fields(ib_dev, &resp, &ib_dev->attrs); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -556,12 +450,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &pd_lock_class); - down_write(&uobj->mutex); + uobj = uverbs_get_uobject_from_context(uverbs_type_pd.alloc, file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); if (IS_ERR(pd)) { @@ -573,12 +465,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->uobject = uobj; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); - uobj->object = pd; - ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; @@ -588,24 +475,14 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, goto err_copy; } - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->pd_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - -err_idr: ib_dealloc_pd(pd); - err: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); return ret; } @@ -622,9 +499,11 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_pd.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.pd_handle); + if (IS_ERR(uobj)) + return 
PTR_ERR(uobj); pd = uobj->object; if (atomic_read(&pd->usecnt)) { @@ -637,21 +516,12 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, if (ret) goto err_put; - uobj->live = 0; - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); return in_len; err_put: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; } @@ -789,15 +659,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, } } - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) { - ret = -ENOMEM; - goto err_tree_mutex_unlock; - } - - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); - - down_write(&obj->uobject.mutex); + obj = (struct ib_uxrcd_object *) + uverbs_get_uobject_from_context(uverbs_type_xrcd.alloc, file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); if (!xrcd) { xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); @@ -816,10 +682,6 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; - ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - if (ret) - goto err_idr; - memset(&resp, 0, sizeof resp); resp.xrcd_handle = obj->uobject.id; @@ -828,7 +690,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, /* create new inode/xrcd table entry */ ret = xrcd_table_insert(file->device, inode, xrcd); if (ret) - goto err_insert_xrcd; + goto err_dealloc_xrcd; } atomic_inc(&xrcd->usecnt); } @@ -842,12 +704,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (f.file) fdput(f); - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - up_write(&obj->uobject.mutex); + uverbs_finalize_object(&obj->uobject, UVERBS_ACCESS_NEW, true); 
mutex_unlock(&file->device->xrcd_tree_mutex); return in_len; @@ -859,14 +716,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, atomic_dec(&xrcd->usecnt); } -err_insert_xrcd: - idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); - -err_idr: +err_dealloc_xrcd: ib_dealloc_xrcd(xrcd); err: - put_uobj_write(&obj->uobject); + uverbs_finalize_object(&obj->uobject, UVERBS_ACCESS_NEW, false); err_tree_mutex_unlock: if (f.file) @@ -887,24 +741,25 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, struct ib_xrcd *xrcd = NULL; struct inode *inode = NULL; struct ib_uxrcd_object *obj; - int live; int ret = 0; + bool destroyed = false; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&file->device->xrcd_tree_mutex); - uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); - if (!uobj) { - ret = -EINVAL; - goto out; + uobj = uverbs_get_uobject_from_context(uverbs_type_xrcd.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.xrcd_handle); + if (IS_ERR(uobj)) { + mutex_unlock(&file->device->xrcd_tree_mutex); + return PTR_ERR(uobj); } xrcd = uobj->object; inode = xrcd->inode; obj = container_of(uobj, struct ib_uxrcd_object, uobject); if (atomic_read(&obj->refcnt)) { - put_uobj_write(uobj); ret = -EBUSY; goto out; } @@ -912,30 +767,24 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { ret = ib_dealloc_xrcd(uobj->object); if (!ret) - uobj->live = 0; + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); + destroyed = !ret; } - live = uobj->live; if (inode && ret) atomic_inc(&xrcd->usecnt); - put_uobj_write(uobj); - if (ret) goto out; - if (inode && !live) + if (inode && destroyed) xrcd_table_delete(file->device, inode); - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); ret = in_len; out: + if (!destroyed) + uverbs_finalize_object(uobj, 
UVERBS_ACCESS_DESTROY, false); mutex_unlock(&file->device->xrcd_tree_mutex); return ret; } @@ -985,12 +834,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (ret) return ret; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mr_lock_class); - down_write(&uobj->mutex); + uobj = uverbs_get_uobject_from_context(uverbs_type_mr.alloc, file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { @@ -1020,9 +867,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mr; - ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); - if (ret) - goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; @@ -1035,29 +879,20 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mr_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, true); - up_write(&uobj->mutex); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - -err_unreg: ib_dereg_mr(mr); err_put: - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, false); err_free: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); return ret; } @@ -1093,11 +928,11 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, - file->ucontext); - - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_mr.alloc, file->ucontext, + UVERBS_ACCESS_WRITE, + cmd.mr_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); mr = uobj->object; @@ -1141,11 +976,11 @@ ssize_t 
ib_uverbs_rereg_mr(struct ib_uverbs_file *file, put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, + UVERBS_ACCESS_READ, ret == in_len); put_uobjs: - - put_uobj_write(mr->uobject); + uverbs_finalize_object(uobj, UVERBS_ACCESS_WRITE, ret == in_len); return ret; } @@ -1163,28 +998,22 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_mr.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.mr_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); mr = uobj->object; ret = ib_dereg_mr(mr); - if (!ret) - uobj->live = 0; - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; + } - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); return in_len; } @@ -1208,12 +1037,10 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, 0, file->ucontext, &mw_lock_class); - down_write(&uobj->mutex); + uobj = uverbs_get_uobject_from_context(uverbs_type_mw.alloc, file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { @@ -1238,9 +1065,6 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, atomic_inc(&pd->usecnt); uobj->object = mw; - ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); - if (ret) - goto err_unalloc; memset(&resp, 0, sizeof(resp)); resp.rkey = mw->rkey; @@ -1252,29 +1076,17 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, goto 
err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->mw_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - -err_unalloc: uverbs_dealloc_mw(mw); - err_put: - put_pd_read(pd); - + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, false); err_free: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); return ret; } @@ -1291,28 +1103,21 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_mw.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.mw_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); mw = uobj->object; ret = uverbs_dealloc_mw(mw); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; + } - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); return in_len; } @@ -1324,8 +1129,8 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; - struct file *filp; - int ret; + struct ib_uobject *uobj; + struct ib_uverbs_event_file *ev_file; if (out_len < sizeof resp) return -ENOSPC; @@ -1333,25 +1138,30 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = get_unused_fd_flags(O_CLOEXEC); - if 
(ret < 0) - return ret; - resp.fd = ret; + uobj = uverbs_get_uobject_from_context(uverbs_type_comp_channel.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); - filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); - if (IS_ERR(filp)) { - put_unused_fd(resp.fd); - return PTR_ERR(filp); - } + resp.fd = uobj->id; + + ev_file = uverbs_fd_uobj_to_priv(uobj); + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->async_queue = NULL; + ev_file->uverbs_file = file; + ev_file->is_closed = 0; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { - put_unused_fd(resp.fd); - fput(filp); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); return -EFAULT; } - fd_install(resp.fd, filp); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true); return in_len; } @@ -1369,6 +1179,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, void *context) { struct ib_ucq_object *obj; + struct ib_uobject *ev_uobj = NULL; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -1378,21 +1189,27 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return ERR_PTR(-ENOMEM); - - init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); - down_write(&obj->uobject.mutex); + obj = (struct ib_ucq_object *)uverbs_get_uobject_from_context( + uverbs_type_cq.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(obj)) + return obj; if (cmd->comp_channel >= 0) { - ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); - if (!ev_file) { + ev_uobj = uverbs_get_uobject_from_context(uverbs_type_comp_channel.alloc, + file->ucontext, + UVERBS_ACCESS_READ, + cmd->comp_channel); + if (IS_ERR(ev_uobj)) { ret = -EINVAL; 
goto err; } + ev_file = uverbs_fd_uobj_to_priv(ev_uobj); + kref_get(&ev_file->ref); } + obj->uobject.user_handle = cmd->user_handle; obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; @@ -1405,8 +1222,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; - cq = ib_dev->create_cq(ib_dev, &attr, - file->ucontext, uhw); + cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; @@ -1420,10 +1236,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; - ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); - if (ret) - goto err_free; - memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; resp.base.cqe = cq->cqe; @@ -1435,28 +1247,20 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, if (ret) goto err_cb; - mutex_lock(&file->mutex); - list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); - mutex_unlock(&file->mutex); - - obj->uobject.live = 1; - - up_write(&obj->uobject.mutex); + if (ev_uobj) + uverbs_finalize_object(ev_uobj, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(&obj->uobject, UVERBS_ACCESS_NEW, true); return obj; err_cb: - idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); - -err_free: ib_destroy_cq(cq); err_file: - if (ev_file) - ib_uverbs_release_ucq(file, ev_file, obj); - + if (ev_uobj) + uverbs_finalize_object(ev_uobj, UVERBS_ACCESS_READ, false); err: - put_uobj_write(&obj->uobject); + uverbs_finalize_object(&obj->uobject, UVERBS_ACCESS_NEW, false); return ERR_PTR(ret); } @@ -1579,7 +1383,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = idr_read_cq(cmd.cq_handle, file->ucontext); 
if (!cq) return -EINVAL; @@ -1594,7 +1398,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, ret = -EFAULT; out: - put_cq_read(cq); + uverbs_finalize_object(cq->uobject, UVERBS_ACCESS_READ, !ret); return ret ? ret : in_len; } @@ -1641,7 +1445,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = idr_read_cq(cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; @@ -1673,7 +1477,8 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, ret = in_len; out_put: - put_cq_read(cq); + uverbs_finalize_object(cq->uobject, UVERBS_ACCESS_READ, + ret == in_len); return ret; } @@ -1688,14 +1493,14 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = idr_read_cq(cmd.cq_handle, file->ucontext); if (!cq) return -EINVAL; ib_req_notify_cq(cq, cmd.solicited_only ? 
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - put_cq_read(cq); + uverbs_finalize_object(cq->uobject, UVERBS_ACCESS_READ, true); return in_len; } @@ -1716,36 +1521,29 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_cq.alloc, + file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.cq_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + cq = uobj->object; ev_file = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); ret = ib_destroy_cq(cq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; + } - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - ib_uverbs_release_ucq(file, ev_file, obj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); memset(&resp, 0, sizeof resp); resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; - put_uobj(uobj); - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; @@ -1781,13 +1579,15 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - obj = kzalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uqp_object *)uverbs_get_uobject_from_context( + uverbs_type_qp.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + obj->uxrcd = NULL; + obj->uevent.uobject.user_handle = cmd->user_handle; - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, - &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); if (cmd_sz >= offsetof(typeof(*cmd), 
rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { @@ -1840,7 +1640,7 @@ static int create_qp(struct ib_uverbs_file *file, if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { rcq = idr_read_cq(cmd->recv_cq_handle, - file->ucontext, 0); + file->ucontext); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1850,7 +1650,7 @@ static int create_qp(struct ib_uverbs_file *file, } if (has_sq) - scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext); if (!ind_tbl) rcq = rcq ?: scq; pd = idr_read_pd(cmd->pd_handle, file->ucontext); @@ -1939,9 +1739,6 @@ static int create_qp(struct ib_uverbs_file *file, qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; memset(&resp, 0, sizeof resp); resp.base.qpn = qp->qp_num; @@ -1963,50 +1760,42 @@ static int create_qp(struct ib_uverbs_file *file, obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); + uverbs_finalize_object(xrcd_uobj, UVERBS_ACCESS_READ, true); } if (pd) - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, true); if (scq) - put_cq_read(scq); + uverbs_finalize_object(scq->uobject, UVERBS_ACCESS_READ, true); if (rcq && rcq != scq) - put_cq_read(rcq); + uverbs_finalize_object(rcq->uobject, UVERBS_ACCESS_READ, true); if (srq) - put_srq_read(srq); + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, true); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; + uverbs_finalize_object(ind_tbl->uobject, UVERBS_ACCESS_READ, true); - up_write(&obj->uevent.uobject.mutex); + uverbs_finalize_object(&obj->uevent.uobject, 
UVERBS_ACCESS_NEW, true); return 0; -err_cb: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); -err_destroy: +err_cb: ib_destroy_qp(qp); err_put: if (xrcd) - put_xrcd_read(xrcd_uobj); + uverbs_finalize_object(xrcd_uobj, UVERBS_ACCESS_READ, false); if (pd) - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, false); if (scq) - put_cq_read(scq); + uverbs_finalize_object(scq->uobject, UVERBS_ACCESS_READ, false); if (rcq && rcq != scq) - put_cq_read(rcq); + uverbs_finalize_object(rcq->uobject, UVERBS_ACCESS_READ, false); if (srq) - put_srq_read(srq); + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, false); if (ind_tbl) - put_rwq_indirection_table_read(ind_tbl); + uverbs_finalize_object(ind_tbl->uobject, UVERBS_ACCESS_READ, false); - put_uobj_write(&obj->uevent.uobject); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_NEW, false); return ret; } @@ -2142,12 +1931,13 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); - down_write(&obj->uevent.uobject.mutex); + obj = (struct ib_uqp_object *)uverbs_get_uobject_from_context( + uverbs_type_qp.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); + obj->uxrcd = NULL; xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); if (!xrcd) { @@ -2167,15 +1957,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, qp = ib_open_qp(xrcd, &attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); - goto err_put; + goto err_xrcd; } qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; - ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; memset(&resp, 0, sizeof resp); resp.qpn = qp->qp_num; @@ -2184,32 +1971,23 @@ ssize_t 
ib_uverbs_open_qp(struct ib_uverbs_file *file, if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_remove; + goto err_destroy; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - put_xrcd_read(xrcd_uobj); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; + uverbs_finalize_object(xrcd_uobj, UVERBS_ACCESS_READ, true); - up_write(&obj->uevent.uobject.mutex); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_NEW, true); return in_len; -err_remove: - idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); - err_destroy: ib_destroy_qp(qp); - +err_xrcd: + uverbs_finalize_object(xrcd_uobj, UVERBS_ACCESS_READ, false); err_put: - put_xrcd_read(xrcd_uobj); - put_uobj_write(&obj->uevent.uobject); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_WRITE, false); return ret; } @@ -2242,11 +2020,10 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, } ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - - put_qp_read(qp); - if (ret) - goto out; + goto out_query; + + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, true); memset(&resp, 0, sizeof resp); @@ -2312,20 +2089,10 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, kfree(init_attr); return ret ? 
ret : in_len; -} -/* Remove ignored fields set in the attribute mask */ -static int modify_qp_mask(enum ib_qp_type qp_type, int mask) -{ - switch (qp_type) { - case IB_QPT_XRC_INI: - return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); - case IB_QPT_XRC_TGT: - return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | - IB_QP_RNR_RETRY); - default: - return mask; - } +out_query: + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, false); + return ret; } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, @@ -2417,7 +2184,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, ret = in_len; release_qp: - put_qp_read(qp); + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, + ret == in_len); out: kfree(attr); @@ -2442,40 +2210,34 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, memset(&resp, 0, sizeof resp); - uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_qp.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.qp_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + qp = uobj->object; obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); if (!list_empty(&obj->mcast_list)) { - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return -EBUSY; } ret = ib_destroy_qp(qp); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; + } if (obj->uxrcd) atomic_dec(&obj->uxrcd->refcnt); - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -2662,11 
+2424,13 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, ret = -EFAULT; out_put: - put_qp_read(qp); + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, !ret); while (wr) { if (is_ud && ud_wr(wr)->ah) - put_ah_read(ud_wr(wr)->ah); + uverbs_finalize_object(ud_wr(wr)->ah->uobject, + UVERBS_ACCESS_READ, !ret); + next = wr->next; kfree(wr); wr = next; @@ -2790,14 +2554,16 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, resp.bad_wr = 0; ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); - put_qp_read(qp); - - if (ret) + if (ret) { + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, false); for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } + } else { + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, true); + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ -2840,7 +2606,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); - put_srq_read(srq); + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, !ret); if (ret) for (next = wr; next; next = next->next) { @@ -2882,12 +2648,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = kmalloc(sizeof *uobj, GFP_KERNEL); - if (!uobj) - return -ENOMEM; - - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); - down_write(&uobj->mutex); + uobj = uverbs_get_uobject_from_context(uverbs_type_ah.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { @@ -2917,10 +2682,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, ah->uobject = uobj; uobj->object = ah; - ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); - if (ret) - goto err_destroy; - resp.ah_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, @@ -2929,29 
+2690,19 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, goto err_copy; } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->ah_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true); return in_len; err_copy: - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - -err_destroy: ib_destroy_ah(ah); err_put: - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, false); err: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); return ret; } @@ -2967,29 +2718,22 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_ah.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.ah_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + ah = uobj->object; ret = ib_destroy_ah(ah); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - - return in_len; + } else { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); + return in_len; + } } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, @@ -3035,9 +2779,13 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, kfree(mcast); out_put: - put_qp_write(qp); + if (ret) { + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_WRITE, false); + return ret; + } - return ret ? 
ret : in_len; + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_WRITE, true); + return in_len; } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, @@ -3073,9 +2821,13 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, } out_put: - put_qp_write(qp); + if (ret) { + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_WRITE, false); + return ret; + } - return ret ? ret : in_len; + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_WRITE, true); + return in_len; } static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) @@ -3218,20 +2970,20 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - obj = kmalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) - return -ENOMEM; + obj = (struct ib_uwq_object *)uverbs_get_uobject_from_context( + uverbs_type_wq.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, - &wq_lock_class); - down_write(&obj->uevent.uobject.mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { err = -EINVAL; goto err_uobj; } - cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); + cq = idr_read_cq(cmd.cq_handle, file->ucontext); if (!cq) { err = -EINVAL; goto err_put_pd; @@ -3263,9 +3015,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, atomic_inc(&cq->usecnt); wq->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = wq; - err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); - if (err) - goto destroy_wq; memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; @@ -3278,27 +3027,19 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, if (err) goto err_copy; - put_pd_read(pd); - put_cq_read(cq); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - up_write(&obj->uevent.uobject.mutex); + 
uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(cq->uobject, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_NEW, true); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); -destroy_wq: ib_destroy_wq(wq); err_put_cq: - put_cq_read(cq); + uverbs_finalize_object(cq->uobject, UVERBS_ACCESS_READ, false); err_put_pd: - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, false); err_uobj: - put_uobj_write(&obj->uevent.uobject); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_NEW, false); return err; } @@ -3339,30 +3080,23 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, return -EOPNOTSUPP; resp.response_length = required_resp_len; - uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, - file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_wq.alloc, file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.wq_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); wq = uobj->object; obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); ret = ib_destroy_wq(wq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); + } ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); ret = ib_copy_to_udata(ucore, &resp, resp.response_length); if (ret) @@ -3408,7 +3142,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, wq_attr.curr_wq_state = cmd.curr_wq_state; wq_attr.wq_state = cmd.wq_state; ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); - put_wq_read(wq); + uverbs_finalize_object(wq->uobject, UVERBS_ACCESS_READ,
!ret); return ret; } @@ -3495,14 +3229,15 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, wqs[num_read_wqs] = wq; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { + uobj = uverbs_get_uobject_from_context(uverbs_type_rwq_ind_table.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, + 0); + if (IS_ERR(uobj)) { - err = -ENOMEM; + err = PTR_ERR(uobj); goto put_wqs; } - init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); - down_write(&uobj->mutex); init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); @@ -3522,10 +3257,6 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, for (i = 0; i < num_wq_handles; i++) atomic_inc(&wqs[i]->usecnt); - err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - if (err) - goto destroy_ind_tbl; - resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; resp.response_length = required_resp_len; @@ -3538,26 +3269,18 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); - - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); - mutex_unlock(&file->mutex); + uverbs_finalize_object(wqs[j]->uobject, UVERBS_ACCESS_READ, true); - uobj->live = 1; - - up_write(&uobj->mutex); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); -destroy_ind_tbl: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); put_wqs: for (j = 0; j < num_read_wqs; j++) - put_wq_read(wqs[j]); + uverbs_finalize_object(wqs[j]->uobject, UVERBS_ACCESS_READ, false); err_free: kfree(wqs_handles); kfree(wqs); @@ -3593,29 +3316,23 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EOPNOTSUPP; - uobj =
idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, - file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_rwq_ind_table.alloc, + file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.ind_tbl_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + rwq_ind_tbl = uobj->object; ind_tbl = rwq_ind_tbl->ind_tbl; ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; + } - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); kfree(ind_tbl); return ret; } @@ -3691,13 +3408,12 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_flow_attr = &cmd.flow_attr; } - uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); - if (!uobj) { + uobj = uverbs_get_uobject_from_context(uverbs_type_flow.alloc, file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(uobj)) { - err = -ENOMEM; + err = PTR_ERR(uobj); goto err_free_attr; } - init_uobj(uobj, 0, file->ucontext, &rule_lock_class); - down_write(&uobj->mutex); qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { @@ -3749,10 +3465,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, flow_id->uobject = uobj; uobj->object = flow_id; - err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); - if (err) - goto destroy_flow; - memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; @@ -3761,28 +3473,20 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (err) goto err_copy; - put_qp_read(qp); - mutex_lock(&file->mutex); - list_add_tail(&uobj->list, &file->ucontext->rule_list); - mutex_unlock(&file->mutex); - - uobj->live = 1; - - up_write(&uobj->mutex); + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, true);
kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); -destroy_flow: ib_destroy_flow(flow_id); err_free: kfree(flow_attr); err_put: - put_qp_read(qp); + uverbs_finalize_object(qp->uobject, UVERBS_ACCESS_READ, false); err_uobj: - put_uobj_write(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_NEW, false); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); @@ -3809,26 +3513,17 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, if (cmd.comp_mask) return -EINVAL; - uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, - file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_flow.alloc, + file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.flow_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + flow_id = uobj->object; ret = ib_destroy_flow(flow_id); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - - put_uobj(uobj); - + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, !ret); return ret; } @@ -3845,12 +3540,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_srq_init_attr attr; int ret; - obj = kmalloc(sizeof *obj, GFP_KERNEL); - if (!obj) - return -ENOMEM; - - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); - down_write(&obj->uevent.uobject.mutex); + obj = (struct ib_usrq_object *)uverbs_get_uobject_from_context( + uverbs_type_srq.alloc, + file->ucontext, + UVERBS_ACCESS_NEW, 0); + if (IS_ERR(obj)) + return PTR_ERR(obj); if (cmd->srq_type == IB_SRQT_XRC) { attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); @@ -3862,7 +3557,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); 
atomic_inc(&obj->uxrcd->refcnt); - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext); if (!attr.ext.xrc.cq) { ret = -EINVAL; goto err_put_xrcd; @@ -3909,9 +3604,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, atomic_set(&srq->usecnt, 0); obj->uevent.uobject.object = srq; - ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - if (ret) - goto err_destroy; + obj->uevent.uobject.user_handle = cmd->user_handle; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; @@ -3927,42 +3620,34 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, } if (cmd->srq_type == IB_SRQT_XRC) { - put_uobj_read(xrcd_uobj); - put_cq_read(attr.ext.xrc.cq); + uverbs_finalize_object(xrcd_uobj, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(attr.ext.xrc.cq->uobject, + UVERBS_ACCESS_READ, true); } - put_pd_read(pd); - - mutex_lock(&file->mutex); - list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); - mutex_unlock(&file->mutex); - - obj->uevent.uobject.live = 1; - - up_write(&obj->uevent.uobject.mutex); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, true); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_NEW, true); return 0; err_copy: - idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); - -err_destroy: ib_destroy_srq(srq); err_put: - put_pd_read(pd); + uverbs_finalize_object(pd->uobject, UVERBS_ACCESS_READ, false); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) - put_cq_read(attr.ext.xrc.cq); + uverbs_finalize_object(attr.ext.xrc.cq->uobject, + UVERBS_ACCESS_READ, false); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); + uverbs_finalize_object(xrcd_uobj, UVERBS_ACCESS_READ, false); } err: - put_uobj_write(&obj->uevent.uobject); + uverbs_finalize_object(&obj->uevent.uobject, UVERBS_ACCESS_NEW, false); return ret; } @@ -4056,9 +3741,13 @@ ssize_t 
ib_uverbs_modify_srq(struct ib_uverbs_file *file, ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); - put_srq_read(srq); + if (ret) { + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, false); + return ret; + } - return ret ? ret : in_len; + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, true); + return in_len; } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, @@ -4084,10 +3773,12 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, ret = ib_query_srq(srq, &attr); - put_srq_read(srq); - - if (ret) + if (ret) { + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, false); return ret; + } + + uverbs_finalize_object(srq->uobject, UVERBS_ACCESS_READ, true); memset(&resp, 0, sizeof resp); @@ -4119,39 +3810,34 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); - if (!uobj) - return -EINVAL; + uobj = uverbs_get_uobject_from_context(uverbs_type_srq.alloc, + file->ucontext, + UVERBS_ACCESS_DESTROY, + cmd.srq_handle); + if (IS_ERR(uobj)) + return PTR_ERR(uobj); + srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); srq_type = srq->srq_type; ret = ib_destroy_srq(srq); - if (!ret) - uobj->live = 0; - - put_uobj_write(uobj); - - if (ret) + if (ret) { + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, false); return ret; + } if (srq_type == IB_SRQT_XRC) { us = container_of(obj, struct ib_usrq_object, uevent); atomic_dec(&us->uxrcd->refcnt); } - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - - mutex_lock(&file->mutex); - list_del(&uobj->list); - mutex_unlock(&file->mutex); - ib_uverbs_release_uevent(file, obj); memset(&resp, 0, sizeof resp); resp.events_reported = obj->events_reported; - put_uobj(uobj); + uverbs_finalize_object(uobj, UVERBS_ACCESS_DESTROY, true); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) @@ 
-4192,7 +3878,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, if (err) return err; - copy_query_dev_fields(file, ib_dev, &resp.base, &attr); + uverbs_copy_query_dev_fields(ib_dev, &resp.base, &attr); if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) goto end; diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c new file mode 100644 index 00000000000000..81e0f69626976b --- /dev/null +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_process_attr(struct ib_device *ibdev, + struct ib_ucontext *ucontext, + const struct ib_uverbs_attr *uattr, + u16 attr_id, + const struct uverbs_attr_spec_group *attr_spec_group, + struct uverbs_attr_array *attr_array, + struct ib_uverbs_attr __user *uattr_ptr, + bool w_legacy) +{ + const struct uverbs_attr_spec *spec; + struct uverbs_attr *e; + const struct uverbs_type *type; + struct uverbs_obj_attr *o_attr; + struct uverbs_attr *elements = attr_array->attrs; + + if (uattr->reserved) + return -EINVAL; + + if (attr_id >= attr_spec_group->num_attrs) { + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EINVAL; + else + return 0; + } + + spec = &attr_spec_group->attrs[attr_id]; + e = &elements[attr_id]; + + switch (spec->type) { + case UVERBS_ATTR_TYPE_PTR_IN: + case UVERBS_ATTR_TYPE_PTR_OUT: + if (uattr->len < spec->len || + (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) && + uattr->len > spec->len)) + return -EINVAL; + + e->ptr_attr.ptr = (void * __user)uattr->data; + e->ptr_attr.len = uattr->len; + break; + + case UVERBS_ATTR_TYPE_FLAG: + e->flag_attr.flags = uattr->data; + if (uattr->len) + return -EINVAL; + if (uattr->flags & UVERBS_ATTR_F_MANDATORY && + e->flag_attr.flags & ~spec->flag.mask) + return -EINVAL; + break; + + case UVERBS_ATTR_TYPE_IDR: + case UVERBS_ATTR_TYPE_FD: + if (uattr->len != 0 || (uattr->data >> 32) || (!ucontext)) + return -EINVAL; + + o_attr = &e->obj_attr; + type = uverbs_get_type(ibdev, spec->obj.obj_type); + if (!type) + return -EINVAL; + o_attr->type = type->alloc; + o_attr->uattr = uattr_ptr; + + if (spec->type == UVERBS_ATTR_TYPE_IDR) { + o_attr->uobj.idr = (uint32_t)uattr->data; + o_attr->uobject = uverbs_get_uobject_from_context( + o_attr->type, + ucontext, + spec->obj.access, + o_attr->uobj.idr); + } else { + o_attr->fd.fd = (int)uattr->data; + o_attr->uobject = uverbs_get_uobject_from_context( + o_attr->type, + ucontext, + 
spec->obj.access, + o_attr->fd.fd); + } + + if (IS_ERR(o_attr->uobject)) + return -EINVAL; + + if (spec->obj.access == UVERBS_ACCESS_NEW) { + u64 idr = o_attr->uobject->id; + + if (!w_legacy) { + if (put_user(idr, &o_attr->uattr->data)) { + uverbs_finalize_object(o_attr->uobject, + UVERBS_ACCESS_NEW, + false); + return -EFAULT; + } + } else { + o_attr->uattr->data = idr; + } + } + + break; + default: + return -EOPNOTSUPP; + }; + + set_bit(attr_id, attr_array->valid_bitmap); + return 0; +} + +static int uverbs_uattrs_process(struct ib_device *ibdev, + struct ib_ucontext *ucontext, + const struct ib_uverbs_attr *uattrs, + size_t num_uattrs, + const struct uverbs_action *action, + struct uverbs_attr_array *attr_array, + struct ib_uverbs_attr __user *uattr_ptr, + bool w_legacy) +{ + size_t i; + int ret = 0; + int num_given_groups = 0; + + for (i = 0; i < num_uattrs; i++) { + const struct ib_uverbs_attr *uattr = &uattrs[i]; + u16 attr_id = uattr->attr_id; + const struct uverbs_attr_spec_group *attr_spec_group; + + ret = uverbs_group_idx(&attr_id, action->num_groups); + if (ret < 0) { + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return ret; + + continue; + } + + if (ret >= num_given_groups) + num_given_groups = ret + 1; + + attr_spec_group = action->attr_groups[ret]; + ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, + attr_spec_group, &attr_array[ret], + uattr_ptr++, w_legacy); + if (ret) { + uverbs_finalize_objects(attr_array, num_given_groups, + action, false); + return ret; + } + } + + return ret ?
ret : num_given_groups; +} + +static int uverbs_validate_kernel_mandatory(const struct uverbs_action *action, + struct uverbs_attr_array *attr_array, + unsigned int num_given_groups) +{ + unsigned int i; + + for (i = 0; i < num_given_groups; i++) { + const struct uverbs_attr_spec_group *attr_spec_group = + action->attr_groups[i]; + + if (!bitmap_subset(attr_spec_group->mandatory_attrs_bitmask, + attr_array[i].valid_bitmap, + attr_spec_group->num_attrs)) { + return -EINVAL; + } + } + + return 0; +} + +static int uverbs_handle_action(struct ib_uverbs_attr __user *uattr_ptr, + const struct ib_uverbs_attr *uattrs, + size_t num_uattrs, + struct ib_device *ibdev, + struct ib_uverbs_file *ufile, + const struct uverbs_action *action, + struct uverbs_attr_array *attr_array, + bool w_legacy) +{ + int ret; + int num_given_groups; + + num_given_groups = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, + num_uattrs, action, attr_array, + uattr_ptr, w_legacy); + if (num_given_groups <= 0) + return -EINVAL; + + ret = uverbs_validate_kernel_mandatory(action, attr_array, + num_given_groups); + if (ret) + goto cleanup; + + ret = action->handler(ibdev, ufile, attr_array, num_given_groups); +cleanup: + uverbs_finalize_objects(attr_array, num_given_groups, action, !ret); + + return ret; +} + +#define UVERBS_OPTIMIZE_USING_STACK +#ifdef UVERBS_OPTIMIZE_USING_STACK +#define UVERBS_MAX_STACK_USAGE 256 +#endif +long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct ib_uverbs_ioctl_hdr *hdr, + void __user *buf, + bool w_legacy) +{ + const struct uverbs_type *type; + const struct uverbs_action *action; + long err = 0; + unsigned int i; + struct { + struct ib_uverbs_attr *uattrs; + struct uverbs_attr_array *uverbs_attr_array; + } *ctx = NULL; + struct uverbs_attr *curr_attr; + unsigned long *curr_bitmap; + size_t ctx_size; +#ifdef UVERBS_OPTIMIZE_USING_STACK + uintptr_t data[UVERBS_MAX_STACK_USAGE / sizeof(uintptr_t)]; +#endif + + if (hdr->reserved) + 
return -EINVAL; + + type = uverbs_get_type(ib_dev, hdr->object_type); + if (!type) + return -EOPNOTSUPP; + + action = uverbs_get_action(type, hdr->action); + if (!action) + return -EOPNOTSUPP; + + if ((action->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) + return -EINVAL; + + ctx_size = sizeof(*ctx) + + sizeof(struct uverbs_attr_array) * action->num_groups + + sizeof(*ctx->uattrs) * hdr->num_attrs + + sizeof(*ctx->uverbs_attr_array->attrs) * action->num_child_attrs + + sizeof(*ctx->uverbs_attr_array->valid_bitmap) * + (action->num_child_attrs / BITS_PER_LONG + + action->num_groups); + +#ifdef UVERBS_OPTIMIZE_USING_STACK + if (ctx_size <= UVERBS_MAX_STACK_USAGE) + ctx = (void *)data; + + if (!ctx) +#endif + ctx = kmalloc(ctx_size, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->uverbs_attr_array = (void *)ctx + sizeof(*ctx); + ctx->uattrs = (void *)(ctx->uverbs_attr_array + + action->num_groups); + curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); + curr_bitmap = (void *)(curr_attr + action->num_child_attrs); + + /* + * We just fill the pointers and num_attrs here. 
The data itself will be + * filled at a later stage (uverbs_process_attr) + */ + for (i = 0; i < action->num_groups; i++) { + unsigned int curr_num_attrs = action->attr_groups[i]->num_attrs; + + ctx->uverbs_attr_array[i].attrs = curr_attr; + curr_attr += curr_num_attrs; + ctx->uverbs_attr_array[i].num_attrs = curr_num_attrs; + ctx->uverbs_attr_array[i].valid_bitmap = curr_bitmap; + bitmap_zero(curr_bitmap, curr_num_attrs); + curr_bitmap += BITS_TO_LONGS(curr_num_attrs); + } + + if (w_legacy) { + memcpy(ctx->uattrs, buf, + sizeof(*ctx->uattrs) * hdr->num_attrs); + } else { + err = copy_from_user(ctx->uattrs, buf, + sizeof(*ctx->uattrs) * hdr->num_attrs); + if (err) { + err = -EFAULT; + goto out; + } + } + + err = uverbs_handle_action(buf, ctx->uattrs, hdr->num_attrs, ib_dev, + file, action, ctx->uverbs_attr_array, + w_legacy); +out: +#ifdef UVERBS_OPTIMIZE_USING_STACK + if (ctx_size > UVERBS_MAX_STACK_USAGE) +#endif + kfree(ctx); + return err; +} + +#define IB_UVERBS_MAX_CMD_SZ 4096 + +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_ioctl_hdr __user *user_hdr = + (struct ib_uverbs_ioctl_hdr __user *)arg; + struct ib_uverbs_ioctl_hdr hdr; + struct ib_device *ib_dev; + int srcu_key; + long err; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + err = -EIO; + goto out; + } + + if (cmd == RDMA_VERBS_IOCTL) { + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + + if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || + hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { + err = -EINVAL; + goto out; + } + + /* currently there are no flags supported */ + if (hdr.flags) { + err = -EOPNOTSUPP; + goto out; + } + + err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, + (__user void *)arg + sizeof(hdr), + false); + } else { + err = -ENOIOCTLCMD; 
+ } +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + + return err; +} diff --git a/drivers/infiniband/core/uverbs_ioctl_cmd.c b/drivers/infiniband/core/uverbs_ioctl_cmd.c new file mode 100644 index 00000000000000..2b475753e60fba --- /dev/null +++ b/drivers/infiniband/core/uverbs_ioctl_cmd.c @@ -0,0 +1,1060 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include "rdma_core.h" +#include "uverbs.h" +#include "core_priv.h" + +void uverbs_free_ah(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + ib_destroy_ah((struct ib_ah *)uobject->object); +} + +void uverbs_free_flow(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + ib_destroy_flow((struct ib_flow *)uobject->object); +} + +void uverbs_free_mw(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + uverbs_dealloc_mw((struct ib_mw *)uobject->object); +} + +void uverbs_free_qp(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_qp *qp = uobject->object; + struct ib_uqp_object *uqp = + container_of(uobject, struct ib_uqp_object, uevent.uobject); + + if (qp != qp->real_qp) { + ib_close_qp(qp); + } else { + ib_uverbs_detach_umcast(qp, uqp); + ib_destroy_qp(qp); + } + ib_uverbs_release_uevent(uobject->context->ufile, &uqp->uevent); +} + +void uverbs_free_rwq_ind_tbl(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; + struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; + + ib_destroy_rwq_ind_table(rwq_ind_tbl); + kfree(ind_tbl); +} + +void uverbs_free_wq(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_wq *wq = uobject->object; + struct ib_uwq_object *uwq = + container_of(uobject, struct ib_uwq_object, uevent.uobject); + + ib_destroy_wq(wq); + ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); +} + +void uverbs_free_srq(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_srq *srq = uobject->object; + struct ib_uevent_object *uevent = + container_of(uobject, struct ib_uevent_object, uobject); + + ib_destroy_srq(srq); + 
ib_uverbs_release_uevent(uobject->context->ufile, uevent); +} + +void uverbs_free_cq(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_cq *cq = uobject->object; + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobject, struct ib_ucq_object, uobject); + + ib_destroy_cq(cq); + ib_uverbs_release_ucq(uobject->context->ufile, ev_file, ucq); +} + +void uverbs_free_mr(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + ib_dereg_mr((struct ib_mr *)uobject->object); +} + +void uverbs_free_xrcd(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_xrcd *xrcd = uobject->object; + + mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); + ib_uverbs_dealloc_xrcd(uobject->context->ufile->device, xrcd); + mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); +} + +void uverbs_free_pd(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + ib_dealloc_pd((struct ib_pd *)uobject->object); +} + +void uverbs_free_event_file(const struct uverbs_type_alloc_action *type_alloc_action, + struct ib_uobject *uobject) +{ + struct ib_uverbs_event_file *event_file = (void *)(uobject + 1); + + spin_lock_irq(&event_file->lock); + event_file->is_closed = 1; + spin_unlock_irq(&event_file->lock); + + wake_up_interruptible(&event_file->poll_wait); + kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); +}; + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_uhw_compat_spec, + UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)), + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ))); + +static void create_udata(struct uverbs_attr_array *ctx, size_t num, + struct ib_udata *udata) +{ + /* + * This is for ease of conversion. The purpose is to convert all drivers + * to use uverbs_attr_array instead of ib_udata. 
+ * Assume attr == 0 is input and attr == 1 is output. + */ + void * __user inbuf; + size_t inbuf_len = 0; + void * __user outbuf; + size_t outbuf_len = 0; + + if (num >= UVERBS_UHW_NUM) { + struct uverbs_attr_array *driver = &ctx[UVERBS_UDATA_DRIVER_DATA_GROUP]; + + if (uverbs_is_valid(driver, UVERBS_UHW_IN)) { + inbuf = driver->attrs[UVERBS_UHW_IN].ptr_attr.ptr; + inbuf_len = driver->attrs[UVERBS_UHW_IN].ptr_attr.len; + } + + if (driver->num_attrs >= UVERBS_UHW_OUT && + uverbs_is_valid(driver, UVERBS_UHW_OUT)) { + outbuf = driver->attrs[UVERBS_UHW_OUT].ptr_attr.ptr; + outbuf_len = driver->attrs[UVERBS_UHW_OUT].ptr_attr.len; + } + } + INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len); +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_get_context_spec, + UVERBS_ATTR_PTR_OUT(GET_CONTEXT_RESP, + struct ib_uverbs_get_context_resp)); + +int uverbs_get_context(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_udata uhw; + struct ib_uverbs_get_context_resp resp; + struct ib_ucontext *ucontext; + struct file *filp; + int ret; + + if (!uverbs_is_valid(common, GET_CONTEXT_RESP)) + return -EINVAL; + + /* Temporary, only until drivers get the new uverbs_attr_array */ + create_udata(ctx, num, &uhw); + + mutex_lock(&file->mutex); + + if (file->ucontext) { + ret = -EINVAL; + goto err; + } + + ucontext = ib_dev->alloc_ucontext(ib_dev, &uhw); + if (IS_ERR(ucontext)) { + ret = PTR_ERR(ucontext); + goto err; + } + + ucontext->device = ib_dev; + ret = ib_uverbs_uobject_type_initialize_ucontext(ucontext); + if (ret) + goto err_ctx; + + rcu_read_lock(); + ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); + rcu_read_unlock(); + ucontext->closing = 0; + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + ucontext->umem_tree = RB_ROOT; + init_rwsem(&ucontext->umem_rwsem); + ucontext->odp_mrs_count = 0; + INIT_LIST_HEAD(&ucontext->no_private_counters); + + if 
(!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) + ucontext->invalidate_range = NULL; + +#endif + + resp.num_comp_vectors = file->device->num_comp_vectors; + + ret = get_unused_fd_flags(O_CLOEXEC); + if (ret < 0) + goto err_free; + resp.async_fd = ret; + + filp = ib_uverbs_alloc_async_event_file(file, ib_dev); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_fd; + } + + ret = uverbs_copy_to(common, GET_CONTEXT_RESP, &resp); + if (ret) + goto err_file; + + file->ucontext = ucontext; + ucontext->ufile = file; + + fd_install(resp.async_fd, filp); + + mutex_unlock(&file->mutex); + + return 0; + +err_file: + ib_uverbs_free_async_event_file(file); + fput(filp); + +err_fd: + put_unused_fd(resp.async_fd); + +err_free: + put_pid(ucontext->tgid); + ib_uverbs_uobject_type_release_ucontext(ucontext); + +err_ctx: + ib_dev->dealloc_ucontext(ucontext); +err: + mutex_unlock(&file->mutex); + return ret; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_query_device_spec, + UVERBS_ATTR_PTR_OUT(QUERY_DEVICE_RESP, struct ib_uverbs_query_device_resp), + UVERBS_ATTR_PTR_OUT(QUERY_DEVICE_ODP, struct ib_uverbs_odp_caps), + UVERBS_ATTR_PTR_OUT(QUERY_DEVICE_TIMESTAMP_MASK, u64), + UVERBS_ATTR_PTR_OUT(QUERY_DEVICE_HCA_CORE_CLOCK, u64), + UVERBS_ATTR_PTR_OUT(QUERY_DEVICE_CAP_FLAGS, u64)); + +int uverbs_query_device_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_device_attr attr = {}; + struct ib_udata uhw; + int err; + + /* Temporary, only until drivers get the new uverbs_attr_array */ + create_udata(ctx, num, &uhw); + + err = ib_dev->query_device(ib_dev, &attr, &uhw); + if (err) + return err; + + if (uverbs_is_valid(common, QUERY_DEVICE_RESP)) { + struct ib_uverbs_query_device_resp resp = {}; + + uverbs_copy_query_dev_fields(ib_dev, &resp, &attr); + if (uverbs_copy_to(common, QUERY_DEVICE_RESP, &resp)) + return -EFAULT; + } + +#ifdef 
CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (uverbs_is_valid(common, QUERY_DEVICE_ODP)) { + struct ib_uverbs_odp_caps odp_caps; + + odp_caps.general_caps = attr.odp_caps.general_caps; + odp_caps.per_transport_caps.rc_odp_caps = + attr.odp_caps.per_transport_caps.rc_odp_caps; + odp_caps.per_transport_caps.uc_odp_caps = + attr.odp_caps.per_transport_caps.uc_odp_caps; + odp_caps.per_transport_caps.ud_odp_caps = + attr.odp_caps.per_transport_caps.ud_odp_caps; + + if (uverbs_copy_to(common, QUERY_DEVICE_ODP, &odp_caps)) + return -EFAULT; + } +#endif + if (uverbs_copy_to(common, QUERY_DEVICE_TIMESTAMP_MASK, + &attr.timestamp_mask) == -EFAULT) + return -EFAULT; + + if (uverbs_copy_to(common, QUERY_DEVICE_HCA_CORE_CLOCK, + &attr.hca_core_clock) == -EFAULT) + return -EFAULT; + + if (uverbs_copy_to(common, QUERY_DEVICE_CAP_FLAGS, + &attr.device_cap_flags) == -EFAULT) + return -EFAULT; + + return 0; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_alloc_pd_spec, + UVERBS_ATTR_IDR(ALLOC_PD_HANDLE, UVERBS_TYPE_PD, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_alloc_pd_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_ucontext *ucontext = file->ucontext; + struct ib_udata uhw; + struct ib_uobject *uobject; + struct ib_pd *pd; + + /* Temporary, only until drivers get the new uverbs_attr_array */ + create_udata(ctx, num, &uhw); + + pd = ib_dev->alloc_pd(ib_dev, ucontext, &uhw); + if (IS_ERR(pd)) + return PTR_ERR(pd); + + uobject = common->attrs[ALLOC_PD_HANDLE].obj_attr.uobject; + pd->device = ib_dev; + pd->uobject = uobject; + pd->__internal_mr = NULL; + uobject->object = pd; + atomic_set(&pd->usecnt, 0); + + return 0; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_reg_mr_spec, + UVERBS_ATTR_IDR(REG_MR_HANDLE, UVERBS_TYPE_MR, UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(REG_MR_PD_HANDLE, UVERBS_TYPE_PD, 
UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(REG_MR_CMD, struct ib_uverbs_ioctl_reg_mr, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT(REG_MR_RESP, struct ib_uverbs_ioctl_reg_mr_resp, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_reg_mr_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_uverbs_ioctl_reg_mr cmd; + struct ib_uverbs_ioctl_reg_mr_resp resp; + struct ib_udata uhw; + struct ib_uobject *uobject; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (uverbs_copy_from(&cmd, common, REG_MR_CMD)) + return -EFAULT; + + if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; + + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + return ret; + + /* Temporary, only until drivers get the new uverbs_attr_array */ + create_udata(ctx, num, &uhw); + + uobject = common->attrs[REG_MR_HANDLE].obj_attr.uobject; + pd = common->attrs[REG_MR_PD_HANDLE].obj_attr.uobject->object; + + if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { + if (!(pd->device->attrs.device_cap_flags & + IB_DEVICE_ON_DEMAND_PAGING)) { + pr_debug("ODP support not available\n"); + return -EINVAL; + } + } + + mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, + cmd.access_flags, &uhw); + if (IS_ERR(mr)) + return PTR_ERR(mr); + + mr->device = pd->device; + mr->pd = pd; + mr->uobject = uobject; + atomic_inc(&pd->usecnt); + uobject->object = mr; + + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + + if (uverbs_copy_to(common, REG_MR_RESP, &resp)) { + ret = -EFAULT; + goto err; + } + + return 0; + +err: + ib_dereg_mr(mr); + return ret; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_dereg_mr_spec, + UVERBS_ATTR_IDR(DEREG_MR_HANDLE, UVERBS_TYPE_MR, UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_dereg_mr_handler(struct ib_device *ib_dev, + struct 
ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_mr *mr; + + mr = common->attrs[DEREG_MR_HANDLE].obj_attr.uobject->object; + + /* dereg_mr doesn't support driver data */ + return ib_dereg_mr(mr); +}; + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_create_comp_channel_spec, + UVERBS_ATTR_FD(CREATE_COMP_CHANNEL_FD, UVERBS_TYPE_COMP_CHANNEL, + UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_create_comp_channel_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_uverbs_event_file *ev_file; + + ev_file = uverbs_fd_uobj_to_priv(common->attrs[CREATE_COMP_CHANNEL_FD].obj_attr.uobject); + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->async_queue = NULL; + ev_file->uverbs_file = file; + ev_file->is_closed = 0; + + return 0; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_create_cq_spec, + UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_TYPE_CQ, UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64), + UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_TYPE_COMP_CHANNEL, UVERBS_ACCESS_READ), + /* + * Currently, COMP_VECTOR is mandatory, but that could be lifted in the + * future. 
+ */ + UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32), + UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_create_cq_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_ucontext *ucontext = file->ucontext; + struct ib_ucq_object *obj; + struct ib_udata uhw; + int ret; + u64 user_handle = 0; + struct ib_cq_init_attr attr = {}; + struct ib_cq *cq; + struct ib_uverbs_event_file *ev_file = NULL; + + ret = uverbs_copy_from(&attr.comp_vector, common, CREATE_CQ_COMP_VECTOR); + if (!ret) + ret = uverbs_copy_from(&attr.cqe, common, CREATE_CQ_CQE); + if (ret) + return ret; + + /* Optional params, if they don't exist, we get -ENOENT and skip them */ + if (uverbs_copy_from(&attr.flags, common, CREATE_CQ_FLAGS) == -EFAULT || + uverbs_copy_from(&user_handle, common, CREATE_CQ_USER_HANDLE) == -EFAULT) + return -EFAULT; + + if (uverbs_is_valid(common, CREATE_CQ_COMP_CHANNEL)) { + ev_file = uverbs_fd_uobj_to_priv(common->attrs[CREATE_CQ_COMP_CHANNEL].obj_attr.uobject); + kref_get(&ev_file->ref); + } + + if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) + return -EINVAL; + + obj = container_of(common->attrs[CREATE_CQ_HANDLE].obj_attr.uobject, + typeof(*obj), uobject); + obj->uverbs_file = ucontext->ufile; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); + + /* Temporary, only until drivers get the new uverbs_attr_array */ + create_udata(ctx, num, &uhw); + + cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + if (IS_ERR(cq)) + return PTR_ERR(cq); + + cq->device = ib_dev; + cq->uobject = &obj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = ev_file; 
+ obj->uobject.object = cq; + obj->uobject.user_handle = user_handle; + atomic_set(&cq->usecnt, 0); + + ret = uverbs_copy_to(common, CREATE_CQ_RESP_CQE, &cq->cqe); + if (ret) + goto err; + + return 0; +err: + ib_destroy_cq(cq); + return ret; +}; + +static int qp_fill_attrs(struct ib_qp_init_attr *attr, struct ib_ucontext *ctx, + const struct ib_uverbs_ioctl_create_qp *cmd, + u32 create_flags) +{ + if (create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | + IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV | + IB_QP_CREATE_SCATTER_FCS)) + return -EINVAL; + + attr->create_flags = create_flags; + attr->event_handler = ib_uverbs_qp_event_handler; + attr->qp_context = ctx->ufile; + attr->sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : + IB_SIGNAL_REQ_WR; + attr->qp_type = cmd->qp_type; + + attr->cap.max_send_wr = cmd->max_send_wr; + attr->cap.max_recv_wr = cmd->max_recv_wr; + attr->cap.max_send_sge = cmd->max_send_sge; + attr->cap.max_recv_sge = cmd->max_recv_sge; + attr->cap.max_inline_data = cmd->max_inline_data; + + return 0; +} + +static void qp_init_uqp(struct ib_uqp_object *obj) +{ + obj->uevent.events_reported = 0; + INIT_LIST_HEAD(&obj->uevent.event_list); + INIT_LIST_HEAD(&obj->mcast_list); +} + +static int qp_write_resp(const struct ib_qp_init_attr *attr, + const struct ib_qp *qp, + struct uverbs_attr_array *common) +{ + struct ib_uverbs_ioctl_create_qp_resp resp = { + .qpn = qp->qp_num, + .max_recv_sge = attr->cap.max_recv_sge, + .max_send_sge = attr->cap.max_send_sge, + .max_recv_wr = attr->cap.max_recv_wr, + .max_send_wr = attr->cap.max_send_wr, + .max_inline_data = attr->cap.max_inline_data}; + + return uverbs_copy_to(common, CREATE_QP_RESP, &resp); +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_create_qp_spec, + UVERBS_ATTR_IDR(CREATE_QP_HANDLE, UVERBS_TYPE_QP, UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(CREATE_QP_PD_HANDLE, UVERBS_TYPE_PD, UVERBS_ACCESS_READ, + 
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(CREATE_QP_SEND_CQ, UVERBS_TYPE_CQ, UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(CREATE_QP_RECV_CQ, UVERBS_TYPE_CQ, UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(CREATE_QP_SRQ, UVERBS_TYPE_SRQ, UVERBS_ACCESS_READ), + UVERBS_ATTR_PTR_IN(CREATE_QP_USER_HANDLE, u64), + UVERBS_ATTR_PTR_IN(CREATE_QP_CMD, struct ib_uverbs_ioctl_create_qp), + UVERBS_ATTR_PTR_IN(CREATE_QP_CMD_FLAGS, u32), + UVERBS_ATTR_PTR_OUT(CREATE_QP_RESP, struct ib_uverbs_ioctl_create_qp_resp, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_create_qp_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_ucontext *ucontext = file->ucontext; + struct ib_uqp_object *obj; + struct ib_udata uhw; + int ret; + u64 user_handle = 0; + u32 create_flags = 0; + struct ib_uverbs_ioctl_create_qp cmd; + struct ib_qp_init_attr attr = {}; + struct ib_qp *qp; + struct ib_pd *pd; + + ret = uverbs_copy_from(&cmd, common, CREATE_QP_CMD); + if (ret) + return ret; + + /* Optional params */ + if (uverbs_copy_from(&create_flags, common, CREATE_QP_CMD_FLAGS) == -EFAULT || + uverbs_copy_from(&user_handle, common, CREATE_QP_USER_HANDLE) == -EFAULT) + return -EFAULT; + + if (cmd.qp_type == IB_QPT_XRC_INI) { + cmd.max_recv_wr = 0; + cmd.max_recv_sge = 0; + } + + ret = qp_fill_attrs(&attr, ucontext, &cmd, create_flags); + if (ret) + return ret; + + pd = common->attrs[CREATE_QP_PD_HANDLE].obj_attr.uobject->object; + attr.send_cq = common->attrs[CREATE_QP_SEND_CQ].obj_attr.uobject->object; + attr.recv_cq = common->attrs[CREATE_QP_RECV_CQ].obj_attr.uobject->object; + if (uverbs_is_valid(common, CREATE_QP_SRQ)) + attr.srq = common->attrs[CREATE_QP_SRQ].obj_attr.uobject->object; + obj = (struct ib_uqp_object *)common->attrs[CREATE_QP_HANDLE].obj_attr.uobject; + + if (attr.srq && 
attr.srq->srq_type != IB_SRQT_BASIC) + return -EINVAL; + + qp_init_uqp(obj); + create_udata(ctx, num, &uhw); + qp = pd->device->create_qp(pd, &attr, &uhw); + if (IS_ERR(qp)) + return PTR_ERR(qp); + qp->real_qp = qp; + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + if (attr.recv_cq) + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + qp->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = qp; + obj->uevent.uobject.user_handle = user_handle; + + ret = qp_write_resp(&attr, qp, common); + if (ret) { + ib_destroy_qp(qp); + return ret; + } + + return 0; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_create_qp_xrc_tgt_spec, + UVERBS_ATTR_IDR(CREATE_QP_XRC_TGT_HANDLE, UVERBS_TYPE_QP, UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_IDR(CREATE_QP_XRC_TGT_XRCD, UVERBS_TYPE_XRCD, UVERBS_ACCESS_READ, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(CREATE_QP_XRC_TGT_USER_HANDLE, u64), + UVERBS_ATTR_PTR_IN(CREATE_QP_XRC_TGT_CMD, struct ib_uverbs_ioctl_create_qp), + UVERBS_ATTR_PTR_IN(CREATE_QP_XRC_TGT_CMD_FLAGS, u32), + UVERBS_ATTR_PTR_OUT(CREATE_QP_XRC_TGT_RESP, struct ib_uverbs_ioctl_create_qp_resp, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +int uverbs_create_qp_xrc_tgt_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_ucontext *ucontext = file->ucontext; + struct ib_uqp_object *obj; + int ret; + u64 user_handle = 0; + u32 create_flags = 0; + struct ib_uverbs_ioctl_create_qp cmd; + struct ib_qp_init_attr attr = {}; + struct ib_qp *qp; + + ret = uverbs_copy_from(&cmd, common, CREATE_QP_XRC_TGT_CMD); 
+ if (ret) + return ret; + + /* Optional params */ + if (uverbs_copy_from(&create_flags, common, CREATE_QP_CMD_FLAGS) == -EFAULT || + uverbs_copy_from(&user_handle, common, CREATE_QP_USER_HANDLE) == -EFAULT) + return -EFAULT; + + ret = qp_fill_attrs(&attr, ucontext, &cmd, create_flags); + if (ret) + return ret; + + obj = (struct ib_uqp_object *)common->attrs[CREATE_QP_HANDLE].obj_attr.uobject; + obj->uxrcd = container_of(common->attrs[CREATE_QP_XRC_TGT_XRCD].obj_attr.uobject, + struct ib_uxrcd_object, uobject); + attr.xrcd = obj->uxrcd->uobject.object; + + qp_init_uqp(obj); + qp = ib_create_qp(NULL, &attr); + if (IS_ERR(qp)) + return PTR_ERR(qp); + qp->uobject = &obj->uevent.uobject; + obj->uevent.uobject.object = qp; + obj->uevent.uobject.user_handle = user_handle; + atomic_inc(&obj->uxrcd->refcnt); + + ret = qp_write_resp(&attr, qp, common); + if (ret) { + ib_destroy_qp(qp); + return ret; + } + + return 0; +} + +DECLARE_UVERBS_ATTR_SPEC( + uverbs_modify_qp_spec, + UVERBS_ATTR_IDR(MODIFY_QP_HANDLE, UVERBS_TYPE_QP, UVERBS_ACCESS_WRITE, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_IN(MODIFY_QP_STATE, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_CUR_STATE, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_EN_SQD_ASYNC_NOTIFY, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_ACCESS_FLAGS, u32), + UVERBS_ATTR_PTR_IN(MODIFY_QP_PKEY_INDEX, u16), + UVERBS_ATTR_PTR_IN(MODIFY_QP_PORT, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_QKEY, u32), + UVERBS_ATTR_PTR_IN(MODIFY_QP_AV, struct ib_uverbs_qp_dest), + UVERBS_ATTR_PTR_IN(MODIFY_QP_PATH_MTU, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_TIMEOUT, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_RETRY_CNT, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_RNR_RETRY, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_RQ_PSN, u32), + UVERBS_ATTR_PTR_IN(MODIFY_QP_MAX_RD_ATOMIC, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_ALT_PATH, struct ib_uverbs_qp_alt_path), + UVERBS_ATTR_PTR_IN(MODIFY_QP_MIN_RNR_TIMER, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_SQ_PSN, u32), + UVERBS_ATTR_PTR_IN(MODIFY_QP_MAX_DEST_RD_ATOMIC, u8), + 
UVERBS_ATTR_PTR_IN(MODIFY_QP_PATH_MIG_STATE, u8), + UVERBS_ATTR_PTR_IN(MODIFY_QP_DEST_QPN, u32)); + +int uverbs_modify_qp_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_array *ctx, size_t num) +{ + struct uverbs_attr_array *common = &ctx[0]; + struct ib_udata uhw; + struct ib_qp *qp; + struct ib_qp_attr *attr; + struct ib_uverbs_qp_dest av; + struct ib_uverbs_qp_alt_path alt_path; + u32 attr_mask = 0; + int ret; + + qp = common->attrs[MODIFY_QP_HANDLE].obj_attr.uobject->object; + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + +#define MODIFY_QP_CPY(_param, _fld, _attr) \ + ({ \ + int ret = uverbs_copy_from(_fld, common, _param); \ + if (!ret) \ + attr_mask |= _attr; \ + ret == -EFAULT ? ret : 0; \ + }) + + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_STATE, &attr->qp_state, + IB_QP_STATE); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_CUR_STATE, &attr->cur_qp_state, + IB_QP_CUR_STATE); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_EN_SQD_ASYNC_NOTIFY, + &attr->en_sqd_async_notify, + IB_QP_EN_SQD_ASYNC_NOTIFY); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_ACCESS_FLAGS, + &attr->qp_access_flags, IB_QP_ACCESS_FLAGS); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_PKEY_INDEX, &attr->pkey_index, + IB_QP_PKEY_INDEX); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_PORT, &attr->port_num, IB_QP_PORT); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_QKEY, &attr->qkey, IB_QP_QKEY); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_PATH_MTU, &attr->path_mtu, + IB_QP_PATH_MTU); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_TIMEOUT, &attr->timeout, + IB_QP_TIMEOUT); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_RETRY_CNT, &attr->retry_cnt, + IB_QP_RETRY_CNT); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_RNR_RETRY, &attr->rnr_retry, + IB_QP_RNR_RETRY); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_RQ_PSN, &attr->rq_psn, + IB_QP_RQ_PSN); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_MAX_RD_ATOMIC, + &attr->max_rd_atomic, + IB_QP_MAX_QP_RD_ATOMIC); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_MIN_RNR_TIMER, + 
&attr->min_rnr_timer, IB_QP_MIN_RNR_TIMER); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_SQ_PSN, &attr->sq_psn, + IB_QP_SQ_PSN); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_MAX_DEST_RD_ATOMIC, + &attr->max_dest_rd_atomic, + IB_QP_MAX_DEST_RD_ATOMIC); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_PATH_MIG_STATE, + &attr->path_mig_state, IB_QP_PATH_MIG_STATE); + ret = ret ?: MODIFY_QP_CPY(MODIFY_QP_DEST_QPN, &attr->dest_qp_num, + IB_QP_DEST_QPN); + + if (ret) + goto err; + + ret = uverbs_copy_from(&av, common, MODIFY_QP_AV); + if (!ret) { + attr_mask |= IB_QP_AV; + memcpy(attr->ah_attr.grh.dgid.raw, av.dgid, 16); + attr->ah_attr.grh.flow_label = av.flow_label; + attr->ah_attr.grh.sgid_index = av.sgid_index; + attr->ah_attr.grh.hop_limit = av.hop_limit; + attr->ah_attr.grh.traffic_class = av.traffic_class; + attr->ah_attr.dlid = av.dlid; + attr->ah_attr.sl = av.sl; + attr->ah_attr.src_path_bits = av.src_path_bits; + attr->ah_attr.static_rate = av.static_rate; + attr->ah_attr.ah_flags = av.is_global ? IB_AH_GRH : 0; + attr->ah_attr.port_num = av.port_num; + } else if (ret == -EFAULT) { + goto err; + } + + ret = uverbs_copy_from(&alt_path, common, MODIFY_QP_ALT_PATH); + if (!ret) { + attr_mask |= IB_QP_ALT_PATH; + memcpy(attr->alt_ah_attr.grh.dgid.raw, alt_path.dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = alt_path.dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = alt_path.dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = alt_path.dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = alt_path.dest.traffic_class; + attr->alt_ah_attr.dlid = alt_path.dest.dlid; + attr->alt_ah_attr.sl = alt_path.dest.sl; + attr->alt_ah_attr.src_path_bits = alt_path.dest.src_path_bits; + attr->alt_ah_attr.static_rate = alt_path.dest.static_rate; + attr->alt_ah_attr.ah_flags = alt_path.dest.is_global ? 
IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = alt_path.dest.port_num; + attr->alt_pkey_index = alt_path.pkey_index; + attr->alt_port_num = alt_path.port_num; + attr->alt_timeout = alt_path.timeout; + } else if (ret == -EFAULT) { + goto err; + } + + create_udata(ctx, num, &uhw); + + if (qp->real_qp == qp) { + ret = ib_resolve_eth_dmac(qp, attr, &attr_mask); + if (ret) + goto err; + ret = qp->device->modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, attr_mask), &uhw); + } else { + ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, attr_mask)); + } + + if (ret) + goto err; + + return 0; +err: + kfree(attr); + return ret; +} + +DECLARE_UVERBS_TYPE(uverbs_type_comp_channel, + &UVERBS_TYPE_ALLOC_FD(0, sizeof(struct ib_uobject) + sizeof(struct ib_uverbs_event_file), + uverbs_free_event_file, + &uverbs_event_fops, + "[infinibandevent]", O_RDONLY), + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_COMP_CHANNEL_CREATE, + uverbs_create_comp_channel_handler, + &uverbs_create_comp_channel_spec))); + +DECLARE_UVERBS_TYPE(uverbs_type_cq, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, + uverbs_free_cq), + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_CQ_CREATE, + uverbs_create_cq_handler, + &uverbs_create_cq_spec))); + +DECLARE_UVERBS_TYPE(uverbs_type_qp, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, + uverbs_free_qp), + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_QP_CREATE, + uverbs_create_qp_handler, + &uverbs_create_qp_spec), + ADD_UVERBS_ACTION(UVERBS_QP_CREATE_XRC_TGT, + uverbs_create_qp_xrc_tgt_handler, + &uverbs_create_qp_xrc_tgt_spec), + ADD_UVERBS_ACTION(UVERBS_QP_MODIFY, + uverbs_modify_qp_handler, + &uverbs_modify_qp_spec)), +); + +DECLARE_UVERBS_TYPE(uverbs_type_mw, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw), + /* TODO: implement actions for mw */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_mr, + /* 1 is used in order to free the MR after all the MWs */ + &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr), + &UVERBS_ACTIONS( + 
ADD_UVERBS_ACTION(UVERBS_MR_REG, uverbs_reg_mr_handler, + &uverbs_reg_mr_spec), + ADD_UVERBS_ACTION(UVERBS_MR_DEREG, + uverbs_dereg_mr_handler, + &uverbs_dereg_mr_spec))); + +DECLARE_UVERBS_TYPE(uverbs_type_srq, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, + uverbs_free_srq), + /* TODO: implement actions for srq */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_ah, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah), + /* TODO: implement actions for ah */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_flow, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow), + /* TODO: implement actions for flow */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_wq, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, + uverbs_free_wq), + /* TODO: implement actions for wq */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_rwq_ind_table, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl), + /* TODO: implement actions for rwq_ind_table */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_xrcd, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, + uverbs_free_xrcd), + /* TODO: implement actions for xrcd */ + NULL); + +DECLARE_UVERBS_TYPE(uverbs_type_pd, + /* 2 is used in order to free the PD after MRs */ + &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd), + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_PD_ALLOC, + uverbs_alloc_pd_handler, + &uverbs_alloc_pd_spec))); + +DECLARE_UVERBS_TYPE(uverbs_type_device, NULL, + &UVERBS_ACTIONS( + ADD_UVERBS_CTX_ACTION(UVERBS_DEVICE_ALLOC_CONTEXT, + uverbs_get_context, + &uverbs_get_context_spec), + ADD_UVERBS_ACTION(UVERBS_DEVICE_QUERY, + &uverbs_query_device_handler, + &uverbs_query_device_spec))); + +DECLARE_UVERBS_TYPES(uverbs_common_types, + ADD_UVERBS_TYPE(UVERBS_TYPE_DEVICE, uverbs_type_device), + ADD_UVERBS_TYPE(UVERBS_TYPE_PD, uverbs_type_pd), + ADD_UVERBS_TYPE(UVERBS_TYPE_MR, uverbs_type_mr), + ADD_UVERBS_TYPE(UVERBS_TYPE_COMP_CHANNEL, uverbs_type_comp_channel), + ADD_UVERBS_TYPE(UVERBS_TYPE_CQ, uverbs_type_cq), + 
ADD_UVERBS_TYPE(UVERBS_TYPE_QP, uverbs_type_qp), + ADD_UVERBS_TYPE(UVERBS_TYPE_AH, uverbs_type_ah), + ADD_UVERBS_TYPE(UVERBS_TYPE_MW, uverbs_type_mw), + ADD_UVERBS_TYPE(UVERBS_TYPE_SRQ, uverbs_type_srq), + ADD_UVERBS_TYPE(UVERBS_TYPE_FLOW, uverbs_type_flow), + ADD_UVERBS_TYPE(UVERBS_TYPE_WQ, uverbs_type_wq), + ADD_UVERBS_TYPE(UVERBS_TYPE_RWQ_IND_TBL, + uverbs_type_rwq_ind_table), + ADD_UVERBS_TYPE(UVERBS_TYPE_XRCD, uverbs_type_xrcd), +); +EXPORT_SYMBOL(uverbs_common_types); diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c new file mode 100644 index 00000000000000..2628dc9c7caf59 --- /dev/null +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "uverbs.h" + +#define UVERBS_NUM_GROUPS (UVERBS_ID_RESERVED_MASK >> UVERBS_ID_RESERVED_SHIFT) + +static const struct uverbs_type **get_next_type(const struct uverbs_type_group *types, + const struct uverbs_type **type) +{ + while (type - types->types < types->num_types && !(*type)) + type++; + + return type - types->types < types->num_types ? type : NULL; +} + +static const struct uverbs_action **get_next_action(const struct uverbs_action_group *group, + const struct uverbs_action **pcurr) +{ + while (pcurr - group->actions < group->num_actions && !(*pcurr)) + pcurr++; + + return pcurr - group->actions < group->num_actions ? pcurr : NULL; +} + +static const struct uverbs_attr_spec *get_next_attr(const struct uverbs_attr_spec_group *group, + const struct uverbs_attr_spec *pcurr) +{ + while (pcurr - group->attrs < group->num_attrs && !pcurr->type) + pcurr++; + + return pcurr - group->attrs < group->num_attrs ? 
pcurr : NULL; +} + +static void _free_attr_spec_group(struct uverbs_attr_spec_group **attr_group, + unsigned int num_groups) +{ + unsigned int i; + + for (i = 0; i < num_groups; i++) + kfree((void *)attr_group[i]); +} + +static void free_attr_spec_group(struct uverbs_attr_spec_group **attr_group, + unsigned int num_groups) +{ + _free_attr_spec_group(attr_group, num_groups); + kfree(attr_group); +} + +static int get_attrs_from_trees(const struct uverbs_action **action_arr, + unsigned int elements, + struct uverbs_attr_spec_group ***out) +{ + unsigned int group_idx; + struct uverbs_attr_spec_group *attr_spec_group[UVERBS_NUM_GROUPS]; + unsigned int max_action_specs = 0; + unsigned int i; + int ret = -ENOMEM; + + for (group_idx = 0; group_idx < UVERBS_NUM_GROUPS; group_idx++) { + const struct uverbs_attr_spec_group *attr_group_trees[elements]; + unsigned int num_attr_group_trees = 0; + const struct uverbs_attr_spec *attr_trees[elements]; + unsigned int num_attr_groups = 0; + unsigned int attrs_in_group = 0; + unsigned long *mandatory_attr_mask; + + for (i = 0; i < elements; i++) { + const struct uverbs_action *action = action_arr[i]; + + if (action->num_groups > group_idx && + action->attr_groups[group_idx]) { + const struct uverbs_attr_spec_group *spec_group = + action->attr_groups[group_idx]; + + attr_group_trees[num_attr_group_trees++] = + spec_group; + attr_trees[num_attr_groups++] = + spec_group->attrs; + if (spec_group->num_attrs > attrs_in_group) + attrs_in_group = spec_group->num_attrs; + } + } + + if (!attrs_in_group) { + attr_spec_group[group_idx] = NULL; + continue; + } + + attr_spec_group[group_idx] = + kzalloc(sizeof(*attr_spec_group[group_idx]) + + sizeof(struct uverbs_attr_spec) * attrs_in_group + + sizeof(unsigned long) * BITS_TO_LONGS(attrs_in_group), + GFP_KERNEL); + if (!attr_spec_group[group_idx]) { + ret = -ENOMEM; + goto free_groups; + } + + attr_spec_group[group_idx]->attrs = + (void *)(attr_spec_group[group_idx] + 1); + 
attr_spec_group[group_idx]->num_attrs = attrs_in_group; + attr_spec_group[group_idx]->mandatory_attrs_bitmask = + (void *)(attr_spec_group[group_idx]->attrs + attrs_in_group); + mandatory_attr_mask = + attr_spec_group[group_idx]->mandatory_attrs_bitmask; + + do { + unsigned int tree_idx; + bool found_next = false; + unsigned int attr_trees_idx[num_attr_groups]; + unsigned int min_attr = INT_MAX; + const struct uverbs_attr_spec *single_attr_trees[num_attr_groups]; + unsigned int num_single_attr_trees = 0; + unsigned int num_attr_trees = 0; + struct uverbs_attr_spec *allocated_attr; + enum uverbs_attr_type cur_type = UVERBS_ATTR_TYPE_NA; + unsigned int attr_type_idx = 0; + + for (tree_idx = 0; tree_idx < num_attr_group_trees; + tree_idx++) { + const struct uverbs_attr_spec *next = + get_next_attr(attr_group_trees[tree_idx], + attr_trees[tree_idx]); + + if (next) { + found_next = true; + attr_trees[num_attr_trees] = next; + attr_trees_idx[num_attr_trees] = + next - attr_group_trees[tree_idx]->attrs; + if (min_attr > attr_trees_idx[num_attr_trees]) + min_attr = attr_trees_idx[num_attr_trees]; + num_attr_trees++; + } + } + + if (!found_next) + break; + + max_action_specs = group_idx + 1; + + allocated_attr = + attr_spec_group[group_idx]->attrs + min_attr; + + for (i = 0; i < num_attr_trees; i++) { + if (attr_trees_idx[i] == min_attr) { + single_attr_trees[num_single_attr_trees++] = + attr_trees[i]; + attr_trees[i]++; + } + } + + for (i = 0; i < num_single_attr_trees; i++) + switch (cur_type) { + case UVERBS_ATTR_TYPE_NA: + cur_type = single_attr_trees[i]->type; + attr_type_idx = i; + continue; + case UVERBS_ATTR_TYPE_PTR_IN: + case UVERBS_ATTR_TYPE_PTR_OUT: + case UVERBS_ATTR_TYPE_IDR: + case UVERBS_ATTR_TYPE_FD: + if (single_attr_trees[i]->type != + UVERBS_ATTR_TYPE_NA) + WARN("%s\n", "uverbs_merge: Two types for the same attribute"); + break; + case UVERBS_ATTR_TYPE_FLAG: + if (single_attr_trees[i]->type != + UVERBS_ATTR_TYPE_FLAG && + single_attr_trees[i]->type != + 
UVERBS_ATTR_TYPE_NA) + WARN("%s\n", "uverbs_merge: Two types for the same attribute"); + break; + default: + WARN("%s\n", "uverbs_merge: Unknown attribute type given"); + } + + switch (cur_type) { + case UVERBS_ATTR_TYPE_PTR_IN: + case UVERBS_ATTR_TYPE_PTR_OUT: + case UVERBS_ATTR_TYPE_IDR: + case UVERBS_ATTR_TYPE_FD: + /* PTR_IN and PTR_OUT can't be merged between trees */ + memcpy(allocated_attr, + single_attr_trees[attr_type_idx], + sizeof(*allocated_attr)); + break; + case UVERBS_ATTR_TYPE_FLAG: + allocated_attr->type = + UVERBS_ATTR_TYPE_FLAG; + allocated_attr->flags = 0; + allocated_attr->flag.mask = 0; + for (i = 0; i < num_single_attr_trees; i++) { + allocated_attr->flags |= + single_attr_trees[i]->flags; + allocated_attr->flag.mask |= + single_attr_trees[i]->flag.mask; + } + break; + default: + return -EINVAL; + }; + + if (allocated_attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY) + set_bit(min_attr, mandatory_attr_mask); + } while (1); + } + + *out = kcalloc(max_action_specs, sizeof(struct uverbs_attr_spec_group *), + GFP_KERNEL); + if (!(*out)) + goto free_groups; + + for (group_idx = 0; group_idx < max_action_specs; group_idx++) + (*out)[group_idx] = attr_spec_group[group_idx]; + + return max_action_specs; + +free_groups: + _free_attr_spec_group(attr_spec_group, group_idx); + + return ret; +} + +struct action_alloc_list { + struct uverbs_action action; + unsigned int action_idx; + /* next is used in order to construct the group later on */ + struct list_head list; +}; + +static void _free_type_actions_group(struct uverbs_action_group **action_groups, + unsigned int num_groups) { + unsigned int i, j; + + for (i = 0; i < num_groups; i++) { + if (!action_groups[i]) + continue; + + for (j = 0; j < action_groups[i]->num_actions; j++) { + if (!action_groups[i]->actions[j]->attr_groups) + continue; + + free_attr_spec_group((struct uverbs_attr_spec_group **) + action_groups[i]->actions[j]->attr_groups, + action_groups[i]->actions[j]->num_groups); + kfree((void 
*)action_groups[i]->actions[j]); + } + kfree(action_groups[i]); + } +} + +static void free_type_actions_group(struct uverbs_action_group **action_groups, + unsigned int num_groups) +{ + _free_type_actions_group(action_groups, num_groups); + kfree(action_groups); +} + +static int get_actions_from_trees(const struct uverbs_type **type_arr, + unsigned int elements, + struct uverbs_action_group ***out) +{ + unsigned int group_idx; + struct uverbs_action_group *action_groups[UVERBS_NUM_GROUPS]; + unsigned int max_action_groups = 0; + struct uverbs_action_group **allocated_type_actions_group = NULL; + int i; + + for (group_idx = 0; group_idx < UVERBS_NUM_GROUPS; group_idx++) { + const struct uverbs_action_group *actions_group_trees[elements]; + unsigned int num_actions_group_trees = 0; + const struct uverbs_action **action_trees[elements]; + unsigned int num_action_trees = 0; + unsigned int actions_in_group = 0; + LIST_HEAD(allocated_group_list); + + for (i = 0; i < elements; i++) { + if (type_arr[i]->num_groups > group_idx && + type_arr[i]->action_groups[group_idx]) { + actions_group_trees[num_actions_group_trees++] = + type_arr[i]->action_groups[group_idx]; + action_trees[num_action_trees++] = + type_arr[i]->action_groups[group_idx]->actions; + } + } + + do { + unsigned int tree_idx; + bool found_next = false; + unsigned int action_trees_idx[num_action_trees]; + unsigned int min_action = INT_MAX; + const struct uverbs_action *single_action_trees[num_action_trees]; + unsigned int num_single_action_trees = 0; + unsigned int num_action_trees = 0; + struct action_alloc_list *allocated_action = NULL; + int ret; + + for (tree_idx = 0; tree_idx < num_actions_group_trees; + tree_idx++) { + const struct uverbs_action **next = + get_next_action(actions_group_trees[tree_idx], + action_trees[tree_idx]); + + if (!next) + continue; + + found_next = true; + action_trees[num_action_trees] = next; + action_trees_idx[num_action_trees] = + next - actions_group_trees[tree_idx]->actions; + 
if (min_action > action_trees_idx[num_action_trees]) + min_action = action_trees_idx[num_action_trees]; + num_action_trees++; + } + + if (!found_next) + break; + + for (i = 0; i < num_action_trees; i++) { + if (action_trees_idx[i] == min_action) { + single_action_trees[num_single_action_trees++] = + *action_trees[i]; + action_trees[i]++; + } + } + + actions_in_group = min_action + 1; + + /* Now we have an array of all attributes of the same actions */ + allocated_action = kmalloc(sizeof(*allocated_action), + GFP_KERNEL); + if (!allocated_action) + goto free_list; + + /* Take the last tree which is parameter != NULL */ + for (i = num_single_action_trees - 1; + i >= 0 && !single_action_trees[i]->handler; i--) + ; + if (WARN_ON(i < 0)) { + allocated_action->action.flags = 0; + allocated_action->action.handler = NULL; + } else { + allocated_action->action.flags = + single_action_trees[i]->flags; + allocated_action->action.handler = + single_action_trees[i]->handler; + } + allocated_action->action.num_child_attrs = 0; + + ret = get_attrs_from_trees(single_action_trees, + num_single_action_trees, + (struct uverbs_attr_spec_group ***) + &allocated_action->action.attr_groups); + if (ret < 0) { + kfree(allocated_action); + goto free_list; + } + + allocated_action->action.num_groups = ret; + + for (i = 0; i < allocated_action->action.num_groups; + allocated_action->action.num_child_attrs += + allocated_action->action.attr_groups[i]->num_attrs, i++) + ; + + allocated_action->action_idx = min_action; + list_add_tail(&allocated_action->list, + &allocated_group_list); + } while (1); + + if (!actions_in_group) { + action_groups[group_idx] = NULL; + continue; + } + + action_groups[group_idx] = + kmalloc(sizeof(*action_groups[group_idx]) + + sizeof(struct uverbs_action *) * actions_in_group, + GFP_KERNEL); + + if (!action_groups[group_idx]) + goto free_list; + + action_groups[group_idx]->num_actions = actions_in_group; + action_groups[group_idx]->actions = + (void 
*)(action_groups[group_idx] + 1); + { + struct action_alloc_list *iter; + + list_for_each_entry(iter, &allocated_group_list, list) + action_groups[group_idx]->actions[iter->action_idx] = + (const struct uverbs_action *)&iter->action; + } + + max_action_groups = group_idx + 1; + + continue; + +free_list: + { + struct action_alloc_list *iter, *tmp; + + list_for_each_entry_safe(iter, tmp, + &allocated_group_list, list) + kfree(iter); + + goto free_groups; + } + } + + allocated_type_actions_group = + kmalloc(sizeof(*allocated_type_actions_group) * max_action_groups, + GFP_KERNEL); + if (!allocated_type_actions_group) + goto free_groups; + + memcpy(allocated_type_actions_group, action_groups, + sizeof(*allocated_type_actions_group) * max_action_groups); + + *out = allocated_type_actions_group; + + return max_action_groups; + +free_groups: + _free_type_actions_group(action_groups, max_action_groups); + + return -ENOMEM; +} + +struct type_alloc_list { + struct uverbs_type type; + unsigned int type_idx; + /* next is used in order to construct the group later on */ + struct list_head list; +}; + +static void _free_types(struct uverbs_type_group **types, unsigned int num_types) +{ + unsigned int i, j; + + for (i = 0; i < num_types; i++) { + if (!types[i]) + continue; + + for (j = 0; j < types[i]->num_types; j++) { + if (!types[i]->types[j]) + continue; + + free_type_actions_group((struct uverbs_action_group **) + types[i]->types[j]->action_groups, + types[i]->types[j]->num_groups); + kfree((void *)types[i]->types[j]); + } + kfree(types[i]); + } +} + +struct uverbs_root *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_root_spec *trees) +{ + unsigned int group_idx; + struct uverbs_type_group *types_groups[UVERBS_NUM_GROUPS]; + unsigned int max_types_groups = 0; + struct uverbs_root *allocated_types_group = NULL; + int i; + + memset(types_groups, 0, sizeof(types_groups)); + + for (group_idx = 0; group_idx < UVERBS_NUM_GROUPS; group_idx++) { + const struct 
uverbs_type **type_trees[num_trees]; + unsigned int types_in_group = 0; + LIST_HEAD(allocated_group_list); + + for (i = 0; i < num_trees; i++) + type_trees[i] = trees[i].types->types; + + do { + const struct uverbs_type *curr_type[num_trees]; + unsigned int type_trees_idx[num_trees]; + unsigned int trees_for_curr_type = 0; + unsigned int min_type = INT_MAX; + unsigned int num_type_trees = 0; + bool found_next = false; + unsigned int tree_idx; + int res; + struct type_alloc_list *allocated_type = NULL; + + for (tree_idx = 0; tree_idx < num_trees; tree_idx++) { + if (trees[tree_idx].group_id == group_idx) { + const struct uverbs_type **next = + get_next_type(trees[tree_idx].types, + type_trees[tree_idx]); + + if (!next) + continue; + + found_next = true; + type_trees[num_type_trees] = next; + type_trees_idx[num_type_trees] = + next - trees[tree_idx].types->types; + if (min_type > type_trees_idx[num_type_trees]) + min_type = type_trees_idx[num_type_trees]; + num_type_trees++; + } + } + + if (!found_next) + break; + + max_types_groups = group_idx + 1; + + for (i = 0; i < num_type_trees; i++) + /* + * We must have at least one hit here, + * as we found this min type + */ + if (type_trees_idx[i] == min_type) { + curr_type[trees_for_curr_type++] = + *type_trees[i]; + type_trees[i]++; + } + + types_in_group = min_type + 1; + + /* + * Do things for type: + * 1. Get action_groups and num_group. + * 2. Allocate uverbs_type. Copy alloc pointer + * (shallow copy) and fill in num_groups and + * action_groups. + * In order to hash them, allocate a struct of + * {uverbs_type, list_head} + * 3. Put that pointer in types_group[group_idx]. 
+ */ + allocated_type = kmalloc(sizeof(*allocated_type), + GFP_KERNEL); + if (!allocated_type) + goto free_list; + + /* Take the last tree which is parameter != NULL */ + for (i = trees_for_curr_type - 1; + i >= 0 && !curr_type[i]->alloc; i--) + ; + if (i < 0) + allocated_type->type.alloc = NULL; + else + allocated_type->type.alloc = curr_type[i]->alloc; + + res = get_actions_from_trees(curr_type, + trees_for_curr_type, + (struct uverbs_action_group ***) + &allocated_type->type.action_groups); + if (res < 0) { + kfree(allocated_type); + goto free_list; + } + + allocated_type->type.num_groups = res; + allocated_type->type_idx = min_type; + list_add_tail(&allocated_type->list, + &allocated_group_list); + } while (1); + + if (!types_in_group) { + types_groups[group_idx] = NULL; + continue; + } + + types_groups[group_idx] = kzalloc(sizeof(*types_groups[group_idx]) + + sizeof(struct uverbs_type *) * types_in_group, + GFP_KERNEL); + if (!types_groups[group_idx]) + goto free_list; + + types_groups[group_idx]->num_types = types_in_group; + types_groups[group_idx]->types = + (void *)(types_groups[group_idx] + 1); + { + struct type_alloc_list *iter; + + list_for_each_entry(iter, &allocated_group_list, list) + types_groups[group_idx]->types[iter->type_idx] = + (const struct uverbs_type *)&iter->type; + } + + continue; + +free_list: + { + struct type_alloc_list *iter, *tmp; + + list_for_each_entry_safe(iter, tmp, + &allocated_group_list, list) + kfree(iter); + + goto free_groups; + } + } + + /* + * 1. Allocate struct uverbs_root + space for type_groups array. + * 2. Fill it with types_group + * memcpy(allocated_space + 1, types_group, + * sizeof(types_group[0]) * max_types_groups) + * 3. 
If anything fails goto free_groups; + */ + allocated_types_group = + kmalloc(sizeof(*allocated_types_group) + + sizeof(*allocated_types_group->type_groups) * max_types_groups, + GFP_KERNEL); + if (!allocated_types_group) + goto free_groups; + + allocated_types_group->type_groups = (void *)(allocated_types_group + 1); + memcpy(allocated_types_group->type_groups, types_groups, + sizeof(*allocated_types_group->type_groups) * max_types_groups); + allocated_types_group->num_groups = max_types_groups; + + return allocated_types_group; + +free_groups: + _free_types(types_groups, max_types_groups); + + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL(uverbs_alloc_spec_tree); + +void uverbs_specs_free(struct uverbs_root *root) +{ + _free_types((struct uverbs_type_group **)root->type_groups, + root->num_groups); + kfree(root); +} +EXPORT_SYMBOL(uverbs_specs_free); + diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0012fa58c105de..1a3e9d471a612e 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -49,8 +49,10 @@ #include #include +#include #include "uverbs.h" +#include "rdma_core.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); @@ -66,19 +68,6 @@ enum { static struct class *uverbs_class; -DEFINE_SPINLOCK(ib_uverbs_idr_lock); -DEFINE_IDR(ib_uverbs_pd_idr); -DEFINE_IDR(ib_uverbs_mr_idr); -DEFINE_IDR(ib_uverbs_mw_idr); -DEFINE_IDR(ib_uverbs_ah_idr); -DEFINE_IDR(ib_uverbs_cq_idr); -DEFINE_IDR(ib_uverbs_qp_idr); -DEFINE_IDR(ib_uverbs_srq_idr); -DEFINE_IDR(ib_uverbs_xrcd_idr); -DEFINE_IDR(ib_uverbs_rule_idr); -DEFINE_IDR(ib_uverbs_wq_idr); -DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); - static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -166,7 +155,7 @@ static struct kobj_type ib_uverbs_dev_ktype = { .release = ib_uverbs_release_dev, }; -static void ib_uverbs_release_event_file(struct kref *ref) +static void 
ib_uverbs_release_async_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = container_of(ref, struct ib_uverbs_event_file, ref); @@ -174,6 +163,14 @@ static void ib_uverbs_release_event_file(struct kref *ref) kfree(file); } +static void ib_uverbs_release_event_file(struct kref *ref) +{ + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); + + ib_uverbs_cleanup_fd(file); +} + void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) @@ -212,8 +209,8 @@ void ib_uverbs_release_uevent(struct ib_uverbs_file *file, spin_unlock_irq(&file->async_file->lock); } -static void ib_uverbs_detach_umcast(struct ib_qp *qp, - struct ib_uqp_object *uobj) +void ib_uverbs_detach_umcast(struct ib_qp *qp, + struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; @@ -227,123 +224,9 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp, static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { - struct ib_uobject *uobj, *tmp; - context->closing = 1; - - list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { - struct ib_ah *ah = uobj->object; - - idr_remove_uobj(&ib_uverbs_ah_idr, uobj); - ib_destroy_ah(ah); - kfree(uobj); - } - - /* Remove MWs before QPs, in order to support type 2A MWs. 
*/ - list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { - struct ib_mw *mw = uobj->object; - - idr_remove_uobj(&ib_uverbs_mw_idr, uobj); - uverbs_dealloc_mw(mw); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { - struct ib_flow *flow_id = uobj->object; - - idr_remove_uobj(&ib_uverbs_rule_idr, uobj); - ib_destroy_flow(flow_id); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { - struct ib_qp *qp = uobj->object; - struct ib_uqp_object *uqp = - container_of(uobj, struct ib_uqp_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp != qp->real_qp) { - ib_close_qp(qp); - } else { - ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - } - ib_uverbs_release_uevent(file, &uqp->uevent); - kfree(uqp); - } - - list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { - struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; - struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - - idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); - ib_destroy_rwq_ind_table(rwq_ind_tbl); - kfree(ind_tbl); - kfree(uobj); - } - - list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { - struct ib_wq *wq = uobj->object; - struct ib_uwq_object *uwq = - container_of(uobj, struct ib_uwq_object, uevent.uobject); - - idr_remove_uobj(&ib_uverbs_wq_idr, uobj); - ib_destroy_wq(wq); - ib_uverbs_release_uevent(file, &uwq->uevent); - kfree(uwq); - } - - list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { - struct ib_srq *srq = uobj->object; - struct ib_uevent_object *uevent = - container_of(uobj, struct ib_uevent_object, uobject); - - idr_remove_uobj(&ib_uverbs_srq_idr, uobj); - ib_destroy_srq(srq); - ib_uverbs_release_uevent(file, uevent); - kfree(uevent); - } - - list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = uobj->object; - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, 
struct ib_ucq_object, uobject); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - ib_destroy_cq(cq); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); - } - - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { - struct ib_mr *mr = uobj->object; - - idr_remove_uobj(&ib_uverbs_mr_idr, uobj); - ib_dereg_mr(mr); - kfree(uobj); - } - - mutex_lock(&file->device->xrcd_tree_mutex); - list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { - struct ib_xrcd *xrcd = uobj->object; - struct ib_uxrcd_object *uxrcd = - container_of(uobj, struct ib_uxrcd_object, uobject); - - idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); - ib_uverbs_dealloc_xrcd(file->device, xrcd); - kfree(uxrcd); - } - mutex_unlock(&file->device->xrcd_tree_mutex); - - list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { - struct ib_pd *pd = uobj->object; - - idr_remove_uobj(&ib_uverbs_pd_idr, uobj); - ib_dealloc_pd(pd); - kfree(uobj); - } - + ib_uverbs_uobject_type_cleanup_ucontext(context, + context->device->specs_root); put_pid(context->tgid); return context->device->dealloc_ucontext(context); @@ -354,7 +237,7 @@ static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) complete(&dev->comp); } -static void ib_uverbs_release_file(struct kref *ref) +void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); @@ -462,7 +345,7 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) return fasync_helper(fd, filp, on, &file->async_queue); } -static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; @@ -487,12 +370,31 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp) mutex_unlock(&file->uverbs_file->device->lists_mutex); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + 
kref_put(&file->ref, ib_uverbs_release_async_event_file); + + return 0; +} + +static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->lock); + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); + + ib_uverbs_close_fd(filp); kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } -static const struct file_operations uverbs_event_fops = { +const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, @@ -501,6 +403,15 @@ static const struct file_operations uverbs_event_fops = { .llseek = no_llseek, }; +static const struct file_operations uverbs_async_event_fops = { + .owner = THIS_MODULE, + .read = ib_uverbs_event_read, + .poll = ib_uverbs_event_poll, + .release = ib_uverbs_async_event_close, + .fasync = ib_uverbs_event_fasync, + .llseek = no_llseek, +}; + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; @@ -585,7 +496,8 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) struct ib_uevent_object *uobj; /* for XRC target qp's, check that qp is live */ - if (!event->element.qp->uobject || !event->element.qp->uobject->live) + if (!event->element.qp->uobject || + !uverbs_is_live(event->element.qp->uobject)) return; uobj = container_of(event->element.qp->uobject, @@ -630,13 +542,12 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) { - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file); file->async_file = NULL; } -struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, 
- struct ib_device *ib_dev, - int is_async) +struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, + struct ib_device *ib_dev) { struct ib_uverbs_event_file *ev_file; struct file *filp; @@ -655,7 +566,7 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, ev_file->async_queue = NULL; ev_file->is_closed = 0; - filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops, + filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops, ev_file, O_RDONLY); if (IS_ERR(filp)) goto err_put_refs; @@ -665,26 +576,25 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, &uverbs_file->device->uverbs_events_file_list); mutex_unlock(&uverbs_file->device->lists_mutex); - if (is_async) { - WARN_ON(uverbs_file->async_file); - uverbs_file->async_file = ev_file; - kref_get(&uverbs_file->async_file->ref); - INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, - ib_dev, - ib_uverbs_event_handler); - ret = ib_register_event_handler(&uverbs_file->event_handler); - if (ret) - goto err_put_file; - - /* At that point async file stuff was fully set */ - ev_file->is_async = 1; - } + WARN_ON(uverbs_file->async_file); + uverbs_file->async_file = ev_file; + kref_get(&uverbs_file->async_file->ref); + INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, + ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&uverbs_file->event_handler); + if (ret) + goto err_put_file; + + /* At that point async file stuff was fully set */ + ev_file->is_async = 1; return filp; err_put_file: fput(filp); - kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&uverbs_file->async_file->ref, + ib_uverbs_release_async_event_file); uverbs_file->async_file = NULL; return ERR_PTR(ret); @@ -694,35 +604,6 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, return filp; } -/* - * Look up a completion event file by FD. 
If lookup is successful, - * takes a ref to the event file struct that it returns; if - * unsuccessful, returns NULL. - */ -struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) -{ - struct ib_uverbs_event_file *ev_file = NULL; - struct fd f = fdget(fd); - - if (!f.file) - return NULL; - - if (f.file->f_op != &uverbs_event_fops) - goto out; - - ev_file = f.file->private_data; - if (ev_file->is_async) { - ev_file = NULL; - goto out; - } - - kref_get(&ev_file->ref); - -out: - fdput(f); - return ev_file; -} - static int verify_command_mask(struct ib_device *ib_dev, __u32 command) { u64 mask; @@ -1011,7 +892,8 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) mutex_unlock(&file->device->lists_mutex); if (file->async_file) - kref_put(&file->async_file->ref, ib_uverbs_release_event_file); + kref_put(&file->async_file->ref, + ib_uverbs_release_async_event_file); kref_put(&file->ref, ib_uverbs_release_file); kobject_put(&dev->kobj); @@ -1025,6 +907,9 @@ static const struct file_operations uverbs_fops = { .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, +#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) + .unlocked_ioctl = ib_uverbs_ioctl, +#endif }; static const struct file_operations uverbs_mmap_fops = { @@ -1034,6 +919,9 @@ static const struct file_operations uverbs_mmap_fops = { .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, +#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) + .unlocked_ioctl = ib_uverbs_ioctl, +#endif }; static struct ib_client uverbs_client = { @@ -1375,13 +1263,6 @@ static void __exit ib_uverbs_cleanup(void) unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); - idr_destroy(&ib_uverbs_pd_idr); - idr_destroy(&ib_uverbs_mr_idr); - idr_destroy(&ib_uverbs_mw_idr); - idr_destroy(&ib_uverbs_ah_idr); - idr_destroy(&ib_uverbs_cq_idr); - idr_destroy(&ib_uverbs_qp_idr); - 
idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index cba57bb53dba31..f88dd2876ea1d3 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "cxio_hal.h" #include "iwch.h" @@ -1362,6 +1363,12 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, int iwch_register_device(struct iwch_dev *dev) { + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; int ret; int i; @@ -1453,9 +1460,16 @@ int iwch_register_device(struct iwch_dev *dev) memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name, sizeof(dev->ibdev.iwcm->ifname)); + dev->ibdev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(dev->ibdev.specs_root)) { + ret = PTR_ERR(dev->ibdev.specs_root); + goto bail1; + } + ret = ib_register_device(&dev->ibdev, NULL); if (ret) - goto bail1; + goto dealloc_spec; for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) { ret = device_create_file(&dev->ibdev.dev, @@ -1467,6 +1481,8 @@ int iwch_register_device(struct iwch_dev *dev) return 0; bail2: ib_unregister_device(&dev->ibdev); +dealloc_spec: + uverbs_specs_free(dev->ibdev.specs_root); bail1: kfree(dev->ibdev.iwcm); return ret; @@ -1481,6 +1497,7 @@ void iwch_unregister_device(struct iwch_dev *dev) device_remove_file(&dev->ibdev.dev, iwch_class_attributes[i]); ib_unregister_device(&dev->ibdev); + uverbs_specs_free(dev->ibdev.specs_root); kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c569..b437076de52be5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -51,6 +51,7 @@ #include #include #include +#include #include 
"iw_cxgb4.h" @@ -532,6 +533,12 @@ int c4iw_register_device(struct c4iw_dev *dev) { int ret; int i; + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; PDBG("%s c4iw_dev %p\n", __func__, dev); BUG_ON(!dev->rdev.lldi.ports[0]); @@ -623,9 +630,16 @@ int c4iw_register_device(struct c4iw_dev *dev) memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); + dev->ibdev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(dev->ibdev.specs_root)) { + ret = PTR_ERR(dev->ibdev.specs_root); + goto bail1; + } + ret = ib_register_device(&dev->ibdev, NULL); if (ret) - goto bail1; + goto dealloc_spec; for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) { ret = device_create_file(&dev->ibdev.dev, @@ -636,6 +650,8 @@ int c4iw_register_device(struct c4iw_dev *dev) return 0; bail2: ib_unregister_device(&dev->ibdev); +dealloc_spec: + uverbs_specs_free(dev->ibdev.specs_root); bail1: kfree(dev->ibdev.iwcm); return ret; @@ -650,6 +666,7 @@ void c4iw_unregister_device(struct c4iw_dev *dev) device_remove_file(&dev->ibdev.dev, c4iw_class_attributes[i]); ib_unregister_device(&dev->ibdev); + uverbs_specs_free(dev->ibdev.specs_root); kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f64f0dde9a882c..2a6f3937684980 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_user.h" @@ -574,6 +575,7 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) unregister_inetaddr_notifier(&iboe->nb_inet); unregister_netdevice_notifier(&iboe->nb); ib_unregister_device(&hr_dev->ib_dev); + uverbs_specs_free(hr_dev->ib_dev.specs_root); } static int hns_roce_register_device(struct 
hns_roce_dev *hr_dev) @@ -582,6 +584,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) struct hns_roce_ib_iboe *iboe = NULL; struct ib_device *ib_dev = NULL; struct device *dev = &hr_dev->pdev->dev; + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; iboe = &hr_dev->iboe; @@ -655,10 +663,15 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* OTHERS */ ib_dev->get_port_immutable = hns_roce_port_immutable; + ib_dev->specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(ib_dev->specs_root)) + return PTR_ERR(ib_dev->specs_root); + ret = ib_register_device(ib_dev, NULL); if (ret) { dev_err(dev, "ib_register_device failed!\n"); - return ret; + goto dealloc_spec; } ret = hns_roce_setup_mtu_gids(hr_dev); @@ -691,6 +704,9 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) error_failed_setup_mtu_gids: ib_unregister_device(ib_dev); +dealloc_spec: + uverbs_specs_free(ib_dev->specs_root); + return ret; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2360338877bf68..a0e458b8b1019e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "i40iw.h" @@ -2707,6 +2708,7 @@ static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev) device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]); ib_unregister_device(&iwibdev->ibdev); + uverbs_specs_free(iwibdev->ibdev.specs_root); } /** @@ -2732,15 +2734,28 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) { int i, ret; struct i40iw_ib_device *iwibdev; + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; iwdev->iwibdev = i40iw_init_rdma_device(iwdev); if (!iwdev->iwibdev) return -ENOMEM; iwibdev =
iwdev->iwibdev; + iwibdev->ibdev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + ret = PTR_ERR_OR_ZERO(iwibdev->ibdev.specs_root); + if (ret) goto error; + ret = ib_register_device(&iwibdev->ibdev, NULL); - if (ret) + if (ret) { + uverbs_specs_free(iwibdev->ibdev.specs_root); goto error; + } for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) { ret = @@ -2752,6 +2767,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]); } ib_unregister_device(&iwibdev->ibdev); + uverbs_specs_free(iwibdev->ibdev.specs_root); goto error; } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1811eb5b6aabfc..3bed26d02f40f3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -45,6 +45,7 @@ #include #include +#include #include #include @@ -2555,6 +2556,12 @@ static void get_fw_ver_str(struct ib_device *device, char *str, static void *mlx4_ib_add(struct mlx4_dev *dev) { + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; struct mlx4_ib_dev *ibdev; int num_ports = 0; int i, j; @@ -2833,9 +2840,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_alloc_diag_counters(ibdev)) goto err_steer_free_bitmap; - if (ib_register_device(&ibdev->ib_dev, NULL)) + ibdev->ib_dev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(ibdev->ib_dev.specs_root)) goto err_diag_counters; + if (ib_register_device(&ibdev->ib_dev, NULL)) + goto dealloc_spec; + if (mlx4_ib_mad_init(ibdev)) goto err_reg; @@ -2901,6 +2913,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) err_reg: ib_unregister_device(&ibdev->ib_dev); +dealloc_spec: + uverbs_specs_free(ibdev->ib_dev.specs_root); + err_diag_counters: mlx4_ib_diag_cleanup(ibdev); @@ -3007,6 +3022,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + uverbs_specs_free(ibdev->ib_dev.specs_root); mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 7493a83acd28dc..aa035bb82f8d4f 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o uverbs_tree.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f4160d56dc4f3b..4cc393b4a37a9e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -51,10 +51,12 @@ #include #include #include +#include #include #include #include #include "mlx5_ib.h" +#include #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "2.2-1" @@ -2926,6 +2928,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) const char *name; int err; int i; + static const struct uverbs_root_spec root_spec[] = { + [0] = {.types = &uverbs_common_types, + .group_id = 0}, + [1] = {.types = &mlx5_common_types, + .group_id = 0}, + }; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -3128,9 +3136,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_odp; + dev->ib_dev.specs_root = + uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(dev->ib_dev.specs_root)) + goto err_q_cnt; + err = ib_register_device(&dev->ib_dev, NULL); if (err) - goto err_q_cnt; + goto dealloc_spec; err = create_umr_res(dev); if (err) @@ -3153,6 +3167,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) err_dev: 
ib_unregister_device(&dev->ib_dev); +dealloc_spec: + uverbs_specs_free(dev->ib_dev.specs_root); + err_q_cnt: mlx5_ib_dealloc_q_counters(dev); @@ -3184,6 +3201,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_remove_roce_notifier(dev); ib_unregister_device(&dev->ib_dev); + uverbs_specs_free(dev->ib_dev.specs_root); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); mlx5_ib_odp_remove_one(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1df8a67d4f0232..bb12c6670af407 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -45,6 +45,7 @@ #include #include #include +#include #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -906,6 +907,7 @@ int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); +extern const struct uverbs_type_group mlx5_common_types; static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/uverbs_tree.c b/drivers/infiniband/hw/mlx5/uverbs_tree.c new file mode 100644 index 00000000000000..704b177f80147a --- /dev/null +++ b/drivers/infiniband/hw/mlx5/uverbs_tree.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include "mlx5_ib.h" + +DECLARE_UVERBS_ATTR_SPEC( + mlx5_spec_create_qp, + UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)), + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ))); + +DECLARE_UVERBS_ATTR_SPEC( + mlx5_spec_create_cq, + UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, + offsetof(struct mlx5_ib_create_cq, reserved), + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | + UVERBS_ATTR_SPEC_F_MIN_SZ)), + UVERBS_ATTR_PTR_OUT(UVERBS_UHW_OUT, __u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_ATTR_SPEC( + mlx5_spec_alloc_pd, + 
UVERBS_ATTR_PTR_OUT(UVERBS_UHW_OUT, struct mlx5_ib_alloc_pd_resp, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + +DECLARE_UVERBS_ATTR_SPEC( + mlx5_spec_device_query, + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ))); +/* TODO: fix sizes */ +DECLARE_UVERBS_ATTR_SPEC( + mlx5_spec_alloc_context, + UVERBS_ATTR_PTR_IN(UVERBS_UHW_IN, struct mlx5_ib_alloc_ucontext_req, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ | + UVERBS_ATTR_SPEC_F_MANDATORY)), + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ))); + +DECLARE_UVERBS_TYPE(mlx5_type_qp, NULL, + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_QP_CREATE, NULL, NULL, + &mlx5_spec_create_qp))); + +DECLARE_UVERBS_TYPE(mlx5_type_cq, NULL, + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_CQ_CREATE, NULL, NULL, + &mlx5_spec_create_cq))); + +DECLARE_UVERBS_TYPE(mlx5_type_pd, NULL, + &UVERBS_ACTIONS( + ADD_UVERBS_ACTION(UVERBS_PD_ALLOC, NULL, NULL, + &mlx5_spec_alloc_pd))); + +DECLARE_UVERBS_TYPE(mlx5_type_device, NULL, + &UVERBS_ACTIONS( + ADD_UVERBS_CTX_ACTION(UVERBS_DEVICE_ALLOC_CONTEXT, + NULL, NULL, + &mlx5_spec_alloc_context), + ADD_UVERBS_ACTION(UVERBS_DEVICE_QUERY, + NULL, NULL, + &mlx5_spec_device_query))); + +DECLARE_UVERBS_TYPES(mlx5_common_types, + ADD_UVERBS_TYPE(UVERBS_TYPE_DEVICE, mlx5_type_device), + ADD_UVERBS_TYPE(UVERBS_TYPE_PD, mlx5_type_pd), + ADD_UVERBS_TYPE(UVERBS_TYPE_CQ, mlx5_type_cq), + ADD_UVERBS_TYPE(UVERBS_TYPE_QP, mlx5_type_qp)); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 358930a41e36c3..899c330e528bfe 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1189,6 +1190,12 @@ static void get_dev_fw_str(struct ib_device *device, char *str, int mthca_register_device(struct mthca_dev *dev) { + static const struct uverbs_root_spec root_spec[] = { + [0] = { + 
.types = &uverbs_common_types, + .group_id = 0 + }, + }; int ret; int i; @@ -1294,15 +1301,23 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); + dev->ib_dev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(dev->ib_dev.specs_root)) + return PTR_ERR(dev->ib_dev.specs_root); + ret = ib_register_device(&dev->ib_dev, NULL); - if (ret) + if (ret) { + uverbs_specs_free(dev->ib_dev.specs_root); return ret; + } for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) { ret = device_create_file(&dev->ib_dev.dev, mthca_dev_attributes[i]); if (ret) { ib_unregister_device(&dev->ib_dev); + uverbs_specs_free(dev->ib_dev.specs_root); return ret; } } @@ -1316,4 +1331,5 @@ void mthca_unregister_device(struct mthca_dev *dev) { mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); + uverbs_specs_free(dev->ib_dev.specs_root); } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index bd69125731c181..3248f04dedda73 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "nes.h" @@ -3873,10 +3874,22 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) struct nes_vnic *nesvnic = nesibdev->nesvnic; struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; int i, ret; + nesvnic->nesibdev->ibdev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(nesvnic->nesibdev->ibdev.specs_root)) + return PTR_ERR(nesvnic->nesibdev->ibdev.specs_root); + ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL); if (ret) { + uverbs_specs_free(nesvnic->nesibdev->ibdev.specs_root); return ret; } @@ -3895,6 +3908,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) 
nes_dev_attributes[i]); } ib_unregister_device(&nesibdev->ibdev); + uverbs_specs_free(nesibdev->ibdev.specs_root); return ret; } } @@ -3919,6 +3933,7 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) if (nesvnic->of_device_registered) { ib_unregister_device(&nesibdev->ibdev); + uverbs_specs_free(nesibdev->ibdev.specs_root); } nesvnic->of_device_registered = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 896071502739a8..269df05377d58a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -117,6 +118,13 @@ static void get_dev_fw_str(struct ib_device *device, char *str, static int ocrdma_register_device(struct ocrdma_dev *dev) { + int ret; + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); @@ -218,7 +226,15 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.destroy_srq = ocrdma_destroy_srq; dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; } - return ib_register_device(&dev->ibdev, NULL); + dev->ibdev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(dev->ibdev.specs_root)) + return PTR_ERR(dev->ibdev.specs_root); + + ret = ib_register_device(&dev->ibdev, NULL); + if (ret) + uverbs_specs_free(dev->ibdev.specs_root); + return ret; } static int ocrdma_alloc_resources(struct ocrdma_dev *dev) @@ -381,6 +397,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev) cancel_delayed_work_sync(&dev->eqd_work); ocrdma_remove_sysfiles(dev); ib_unregister_device(&dev->ibdev); + uverbs_specs_free(dev->ibdev.specs_root); ocrdma_rem_port_stats(dev); ocrdma_free_resources(dev); 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c229b9f4a52da6..4a0a83793eb94c 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -48,6 +48,7 @@ #include #include +#include #include #include "usnic_abi.h" @@ -349,6 +350,12 @@ static void usnic_get_dev_fw_str(struct ib_device *device, /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { + static const struct uverbs_root_spec root_spec[] = { + [0] = { + .types = &uverbs_common_types, + .group_id = 0 + }, + }; struct usnic_ib_dev *us_ibdev; union ib_gid gid; struct in_ifaddr *in; @@ -432,9 +439,14 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; - if (ib_register_device(&us_ibdev->ib_dev, NULL)) + us_ibdev->ib_dev.specs_root = uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(us_ibdev->ib_dev.specs_root)) goto err_fwd_dealloc; + if (ib_register_device(&us_ibdev->ib_dev, NULL)) + goto dealloc_spec; + usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr); if (netif_carrier_ok(us_ibdev->netdev)) @@ -456,6 +468,8 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ufdev->mtu); return us_ibdev; +dealloc_spec: + uverbs_specs_free(us_ibdev->ib_dev.specs_root); err_fwd_dealloc: usnic_fwd_dev_free(us_ibdev->ufdev); err_dealloc: @@ -470,6 +484,7 @@ static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev) usnic_ib_sysfs_unregister_usdev(us_ibdev); usnic_fwd_dev_free(us_ibdev->ufdev); ib_unregister_device(&us_ibdev->ib_dev); + uverbs_specs_free(us_ibdev->ib_dev.specs_root); ib_dealloc_device(&us_ibdev->ib_dev); } diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index d430c2f7cec4ce..cdff9fe712bd64 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ 
b/drivers/infiniband/sw/rdmavt/vt.c @@ -47,6 +47,7 @@ #include #include +#include #include "vt.h" #include "trace.h" @@ -726,6 +727,10 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; + static const struct uverbs_root_spec root_spec[] = { + [0] = {.types = &uverbs_common_types, + .group_id = 0}, + }; if (!rdi) return -EINVAL; @@ -826,10 +831,18 @@ int rvt_register_device(struct rvt_dev_info *rdi) rdi->ibdev.num_comp_vectors = 1; /* We are now good to announce we exist */ + rdi->ibdev.specs_root = + uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(rdi->ibdev.specs_root)) { + ret = PTR_ERR(rdi->ibdev.specs_root); + goto bail_cq; + } + ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); - goto bail_cq; + goto bail_dealloc_specs; } rvt_create_mad_agents(rdi); @@ -837,6 +850,9 @@ int rvt_register_device(struct rvt_dev_info *rdi) rvt_pr_info(rdi, "Registration with rdmavt done.\n"); return ret; +bail_dealloc_specs: + uverbs_specs_free(rdi->ibdev.specs_root); + bail_cq: rvt_cq_exit(rdi); @@ -863,6 +879,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) rvt_free_mad_agents(rdi); ib_unregister_device(&rdi->ibdev); + uverbs_specs_free(rdi->ibdev.specs_root); rvt_cq_exit(rdi); rvt_mr_exit(rdi); rvt_qp_exit(rdi); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4552be960c6ace..4f3425bd9736c3 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -34,6 +34,7 @@ #include "rxe.h" #include "rxe_loc.h" #include "rxe_queue.h" +#include static int rxe_query_device(struct ib_device *dev, struct ib_device_attr *attr, @@ -1201,6 +1202,10 @@ int rxe_register_device(struct rxe_dev *rxe) int err; int i; struct ib_device *dev = &rxe->ib_dev; + static const struct uverbs_root_spec root_spec[] = 
{ + [0] = {.types = &uverbs_common_types, + .group_id = 0}, + }; strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX); strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); @@ -1293,10 +1298,16 @@ int rxe_register_device(struct rxe_dev *rxe) dev->attach_mcast = rxe_attach_mcast; dev->detach_mcast = rxe_detach_mcast; + dev->specs_root = + uverbs_alloc_spec_tree(ARRAY_SIZE(root_spec), + root_spec); + if (IS_ERR(dev->specs_root)) + goto err1; + err = ib_register_device(dev, NULL); if (err) { pr_warn("rxe_register_device failed, err = %d\n", err); - goto err1; + goto err2; } for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) { @@ -1304,14 +1315,16 @@ int rxe_register_device(struct rxe_dev *rxe) if (err) { pr_warn("device_create_file failed, i = %d, err = %d\n", i, err); - goto err2; + goto err3; } } return 0; -err2: +err3: ib_unregister_device(dev); +err2: + uverbs_specs_free(dev->specs_root); err1: return err; } @@ -1325,6 +1338,7 @@ int rxe_unregister_device(struct rxe_dev *rxe) device_remove_file(&dev->dev, rxe_dev_attributes[i]); ib_unregister_device(dev); + uverbs_specs_free(dev->specs_root); return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d3fba0a56e1707..896d37f03a7e9e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1329,21 +1329,17 @@ struct ib_fmr_attr { struct ib_umem; +struct ib_ucontext_lock; + struct ib_ucontext { struct ib_device *device; - struct list_head pd_list; - struct list_head mr_list; - struct list_head mw_list; - struct list_head cq_list; - struct list_head qp_list; - struct list_head srq_list; - struct list_head ah_list; - struct list_head xrcd_list; - struct list_head rule_list; - struct list_head wq_list; - struct list_head rwq_ind_tbl_list; + struct ib_uverbs_file *ufile; int closing; + /* lock for uobjects list */ + struct ib_ucontext_lock *uobjects_lock; + struct list_head uobjects; + struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct rb_root umem_tree; @@ -1368,11 
+1364,12 @@ struct ib_uobject { struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ - int id; /* index into kernel idr */ - struct kref ref; - struct rw_semaphore mutex; /* protects .live */ + int id; /* index into kernel idr/fd */ + struct rw_semaphore usecnt; /* protects exclusive access */ struct rcu_head rcu; /* kfree_rcu() overhead */ - int live; + + const struct uverbs_type_alloc_action *type; + struct ib_ucontext_lock *uobjects_lock; }; struct ib_udata { @@ -1835,6 +1832,10 @@ struct ib_device { struct iw_cm_verbs *iwcm; + struct idr idr; + /* Global lock in use to safely release device IDR */ + spinlock_t idr_lock; + /** * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the * driver initialized data. The struct is kfree()'ed by the sysfs @@ -2097,6 +2098,8 @@ struct ib_device { */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); + + struct uverbs_root *specs_root; }; struct ib_client { diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h new file mode 100644 index 00000000000000..4955b25da4207f --- /dev/null +++ b/include/rdma/uverbs_ioctl.h @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_IOCTL_ +#define _UVERBS_IOCTL_ + +#include +#include +#include +#include + +struct uverbs_object_type; +struct uverbs_uobject_type; + +/* + * ======================================= + * Verbs action specifications + * ======================================= + */ + +#define UVERBS_ID_RESERVED_MASK 0xF000 +#define UVERBS_ID_RESERVED_SHIFT 12 + +enum uverbs_attr_type { + UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE_PTR_OUT, + UVERBS_ATTR_TYPE_IDR, + UVERBS_ATTR_TYPE_FD, + UVERBS_ATTR_TYPE_FLAG, +}; + +enum uverbs_idr_access { + UVERBS_ACCESS_READ, + UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_NEW, + UVERBS_ACCESS_DESTROY +}; + +enum uverbs_attr_spec_flags { + UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, + UVERBS_ATTR_SPEC_F_MIN_SZ = 1U << 1, +}; + +struct uverbs_attr_spec { + enum uverbs_attr_type type; + u8 flags; + union { + u16 len; + struct { + u16 obj_type; + u8 access; + } obj; + struct { + /* flags are always 64bits */ + u64 mask; + } flag; + }; +}; + +struct uverbs_attr_spec_group { + struct uverbs_attr_spec *attrs; + size_t num_attrs; + /* populate at runtime */ + unsigned long *mandatory_attrs_bitmask; +}; + +struct uverbs_attr_array; +struct ib_uverbs_file; + +enum uverbs_action_flags { + 
UVERBS_ACTION_FLAG_CREATE_ROOT = 1 << 0, +}; + +struct uverbs_action { + const struct uverbs_attr_spec_group **attr_groups; + size_t num_groups; + u32 flags; + int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + struct uverbs_attr_array *ctx, size_t num); + u16 num_child_attrs; +}; + +struct uverbs_type_alloc_action; +typedef void (*free_type)(const struct uverbs_type_alloc_action *uobject_type, + struct ib_uobject *uobject); + +struct uverbs_type_alloc_action { + enum uverbs_attr_type type; + int order; + size_t obj_size; + free_type free_fn; + struct { + const struct file_operations *fops; + const char *name; + int flags; + } fd; +}; + +struct uverbs_action_group { + size_t num_actions; + const struct uverbs_action **actions; +}; + +struct uverbs_type { + size_t num_groups; + const struct uverbs_action_group **action_groups; + const struct uverbs_type_alloc_action *alloc; +}; + +struct uverbs_type_group { + size_t num_types; + const struct uverbs_type **types; +}; + +struct uverbs_root { + const struct uverbs_type_group **type_groups; + size_t num_groups; +}; + +#define UA_FLAGS(_flags) .flags = _flags +#define UVERBS_ATTR(_id, _len, _type, ...) \ + [_id] = {.len = _len, .type = _type, ##__VA_ARGS__} +#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \ + UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_IN, ##__VA_ARGS__) +#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \ + UVERBS_ATTR_PTR_IN_SZ(_id, sizeof(_type), ##__VA_ARGS__) +#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \ + UVERBS_ATTR(_id, _len, UVERBS_ATTR_TYPE_PTR_OUT, ##__VA_ARGS__) +#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \ + UVERBS_ATTR_PTR_OUT_SZ(_id, sizeof(_type), ##__VA_ARGS__) +#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...) \ + [_id] = {.type = UVERBS_ATTR_TYPE_IDR, \ + .obj = {.obj_type = _idr_type, \ + .access = _access \ + }, ##__VA_ARGS__ } +#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...) 
\ + [_id] = {.type = UVERBS_ATTR_TYPE_FD, \ + .obj = {.obj_type = _fd_type, \ + .access = _access + BUILD_BUG_ON_ZERO( \ + _access != UVERBS_ACCESS_NEW && \ + _access != UVERBS_ACCESS_READ) \ + }, ##__VA_ARGS__ } +#define UVERBS_ATTR_FLAG(_id, _mask, ...) \ + [_id] = {.type = UVERBS_ATTR_TYPE_FLAG, \ + .flag = {.mask = _mask}, ##__VA_ARGS__ } +#define _UVERBS_ATTR_SPEC_SZ(...) \ + (sizeof((const struct uverbs_attr_spec[]){__VA_ARGS__}) / \ + sizeof(const struct uverbs_attr_spec)) +#define UVERBS_ATTR_SPEC(...) \ + ((const struct uverbs_attr_spec_group) \ + {.attrs = (struct uverbs_attr_spec[]){__VA_ARGS__}, \ + .num_attrs = _UVERBS_ATTR_SPEC_SZ(__VA_ARGS__)}) +#define DECLARE_UVERBS_ATTR_SPEC(name, ...) \ + const struct uverbs_attr_spec_group name = \ + UVERBS_ATTR_SPEC(__VA_ARGS__) +#define _UVERBS_ATTR_ACTION_SPEC_SZ(...) \ + (sizeof((const struct uverbs_attr_spec_group *[]){__VA_ARGS__}) / \ + sizeof(const struct uverbs_attr_spec_group *)) +#define _UVERBS_ACTION(_handler, _flags, ...) \ + ((const struct uverbs_action) { \ + .flags = _flags, \ + .handler = _handler, \ + .num_groups = _UVERBS_ATTR_ACTION_SPEC_SZ(__VA_ARGS__), \ + .attr_groups = (const struct uverbs_attr_spec_group *[]){__VA_ARGS__} }) +#define UVERBS_ACTION(_handler, ...) \ + _UVERBS_ACTION(_handler, 0, __VA_ARGS__) +#define UVERBS_CTX_ACTION(_handler, ...) \ + _UVERBS_ACTION(_handler, UVERBS_ACTION_FLAG_CREATE_ROOT, __VA_ARGS__) +#define _UVERBS_ACTIONS_SZ(...) \ + (sizeof((const struct uverbs_action *[]){__VA_ARGS__}) / \ + sizeof(const struct uverbs_action *)) +#define ADD_UVERBS_ACTION(action_idx, _handler, ...) \ + [action_idx] = &UVERBS_ACTION(_handler, __VA_ARGS__) +#define DECLARE_UVERBS_ACTION(name, _handler, ...) \ + const struct uverbs_action name = \ + UVERBS_ACTION(_handler, __VA_ARGS__) +#define ADD_UVERBS_CTX_ACTION(action_idx, _handler, ...) \ + [action_idx] = &UVERBS_CTX_ACTION(_handler, __VA_ARGS__) +#define DECLARE_UVERBS_CTX_ACTION(name, _handler, ...) 
\ + const struct uverbs_action name = \ + UVERBS_CTX_ACTION(_handler, __VA_ARGS__) +#define ADD_UVERBS_ACTION_PTR(idx, ptr) \ + [idx] = ptr +#define UVERBS_ACTIONS(...) \ + ((const struct uverbs_action_group) \ + {.num_actions = _UVERBS_ACTIONS_SZ(__VA_ARGS__), \ + .actions = (const struct uverbs_action *[]){__VA_ARGS__} }) +#define DECLARE_UVERBS_ACTIONS(name, ...) \ + const struct uverbs_type_actions_group name = \ + UVERBS_ACTIONS(__VA_ARGS__) +#define _UVERBS_ACTIONS_GROUP_SZ(...) \ + (sizeof((const struct uverbs_action_group*[]){__VA_ARGS__}) / \ + sizeof(const struct uverbs_action_group *)) +#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ + sizeof(char)) +#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _free_fn, _fops, _name, _flags)\ + ((const struct uverbs_type_alloc_action) \ + {.type = UVERBS_ATTR_TYPE_FD, \ + .order = _order, \ + .obj_size = _obj_size + \ + UVERBS_BUILD_BUG_ON(_obj_size < sizeof(struct ib_uobject)), \ + .free_fn = _free_fn, \ + .fd = {.fops = _fops, \ + .name = _name, \ + .flags = _flags} }) +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _free_fn) \ + ((const struct uverbs_type_alloc_action) \ + {.type = UVERBS_ATTR_TYPE_IDR, \ + .order = _order, \ + .free_fn = _free_fn, \ + .obj_size = _size + \ + UVERBS_BUILD_BUG_ON(_size < sizeof(struct ib_uobject)),}) +#define UVERBS_TYPE_ALLOC_IDR(_order, _free_fn) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, _free_fn) +#define DECLARE_UVERBS_TYPE(name, _alloc, ...) \ + const struct uverbs_type name = { \ + .alloc = _alloc, \ + .num_groups = _UVERBS_ACTIONS_GROUP_SZ(__VA_ARGS__), \ + .action_groups = (const struct uverbs_action_group *[]){__VA_ARGS__} \ + } +#define _UVERBS_TYPE_SZ(...) \ + (sizeof((const struct uverbs_type *[]){__VA_ARGS__}) / \ + sizeof(const struct uverbs_type *)) +#define ADD_UVERBS_TYPE_ACTIONS(type_idx, ...) 
\ + [type_idx] = &UVERBS_ACTIONS(__VA_ARGS__) +#define ADD_UVERBS_TYPE(type_idx, type_ptr) \ + [type_idx] = ((const struct uverbs_type * const)&type_ptr) +#define UVERBS_TYPES(...) ((const struct uverbs_type_group) \ + {.num_types = _UVERBS_TYPE_SZ(__VA_ARGS__), \ + .types = (const struct uverbs_type *[]){__VA_ARGS__} }) +#define DECLARE_UVERBS_TYPES(name, ...) \ + const struct uverbs_type_group name = UVERBS_TYPES(__VA_ARGS__) + +#define _UVERBS_TYPES_SZ(...) \ + (sizeof((const struct uverbs_type_group *[]){__VA_ARGS__}) / \ + sizeof(const struct uverbs_type_group *)) + +#define UVERBS_TYPES_GROUP(...) \ + ((const struct uverbs_root){ \ + .type_groups = (const struct uverbs_type_group *[]){__VA_ARGS__},\ + .num_groups = _UVERBS_TYPES_SZ(__VA_ARGS__)}) +#define DECLARE_UVERBS_TYPES_GROUP(name, ...) \ + const struct uverbs_root name = UVERBS_TYPES_GROUP(__VA_ARGS__) + +/* ================================================= + * Parsing infrastructure + * ================================================= + */ + +struct uverbs_ptr_attr { + void * __user ptr; + u16 len; +}; + +struct uverbs_fd_attr { + int fd; +}; + +struct uverbs_uobj_attr { + /* idr handle */ + u32 idr; +}; + +struct uverbs_flag_attr { + u64 flags; +}; + +struct uverbs_obj_attr { + /* pointer to the kernel descriptor -> type, access, etc */ + struct ib_uverbs_attr __user *uattr; + const struct uverbs_type_alloc_action *type; + struct ib_uobject *uobject; + union { + struct uverbs_fd_attr fd; + struct uverbs_uobj_attr uobj; + }; +}; + +struct uverbs_attr { + union { + struct uverbs_ptr_attr ptr_attr; + struct uverbs_obj_attr obj_attr; + struct uverbs_flag_attr flag_attr; + }; +}; + +/* output of one validator */ +struct uverbs_attr_array { + unsigned long *valid_bitmap; + size_t num_attrs; + /* arrays of attrubytes, index is the id i.e SEND_CQ */ + struct uverbs_attr *attrs; +}; + +static inline bool uverbs_is_valid(const struct uverbs_attr_array *attr_array, + unsigned int idx) +{ + return test_bit(idx, 
attr_array->valid_bitmap); +} + +/* TODO: Add debug version for these macros/inline func */ +static inline int uverbs_copy_to(struct uverbs_attr_array *attr_array, + size_t idx, const void *from) +{ + if (!uverbs_is_valid(attr_array, idx)) + return -ENOENT; + + return copy_to_user(attr_array->attrs[idx].ptr_attr.ptr, from, + attr_array->attrs[idx].ptr_attr.len) ? -EFAULT : 0; +} + +#define uverbs_copy_from(to, attr_array, idx) \ + (uverbs_is_valid((attr_array), idx) ? \ + (sizeof(*to) <= sizeof(((struct ib_uverbs_attr *)0)->data) ?\ + (memcpy(to, &(attr_array)->attrs[idx].ptr_attr.ptr, \ + (attr_array)->attrs[idx].ptr_attr.len), 0) : \ + (copy_from_user((to), (attr_array)->attrs[idx].ptr_attr.ptr, \ + (attr_array)->attrs[idx].ptr_attr.len) ? \ + -EFAULT : 0)) : -ENOENT) +#define uverbs_get_attr(to, attr_array, idx) \ + (uverbs_is_valid((attr_array), idx) ? \ + (sizeof(to) <= sizeof(((struct ib_uverbs_attr *)0)->data) ? \ + (sizeof(to) == sizeof((&(to))[0]) ? \ + ((to) = *(typeof(to) *)&(attr_array)->attrs[idx].ptr_attr.ptr, 0) :\ + (memcpy(&(to), &(attr_array)->attrs[idx].ptr_attr.ptr, \ + (attr_array)->attrs[idx].ptr_attr.len), 0)) : \ + (copy_from_user(&(to), (attr_array)->attrs[idx].ptr_attr.ptr, \ + (attr_array)->attrs[idx].ptr_attr.len) ? \ + -EFAULT : 0)) : -ENOENT) + +/* ================================================= + * Types infrastructure + * ================================================= + */ + +struct uverbs_root_spec { + const struct uverbs_type_group *types; + u8 group_id; +}; + +struct uverbs_root *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_root_spec *trees); +void uverbs_specs_free(struct uverbs_root *root); + +#endif diff --git a/include/rdma/uverbs_ioctl_cmd.h b/include/rdma/uverbs_ioctl_cmd.h new file mode 100644 index 00000000000000..7cff7c7ea44268 --- /dev/null +++ b/include/rdma/uverbs_ioctl_cmd.h @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef _UVERBS_IOCTL_CMD_ +#define _UVERBS_IOCTL_CMD_ + +#include + +#define UVERBS_UDATA_DRIVER_DATA_GROUP 1 +#define UVERBS_UDATA_DRIVER_DATA_FLAG BIT(UVERBS_ID_RESERVED_SHIFT) + +enum { + UVERBS_UHW_IN, + UVERBS_UHW_OUT, + UVERBS_UHW_NUM +}; + +enum uverbs_common_types { + UVERBS_TYPE_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_TYPE_PD, + UVERBS_TYPE_COMP_CHANNEL, + UVERBS_TYPE_CQ, + UVERBS_TYPE_QP, + UVERBS_TYPE_SRQ, + UVERBS_TYPE_AH, + UVERBS_TYPE_MR, + UVERBS_TYPE_MW, + UVERBS_TYPE_FLOW, + UVERBS_TYPE_XRCD, + UVERBS_TYPE_RWQ_IND_TBL, + UVERBS_TYPE_WQ, + UVERBS_TYPE_LAST, +}; + +enum uverbs_create_qp_cmd_attr_ids { + CREATE_QP_HANDLE, + CREATE_QP_PD_HANDLE, + CREATE_QP_SEND_CQ, + CREATE_QP_RECV_CQ, + CREATE_QP_SRQ, + CREATE_QP_USER_HANDLE, + CREATE_QP_CMD, + CREATE_QP_CMD_FLAGS, + CREATE_QP_RESP +}; + +enum uverbs_create_cq_cmd_attr_ids { + CREATE_CQ_HANDLE, + CREATE_CQ_CQE, + CREATE_CQ_USER_HANDLE, + CREATE_CQ_COMP_CHANNEL, + CREATE_CQ_COMP_VECTOR, + CREATE_CQ_FLAGS, + CREATE_CQ_RESP_CQE, +}; + +enum uverbs_create_qp_xrc_tgt_cmd_attr_ids { + CREATE_QP_XRC_TGT_HANDLE, + CREATE_QP_XRC_TGT_XRCD, + CREATE_QP_XRC_TGT_USER_HANDLE, + CREATE_QP_XRC_TGT_CMD, + CREATE_QP_XRC_TGT_CMD_FLAGS, + CREATE_QP_XRC_TGT_RESP +}; + +enum uverbs_modify_qp_cmd_attr_ids { + MODIFY_QP_HANDLE, + MODIFY_QP_STATE, + MODIFY_QP_CUR_STATE, + MODIFY_QP_EN_SQD_ASYNC_NOTIFY, + MODIFY_QP_ACCESS_FLAGS, + MODIFY_QP_PKEY_INDEX, + MODIFY_QP_PORT, + MODIFY_QP_QKEY, + MODIFY_QP_AV, + MODIFY_QP_PATH_MTU, + MODIFY_QP_TIMEOUT, + MODIFY_QP_RETRY_CNT, + MODIFY_QP_RNR_RETRY, + MODIFY_QP_RQ_PSN, + MODIFY_QP_MAX_RD_ATOMIC, + MODIFY_QP_ALT_PATH, + MODIFY_QP_MIN_RNR_TIMER, + MODIFY_QP_SQ_PSN, + MODIFY_QP_MAX_DEST_RD_ATOMIC, + MODIFY_QP_PATH_MIG_STATE, + MODIFY_QP_DEST_QPN +}; + +enum uverbs_create_comp_channel_cmd_attr_ids { + CREATE_COMP_CHANNEL_FD, +}; + +enum uverbs_get_context_cmd_attr_ids { + GET_CONTEXT_RESP, +}; + +enum uverbs_query_device_cmd_attr_ids { + QUERY_DEVICE_RESP, + 
QUERY_DEVICE_ODP, + QUERY_DEVICE_TIMESTAMP_MASK, + QUERY_DEVICE_HCA_CORE_CLOCK, + QUERY_DEVICE_CAP_FLAGS, +}; + +enum uverbs_alloc_pd_cmd_attr_ids { + ALLOC_PD_HANDLE, +}; + +enum uverbs_reg_mr_cmd_attr_ids { + REG_MR_HANDLE, + REG_MR_PD_HANDLE, + REG_MR_CMD, + REG_MR_RESP +}; + +enum uverbs_dereg_mr_cmd_attr_ids { + DEREG_MR_HANDLE, +}; + +extern const struct uverbs_attr_spec_group uverbs_uhw_compat_spec; +extern const struct uverbs_attr_spec_group uverbs_get_context_spec; +extern const struct uverbs_attr_spec_group uverbs_query_device_spec; +extern const struct uverbs_attr_spec_group uverbs_alloc_pd_spec; +extern const struct uverbs_attr_spec_group uverbs_reg_mr_spec; +extern const struct uverbs_attr_spec_group uverbs_dereg_mr_spec; + +enum uverbs_actions_mr_ops { + UVERBS_MR_REG, + UVERBS_MR_DEREG, +}; + +extern const struct uverbs_action_group uverbs_actions_mr; + +enum uverbs_actions_comp_channel_ops { + UVERBS_COMP_CHANNEL_CREATE, +}; + +extern const struct uverbs_action_group uverbs_actions_comp_channel; + +enum uverbs_actions_cq_ops { + UVERBS_CQ_CREATE, +}; + +extern const struct uverbs_action_group uverbs_actions_cq; + +enum uverbs_actions_qp_ops { + UVERBS_QP_CREATE, + UVERBS_QP_CREATE_XRC_TGT, + UVERBS_QP_MODIFY, +}; + +extern const struct uverbs_action_group uverbs_actions_qp; + +enum uverbs_actions_pd_ops { + UVERBS_PD_ALLOC +}; + +extern const struct uverbs_action_group uverbs_actions_pd; + +enum uverbs_actions_device_ops { + UVERBS_DEVICE_ALLOC_CONTEXT, + UVERBS_DEVICE_QUERY, +}; + +extern const struct uverbs_action_group uverbs_actions_device; + +extern const struct uverbs_type uverbs_type_cq; +extern const struct uverbs_type uverbs_type_qp; +extern const struct uverbs_type uverbs_type_rwq_ind_table; +extern const struct uverbs_type uverbs_type_wq; +extern const struct uverbs_type uverbs_type_srq; +extern const struct uverbs_type uverbs_type_ah; +extern const struct uverbs_type uverbs_type_flow; +extern const struct uverbs_type 
uverbs_type_comp_channel; +extern const struct uverbs_type uverbs_type_mr; +extern const struct uverbs_type uverbs_type_mw; +extern const struct uverbs_type uverbs_type_pd; +extern const struct uverbs_type uverbs_type_xrcd; +extern const struct uverbs_type uverbs_type_device; +extern const struct uverbs_type_group uverbs_common_types; +#endif + diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index f14ab7ff5fee75..ea187484789074 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list header-y += ib_user_cm.h +header-y += rdma_user_ioctl.h header-y += ib_user_mad.h header-y += ib_user_sa.h header-y += ib_user_verbs.h diff --git a/include/uapi/rdma/hfi/Kbuild b/include/uapi/rdma/hfi/Kbuild index ef23c294fc71f9..b65b0b3a5f632a 100644 --- a/include/uapi/rdma/hfi/Kbuild +++ b/include/uapi/rdma/hfi/Kbuild @@ -1,2 +1,3 @@ # UAPI Header export list header-y += hfi1_user.h +header-y += hfi1_ioctl.h diff --git a/include/uapi/rdma/hfi/hfi1_ioctl.h b/include/uapi/rdma/hfi/hfi1_ioctl.h new file mode 100644 index 00000000000000..4791cc8cb09b6b --- /dev/null +++ b/include/uapi/rdma/hfi/hfi1_ioctl.h @@ -0,0 +1,173 @@ +/* + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _LINUX__HFI1_IOCTL_H +#define _LINUX__HFI1_IOCTL_H +#include + +/* + * This structure is passed to the driver to tell it where + * user code buffers are, sizes, etc. The offsets and sizes of the + * fields must remain unchanged, for binary compatibility. It can + * be extended, if userversion is changed so user code can tell, if needed + */ +struct hfi1_user_info { + /* + * version of user software, to detect compatibility issues. 
+ * Should be set to HFI1_USER_SWVERSION. + */ + __u32 userversion; + __u32 pad; + /* + * If two or more processes wish to share a context, each process + * must set the subcontext_cnt and subcontext_id to the same + * values. The only restriction on the subcontext_id is that + * it be unique for a given node. + */ + __u16 subctxt_cnt; + __u16 subctxt_id; + /* 128bit UUID passed in by PSM. */ + __u8 uuid[16]; +}; + +struct hfi1_ctxt_info { + __u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */ + __u32 rcvegr_size; /* size of each eager buffer */ + __u16 num_active; /* number of active units */ + __u16 unit; /* unit (chip) assigned to caller */ + __u16 ctxt; /* ctxt on unit assigned to caller */ + __u16 subctxt; /* subctxt on unit assigned to caller */ + __u16 rcvtids; /* number of Rcv TIDs for this context */ + __u16 credits; /* number of PIO credits for this context */ + __u16 numa_node; /* NUMA node of the assigned device */ + __u16 rec_cpu; /* cpu # for affinity (0xffff if none) */ + __u16 send_ctxt; /* send context in use by this user context */ + __u16 egrtids; /* number of RcvArray entries for Eager Rcvs */ + __u16 rcvhdrq_cnt; /* number of RcvHdrQ entries */ + __u16 rcvhdrq_entsize; /* size (in bytes) for each RcvHdrQ entry */ + __u16 sdma_ring_size; /* number of entries in SDMA request ring */ +}; + +struct hfi1_tid_info { + /* virtual address of first page in transfer */ + __u64 vaddr; + /* pointer to tid array. this array is big enough */ + __u64 tidlist; + /* number of tids programmed by this request */ + __u32 tidcnt; + /* length of transfer buffer programmed by this request */ + __u32 length; +}; + +/* + * This structure is returned by the driver immediately after + * open to get implementation-specific info, and info specific to this + * instance. 
+ * + * This struct must have explicit pad fields where type sizes + * may result in different alignments between 32 and 64 bit + * programs, since the 64 bit * bit kernel requires the user code + * to have matching offsets + */ +struct hfi1_base_info { + /* version of hardware, for feature checking. */ + __u32 hw_version; + /* version of software, for feature checking. */ + __u32 sw_version; + /* Job key */ + __u16 jkey; + __u16 padding1; + /* + * The special QP (queue pair) value that identifies PSM + * protocol packet from standard IB packets. + */ + __u32 bthqp; + /* PIO credit return address, */ + __u64 sc_credits_addr; + /* + * Base address of write-only pio buffers for this process. + * Each buffer has sendpio_credits*64 bytes. + */ + __u64 pio_bufbase_sop; + /* + * Base address of write-only pio buffers for this process. + * Each buffer has sendpio_credits*64 bytes. + */ + __u64 pio_bufbase; + /* address where receive buffer queue is mapped into */ + __u64 rcvhdr_bufbase; + /* base address of Eager receive buffers. */ + __u64 rcvegr_bufbase; + /* base address of SDMA completion ring */ + __u64 sdma_comp_bufbase; + /* + * User register base for init code, not to be used directly by + * protocol or applications. Always maps real chip register space. + * the register addresses are: + * ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail, + * ur_rcvtidflow + */ + __u64 user_regbase; + /* notification events */ + __u64 events_bufbase; + /* status page */ + __u64 status_bufbase; + /* rcvhdrtail update */ + __u64 rcvhdrtail_base; + /* + * shared memory pages for subctxts if ctxt is shared; these cover + * all the processes in the group sharing a single context. + * all have enough space for the num_subcontexts value on this job. 
+ */ + __u64 subctxt_uregbase; + __u64 subctxt_rcvegrbuf; + __u64 subctxt_rcvhdrbuf; +}; +#endif /* _LINUX__HFI1_IOCTL_H */ diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h index d15e7289d8356e..8807f062dfd5fd 100644 --- a/include/uapi/rdma/hfi/hfi1_user.h +++ b/include/uapi/rdma/hfi/hfi1_user.h @@ -57,6 +57,7 @@ #define _LINUX__HFI1_USER_H #include <linux/types.h> +#include <rdma/rdma_user_ioctl.h> /* * This version number is given to the driver by the user code during @@ -112,61 +113,6 @@ #define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1) #define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2) -/* User commands. */ -#define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */ -#define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */ -#define HFI1_CMD_USER_INFO 3 /* set up userspace */ -#define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */ -#define HFI1_CMD_TID_FREE 5 /* free expected TID entries */ -#define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */ - -#define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */ -#define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */ -#define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */ -#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */ -#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */ -#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */ -#define HFI1_CMD_GET_VERS 14 /* get the version of the user cdev */ - -/* - * User IOCTLs can not go above 128 if they do then see common.h and change the - * base for the snoop ioctl - */ -#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */ - -/* - * Make the ioctls occupy the last 0xf0-0xff portion of the IB range - */ -#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0) - -struct hfi1_cmd; -#define HFI1_IOCTL_ASSIGN_CTXT \ - _IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info) -#define HFI1_IOCTL_CTXT_INFO \ - _IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info) -#define 
HFI1_IOCTL_USER_INFO \ - _IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info) -#define HFI1_IOCTL_TID_UPDATE \ - _IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info) -#define HFI1_IOCTL_TID_FREE \ - _IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info) -#define HFI1_IOCTL_CREDIT_UPD \ - _IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD)) -#define HFI1_IOCTL_RECV_CTRL \ - _IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int) -#define HFI1_IOCTL_POLL_TYPE \ - _IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int) -#define HFI1_IOCTL_ACK_EVENT \ - _IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long) -#define HFI1_IOCTL_SET_PKEY \ - _IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16) -#define HFI1_IOCTL_CTXT_RESET \ - _IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET)) -#define HFI1_IOCTL_TID_INVAL_READ \ - _IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info) -#define HFI1_IOCTL_GET_VERS \ - _IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int) - #define _HFI1_EVENT_FROZEN_BIT 0 #define _HFI1_EVENT_LINKDOWN_BIT 1 #define _HFI1_EVENT_LID_CHANGE_BIT 2 @@ -211,60 +157,6 @@ struct hfi1_cmd; #define HFI1_POLL_TYPE_ANYRCV 0x0 #define HFI1_POLL_TYPE_URGENT 0x1 -/* - * This structure is passed to the driver to tell it where - * user code buffers are, sizes, etc. The offsets and sizes of the - * fields must remain unchanged, for binary compatibility. It can - * be extended, if userversion is changed so user code can tell, if needed - */ -struct hfi1_user_info { - /* - * version of user software, to detect compatibility issues. - * Should be set to HFI1_USER_SWVERSION. - */ - __u32 userversion; - __u32 pad; - /* - * If two or more processes wish to share a context, each process - * must set the subcontext_cnt and subcontext_id to the same - * values. The only restriction on the subcontext_id is that - * it be unique for a given node. - */ - __u16 subctxt_cnt; - __u16 subctxt_id; - /* 128bit UUID passed in by PSM. 
*/ - __u8 uuid[16]; -}; - -struct hfi1_ctxt_info { - __u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */ - __u32 rcvegr_size; /* size of each eager buffer */ - __u16 num_active; /* number of active units */ - __u16 unit; /* unit (chip) assigned to caller */ - __u16 ctxt; /* ctxt on unit assigned to caller */ - __u16 subctxt; /* subctxt on unit assigned to caller */ - __u16 rcvtids; /* number of Rcv TIDs for this context */ - __u16 credits; /* number of PIO credits for this context */ - __u16 numa_node; /* NUMA node of the assigned device */ - __u16 rec_cpu; /* cpu # for affinity (0xffff if none) */ - __u16 send_ctxt; /* send context in use by this user context */ - __u16 egrtids; /* number of RcvArray entries for Eager Rcvs */ - __u16 rcvhdrq_cnt; /* number of RcvHdrQ entries */ - __u16 rcvhdrq_entsize; /* size (in bytes) for each RcvHdrQ entry */ - __u16 sdma_ring_size; /* number of entries in SDMA request ring */ -}; - -struct hfi1_tid_info { - /* virtual address of first page in transfer */ - __u64 vaddr; - /* pointer to tid array. this array is big enough */ - __u64 tidlist; - /* number of tids programmed by this request */ - __u32 tidcnt; - /* length of transfer buffer programmed by this request */ - __u32 length; -}; - enum hfi1_sdma_comp_state { FREE = 0, QUEUED, @@ -289,71 +181,6 @@ struct hfi1_status { char freezemsg[0]; }; -/* - * This structure is returned by the driver immediately after - * open to get implementation-specific info, and info specific to this - * instance. - * - * This struct must have explicit pad fields where type sizes - * may result in different alignments between 32 and 64 bit - * programs, since the 64 bit * bit kernel requires the user code - * to have matching offsets - */ -struct hfi1_base_info { - /* version of hardware, for feature checking. */ - __u32 hw_version; - /* version of software, for feature checking. 
*/ - __u32 sw_version; - /* Job key */ - __u16 jkey; - __u16 padding1; - /* - * The special QP (queue pair) value that identifies PSM - * protocol packet from standard IB packets. - */ - __u32 bthqp; - /* PIO credit return address, */ - __u64 sc_credits_addr; - /* - * Base address of write-only pio buffers for this process. - * Each buffer has sendpio_credits*64 bytes. - */ - __u64 pio_bufbase_sop; - /* - * Base address of write-only pio buffers for this process. - * Each buffer has sendpio_credits*64 bytes. - */ - __u64 pio_bufbase; - /* address where receive buffer queue is mapped into */ - __u64 rcvhdr_bufbase; - /* base address of Eager receive buffers. */ - __u64 rcvegr_bufbase; - /* base address of SDMA completion ring */ - __u64 sdma_comp_bufbase; - /* - * User register base for init code, not to be used directly by - * protocol or applications. Always maps real chip register space. - * the register addresses are: - * ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail, - * ur_rcvtidflow - */ - __u64 user_regbase; - /* notification events */ - __u64 events_bufbase; - /* status page */ - __u64 status_bufbase; - /* rcvhdrtail update */ - __u64 rcvhdrtail_base; - /* - * shared memory pages for subctxts if ctxt is shared; these cover - * all the processes in the group sharing a single context. - * all have enough space for the num_subcontexts value on this job. 
- */ - __u64 subctxt_uregbase; - __u64 subctxt_rcvegrbuf; - __u64 subctxt_rcvhdrbuf; -}; - enum sdma_req_opcode { EXPECTED = 0, EAGER diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h index 09f809f323eaaf..5c7abd859e0fda 100644 --- a/include/uapi/rdma/ib_user_mad.h +++ b/include/uapi/rdma/ib_user_mad.h @@ -35,7 +35,7 @@ #define IB_USER_MAD_H #include -#include +#include /* * Increment this value if any changes that break userspace ABI @@ -230,16 +230,4 @@ struct ib_user_mad_reg_req2 { __u8 reserved[3]; }; -#define IB_IOCTL_MAGIC 0x1b - -#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ - struct ib_user_mad_reg_req) - -#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) - -#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) - -#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \ - struct ib_user_mad_reg_req2) - #endif /* IB_USER_MAD_H */ diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 25225ebbc7d525..e9d3569dbdeac8 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -317,12 +317,25 @@ struct ib_uverbs_reg_mr { __u64 driver_data[0]; }; +struct ib_uverbs_ioctl_reg_mr { + __u64 start; + __u64 length; + __u64 hca_va; + __u32 access_flags; + __u32 reserved; +}; + struct ib_uverbs_reg_mr_resp { __u32 mr_handle; __u32 lkey; __u32 rkey; }; +struct ib_uverbs_ioctl_reg_mr_resp { + __u32 lkey; + __u32 rkey; +}; + struct ib_uverbs_rereg_mr { __u64 response; __u32 mr_handle; @@ -566,6 +579,17 @@ struct ib_uverbs_ex_create_qp { __u32 reserved1; }; +struct ib_uverbs_ioctl_create_qp { + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u16 reserved; +}; + struct ib_uverbs_open_qp { __u64 response; __u64 user_handle; @@ -588,6 +612,15 @@ struct ib_uverbs_create_qp_resp { __u32 reserved; }; +struct ib_uverbs_ioctl_create_qp_resp { + __u32 
qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; +}; + struct ib_uverbs_ex_create_qp_resp { struct ib_uverbs_create_qp_resp base; __u32 comp_mask; @@ -613,6 +646,13 @@ struct ib_uverbs_qp_dest { __u8 port_num; }; +struct ib_uverbs_qp_alt_path { + struct ib_uverbs_qp_dest dest; + __u16 pkey_index; + __u8 port_num; + __u8 timeout; +}; + struct ib_uverbs_query_qp { __u64 response; __u32 qp_handle; diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h new file mode 100644 index 00000000000000..897fb9f4ad1a1c --- /dev/null +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, LTD. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_USER_IOCTL_H +#define RDMA_USER_IOCTL_H + +#include <linux/types.h> +#include <linux/ioctl.h> +#include <rdma/ib_user_mad.h> +#include <rdma/hfi/hfi1_ioctl.h> + +/* Documentation/ioctl/ioctl-number.txt */ +#define RDMA_IOCTL_MAGIC 0x1b +/* Legacy name, for user space applications which already use it */ +#define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC + +#define RDMA_VERBS_IOCTL \ + _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) + +enum ib_uverbs_attr_flags { + UVERBS_ATTR_F_MANDATORY = 1U << 0, +}; + +struct ib_uverbs_attr { + __u16 attr_id; /* command specific type attribute */ + __u16 len; /* NA for idr */ + __u16 flags; /* combination of uverbs_attr_flags */ + __u16 reserved; + __u64 data; /* ptr to command, inline data, flag or idr/fd */ +}; + +struct ib_uverbs_ioctl_hdr { + __u16 length; + __u16 flags; + __u16 object_type; + __u16 reserved; /* future use for driver_id */ + __u16 action; + __u16 num_attrs; + struct ib_uverbs_attr attrs[0]; +}; + +/* + * General blocks assignments + * It is closed on purpose; do not expose it to user space + * #define MAD_CMD_BASE 0x00 + * #define HFI1_CMD_BASE 0xE0 + */ + +/* MAD specific section */ +#define IB_USER_MAD_REGISTER_AGENT _IOWR(RDMA_IOCTL_MAGIC, 0x01, struct ib_user_mad_reg_req) +#define IB_USER_MAD_UNREGISTER_AGENT _IOW(RDMA_IOCTL_MAGIC, 0x02, __u32) +#define IB_USER_MAD_ENABLE_PKEY _IO(RDMA_IOCTL_MAGIC, 0x03) +#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(RDMA_IOCTL_MAGIC, 0x04, struct ib_user_mad_reg_req2) + +/* HFI specific section */ +/* allocate HFI and context */ +#define HFI1_IOCTL_ASSIGN_CTXT _IOWR(RDMA_IOCTL_MAGIC, 0xE1, struct hfi1_user_info) +/* find out what resources we got */ +#define HFI1_IOCTL_CTXT_INFO _IOW(RDMA_IOCTL_MAGIC, 0xE2, struct hfi1_ctxt_info) +/* set up userspace */ +#define 
HFI1_IOCTL_USER_INFO _IOW(RDMA_IOCTL_MAGIC, 0xE3, struct hfi1_base_info) +/* update expected TID entries */ +#define HFI1_IOCTL_TID_UPDATE _IOWR(RDMA_IOCTL_MAGIC, 0xE4, struct hfi1_tid_info) +/* free expected TID entries */ +#define HFI1_IOCTL_TID_FREE _IOWR(RDMA_IOCTL_MAGIC, 0xE5, struct hfi1_tid_info) +/* force an update of PIO credit */ +#define HFI1_IOCTL_CREDIT_UPD _IO(RDMA_IOCTL_MAGIC, 0xE6) +/* control receipt of packets */ +#define HFI1_IOCTL_RECV_CTRL _IOW(RDMA_IOCTL_MAGIC, 0xE8, int) +/* set the kind of polling we want */ +#define HFI1_IOCTL_POLL_TYPE _IOW(RDMA_IOCTL_MAGIC, 0xE9, int) +/* ack & clear user status bits */ +#define HFI1_IOCTL_ACK_EVENT _IOW(RDMA_IOCTL_MAGIC, 0xEA, unsigned long) +/* set context's pkey */ +#define HFI1_IOCTL_SET_PKEY _IOW(RDMA_IOCTL_MAGIC, 0xEB, __u16) +/* reset context's HW send context */ +#define HFI1_IOCTL_CTXT_RESET _IO(RDMA_IOCTL_MAGIC, 0xEC) +/* read TID cache invalidations */ +#define HFI1_IOCTL_TID_INVAL_READ _IOWR(RDMA_IOCTL_MAGIC, 0xED, struct hfi1_tid_info) +/* get the version of the user cdev */ +#define HFI1_IOCTL_GET_VERS _IOR(RDMA_IOCTL_MAGIC, 0xEE, int) + +#endif /* RDMA_USER_IOCTL_H */