}
}
-static void pkt_end_io_read(struct bio *bio, int err)
+static void pkt_end_io_read(struct bio *bio)
{
struct packet_data *pkt = bio->bi_private;
struct pktcdvd_device *pd = pkt->pd;
pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, err);
+ (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error);
- if (err)
+ if (bio->bi_error)
atomic_inc(&pkt->io_errors);
if (atomic_dec_and_test(&pkt->io_wait)) {
atomic_inc(&pkt->run_sm);
pkt_bio_finished(pd);
}
-static void pkt_end_io_packet_write(struct bio *bio, int err)
+static void pkt_end_io_packet_write(struct bio *bio)
{
struct packet_data *pkt = bio->bi_private;
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, err);
+ pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error);
pd->stats.pkt_ended++;
pkt_queue_bio(pd, pkt->w_bio);
}
-static void pkt_finish_packet(struct packet_data *pkt, int uptodate)
+static void pkt_finish_packet(struct packet_data *pkt, int error)
{
struct bio *bio;
- if (!uptodate)
+ if (error)
pkt->cache_valid = 0;
/* Finish all bios corresponding to this packet */
- while ((bio = bio_list_pop(&pkt->orig_bios)))
- bio_endio(bio, uptodate ? 0 : -EIO);
+ while ((bio = bio_list_pop(&pkt->orig_bios))) {
+ bio->bi_error = error;
+ bio_endio(bio);
+ }
}
static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
{
- int uptodate;
-
pkt_dbg(2, pd, "pkt %d\n", pkt->id);
for (;;) {
if (atomic_read(&pkt->io_wait) > 0)
return;
- if (test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags)) {
+ if (!pkt->w_bio->bi_error) {
pkt_set_state(pkt, PACKET_FINISHED_STATE);
} else {
pkt_set_state(pkt, PACKET_RECOVERY_STATE);
break;
case PACKET_FINISHED_STATE:
- uptodate = test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags);
- pkt_finish_packet(pkt, uptodate);
+ pkt_finish_packet(pkt, pkt->w_bio->bi_error);
return;
default:
}
-static void pkt_end_io_read_cloned(struct bio *bio, int err)
+static void pkt_end_io_read_cloned(struct bio *bio)
{
struct packet_stacked_data *psd = bio->bi_private;
struct pktcdvd_device *pd = psd->pd;
+ psd->bio->bi_error = bio->bi_error;
bio_put(bio);
- bio_endio(psd->bio, err);
+ bio_endio(psd->bio);
mempool_free(psd, psd_pool);
pkt_bio_finished(pd);
}
char b[BDEVNAME_SIZE];
struct bio *split;
+ blk_queue_bounce(q, &bio);
+
+ blk_queue_split(q, &bio, q->bio_split);
+
pd = q->queuedata;
if (!pd) {
pr_err("%s incorrect request queue\n",
goto end_io;
}
- blk_queue_bounce(q, &bio);
-
do {
sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
-static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
- struct bio_vec *bvec)
-{
- struct pktcdvd_device *pd = q->queuedata;
- sector_t zone = get_zone(bmd->bi_sector, pd);
- int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
- int remaining = (pd->settings.size << 9) - used;
- int remaining2;
-
- /*
- * A bio <= PAGE_SIZE must be allowed. If it crosses a packet
- * boundary, pkt_make_request() will split the bio.
- */
- remaining2 = PAGE_SIZE - bmd->bi_size;
- remaining = max(remaining, remaining2);
-
- BUG_ON(remaining < 0);
- return remaining;
-}
-
static void pkt_init_queue(struct pktcdvd_device *pd)
{
struct request_queue *q = pd->disk->queue;
blk_queue_make_request(q, pkt_make_request);
blk_queue_logical_block_size(q, CD_FRAMESIZE);
blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
- blk_queue_merge_bvec(q, pkt_merge_bvec);
q->queuedata = pd;
}
out_mem2:
put_disk(disk);
out_mem:
- if (pd->rb_pool)
- mempool_destroy(pd->rb_pool);
+ mempool_destroy(pd->rb_pool);
kfree(pd);
out_mutex:
mutex_unlock(&ctl_mutex);
#define NETCP_SOP_OFFSET (NET_IP_ALIGN + NET_SKB_PAD)
#define NETCP_NAPI_WEIGHT 64
#define NETCP_TX_TIMEOUT (5 * HZ)
+#define NETCP_PACKET_SIZE (ETH_FRAME_LEN + ETH_FCS_LEN)
#define NETCP_MIN_PACKET_SIZE ETH_ZLEN
#define NETCP_MAX_MCAST_ADDR 16
NETIF_MSG_PKTDATA | NETIF_MSG_TX_QUEUED | \
NETIF_MSG_RX_STATUS)
+#define NETCP_EFUSE_ADDR_SWAP 2
+
#define knav_queue_get_id(q) knav_queue_device_control(q, \
KNAV_QUEUE_GET_ID, (unsigned long)NULL)
}
/* Read the e-fuse value as 32 bit values to be endian independent */
-static int emac_arch_get_mac_addr(char *x, void __iomem *efuse_mac)
+static int emac_arch_get_mac_addr(char *x, void __iomem *efuse_mac, u32 swap)
{
unsigned int addr0, addr1;
addr1 = readl(efuse_mac + 4);
addr0 = readl(efuse_mac);
+ switch (swap) {
+ case NETCP_EFUSE_ADDR_SWAP:
+ addr0 = addr1;
+ addr1 = readl(efuse_mac);
+ break;
+ default:
+ break;
+ }
+
x[0] = (addr1 & 0x0000ff00) >> 8;
x[1] = addr1 & 0x000000ff;
x[2] = (addr0 & 0xff000000) >> 24;
interface_list) {
struct netcp_intf_modpriv *intf_modpriv;
- /* If interface not registered then register now */
- if (!netcp_intf->netdev_registered)
- ret = netcp_register_interface(netcp_intf);
-
- if (ret)
- return -ENODEV;
-
intf_modpriv = devm_kzalloc(dev, sizeof(*intf_modpriv),
GFP_KERNEL);
if (!intf_modpriv)
interface = of_parse_phandle(netcp_intf->node_interface,
module->name, 0);
+ if (!interface) {
+ devm_kfree(dev, intf_modpriv);
+ continue;
+ }
+
intf_modpriv->netcp_priv = netcp_intf;
intf_modpriv->netcp_module = module;
list_add_tail(&intf_modpriv->intf_list,
continue;
}
}
+
+ /* Now register the interface with netdev */
+ list_for_each_entry(netcp_intf,
+ &netcp_device->interface_head,
+ interface_list) {
+ /* If interface not registered then register now */
+ if (!netcp_intf->netdev_registered) {
+ ret = netcp_register_interface(netcp_intf);
+ if (ret)
+ return -ENODEV;
+ }
+ }
return 0;
}
if (ret < 0)
goto fail;
}
-
mutex_unlock(&netcp_modules_lock);
return 0;
netcp->rx_pool = NULL;
}
-static void netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
+static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
{
struct knav_dma_desc *hwdesc;
unsigned int buf_len, dma_sz;
hwdesc = knav_pool_desc_get(netcp->rx_pool);
if (IS_ERR_OR_NULL(hwdesc)) {
dev_dbg(netcp->ndev_dev, "out of rx pool desc\n");
- return;
+ return -ENOMEM;
}
if (likely(fdq == 0)) {
unsigned int primary_buf_len;
/* Allocate a primary receive queue entry */
- buf_len = netcp->rx_buffer_sizes[0] + NETCP_SOP_OFFSET;
+ buf_len = NETCP_PACKET_SIZE + NETCP_SOP_OFFSET;
primary_buf_len = SKB_DATA_ALIGN(buf_len) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- if (primary_buf_len <= PAGE_SIZE) {
- bufptr = netdev_alloc_frag(primary_buf_len);
- pad[1] = primary_buf_len;
- } else {
- bufptr = kmalloc(primary_buf_len, GFP_ATOMIC |
- GFP_DMA32 | __GFP_COLD);
- pad[1] = 0;
- }
+ bufptr = netdev_alloc_frag(primary_buf_len);
+ pad[1] = primary_buf_len;
if (unlikely(!bufptr)) {
- dev_warn_ratelimited(netcp->ndev_dev, "Primary RX buffer alloc failed\n");
+ dev_warn_ratelimited(netcp->ndev_dev,
+ "Primary RX buffer alloc failed\n");
goto fail;
}
dma = dma_map_single(netcp->dev, bufptr, buf_len,
DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(netcp->dev, dma)))
+ goto fail;
+
pad[0] = (u32)bufptr;
} else {
/* Allocate a secondary receive queue entry */
- page = alloc_page(GFP_ATOMIC | GFP_DMA32 | __GFP_COLD);
+ page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
if (unlikely(!page)) {
dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
goto fail;
knav_pool_desc_map(netcp->rx_pool, hwdesc, sizeof(*hwdesc), &dma,
&dma_sz);
knav_queue_push(netcp->rx_fdq[fdq], dma, sizeof(*hwdesc), 0);
- return;
+ return 0;
fail:
knav_pool_desc_put(netcp->rx_pool, hwdesc);
+ return -ENOMEM;
}
/* Refill Rx FDQ with descriptors & attached buffers */
static void netcp_rxpool_refill(struct netcp_intf *netcp)
{
u32 fdq_deficit[KNAV_DMA_FDQ_PER_CHAN] = {0};
- int i;
+ int i, ret = 0;
/* Calculate the FDQ deficit and refill */
for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN && netcp->rx_fdq[i]; i++) {
fdq_deficit[i] = netcp->rx_queue_depths[i] -
knav_queue_get_count(netcp->rx_fdq[i]);
- while (fdq_deficit[i]--)
- netcp_allocate_rx_buf(netcp, i);
+ while (fdq_deficit[i]-- && !ret)
+ ret = netcp_allocate_rx_buf(netcp, i);
} /* end for fdqs */
}
packets = netcp_process_rx_packets(netcp, budget);
+ netcp_rxpool_refill(netcp);
if (packets < budget) {
napi_complete(&netcp->rx_napi);
knav_queue_enable_notify(netcp->rx_queue);
}
- netcp_rxpool_refill(netcp);
return packets;
}
/* Map the linear buffer */
dma_addr = dma_map_single(dev, skb->data, pkt_len, DMA_TO_DEVICE);
- if (unlikely(!dma_addr)) {
+ if (unlikely(dma_mapping_error(dev, dma_addr))) {
dev_err(netcp->ndev_dev, "Failed to map skb buffer\n");
return NULL;
}
desc = knav_pool_desc_get(netcp->tx_pool);
- if (unlikely(IS_ERR_OR_NULL(desc))) {
+ if (IS_ERR_OR_NULL(desc)) {
dev_err(netcp->ndev_dev, "out of TX desc\n");
dma_unmap_single(dev, dma_addr, pkt_len, DMA_TO_DEVICE);
return NULL;
}
ndesc = knav_pool_desc_get(netcp->tx_pool);
- if (unlikely(IS_ERR_OR_NULL(ndesc))) {
+ if (IS_ERR_OR_NULL(ndesc)) {
dev_err(netcp->ndev_dev, "out of TX desc for frags\n");
dma_unmap_page(dev, dma_addr, buf_len, DMA_TO_DEVICE);
goto free_descs;
continue;
dev_dbg(netcp->ndev_dev, "deleting address %pM, type %x\n",
naddr->addr, naddr->type);
- mutex_lock(&netcp_modules_lock);
for_each_module(netcp, priv) {
module = priv->netcp_module;
if (!module->del_addr)
naddr);
WARN_ON(error);
}
- mutex_unlock(&netcp_modules_lock);
netcp_addr_del(netcp, naddr);
}
}
continue;
dev_dbg(netcp->ndev_dev, "adding address %pM, type %x\n",
naddr->addr, naddr->type);
- mutex_lock(&netcp_modules_lock);
+
for_each_module(netcp, priv) {
module = priv->netcp_module;
if (!module->add_addr)
error = module->add_addr(priv->module_priv, naddr);
WARN_ON(error);
}
- mutex_unlock(&netcp_modules_lock);
}
}
ndev->flags & IFF_ALLMULTI ||
netdev_mc_count(ndev) > NETCP_MAX_MCAST_ADDR);
+ spin_lock(&netcp->lock);
/* first clear all marks */
netcp_addr_clear_mark(netcp);
/* finally sweep and callout into modules */
netcp_addr_sweep_del(netcp);
netcp_addr_sweep_add(netcp);
+ spin_unlock(&netcp->lock);
}
static void netcp_free_navigator_resources(struct netcp_intf *netcp)
knav_queue_disable_notify(netcp->rx_queue);
/* open Rx FDQs */
- for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN &&
- netcp->rx_queue_depths[i] && netcp->rx_buffer_sizes[i]; ++i) {
+ for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN && netcp->rx_queue_depths[i];
+ ++i) {
snprintf(name, sizeof(name), "rx-fdq-%s-%d", ndev->name, i);
netcp->rx_fdq[i] = knav_queue_open(name, KNAV_QUEUE_GP, 0);
if (IS_ERR_OR_NULL(netcp->rx_fdq[i])) {
goto fail;
}
- mutex_lock(&netcp_modules_lock);
for_each_module(netcp, intf_modpriv) {
module = intf_modpriv->netcp_module;
if (module->open) {
}
}
}
- mutex_unlock(&netcp_modules_lock);
napi_enable(&netcp->rx_napi);
napi_enable(&netcp->tx_napi);
if (module->close)
module->close(intf_modpriv->module_priv, ndev);
}
- mutex_unlock(&netcp_modules_lock);
fail:
netcp_free_navigator_resources(netcp);
napi_disable(&netcp->rx_napi);
napi_disable(&netcp->tx_napi);
- mutex_lock(&netcp_modules_lock);
for_each_module(netcp, intf_modpriv) {
module = intf_modpriv->netcp_module;
if (module->close) {
dev_err(netcp->ndev_dev, "Close failed\n");
}
}
- mutex_unlock(&netcp_modules_lock);
/* Recycle Rx descriptors from completion queue */
netcp_empty_rx_queue(netcp);
if (!netif_running(ndev))
return -EINVAL;
- mutex_lock(&netcp_modules_lock);
for_each_module(netcp, intf_modpriv) {
module = intf_modpriv->netcp_module;
if (!module->ioctl)
}
out:
- mutex_unlock(&netcp_modules_lock);
return (ret == 0) ? 0 : err;
}
struct netcp_intf *netcp = netdev_priv(ndev);
struct netcp_intf_modpriv *intf_modpriv;
struct netcp_module *module;
+ unsigned long flags;
int err = 0;
dev_dbg(netcp->ndev_dev, "adding rx vlan id: %d\n", vid);
- mutex_lock(&netcp_modules_lock);
+ spin_lock_irqsave(&netcp->lock, flags);
for_each_module(netcp, intf_modpriv) {
module = intf_modpriv->netcp_module;
if ((module->add_vid) && (vid != 0)) {
}
}
}
- mutex_unlock(&netcp_modules_lock);
+ spin_unlock_irqrestore(&netcp->lock, flags);
+
return err;
}
struct netcp_intf *netcp = netdev_priv(ndev);
struct netcp_intf_modpriv *intf_modpriv;
struct netcp_module *module;
+ unsigned long flags;
int err = 0;
dev_dbg(netcp->ndev_dev, "removing rx vlan id: %d\n", vid);
- mutex_lock(&netcp_modules_lock);
+ spin_lock_irqsave(&netcp->lock, flags);
for_each_module(netcp, intf_modpriv) {
module = intf_modpriv->netcp_module;
if (module->del_vid) {
}
}
}
- mutex_unlock(&netcp_modules_lock);
+ spin_unlock_irqrestore(&netcp->lock, flags);
return err;
}
goto quit;
}
- emac_arch_get_mac_addr(efuse_mac_addr, efuse);
+ emac_arch_get_mac_addr(efuse_mac_addr, efuse, efuse_mac);
if (is_valid_ether_addr(efuse_mac_addr))
ether_addr_copy(ndev->dev_addr, efuse_mac_addr);
else
netcp->rx_queue_depths[0] = 128;
}
- ret = of_property_read_u32_array(node_interface, "rx-buffer-size",
- netcp->rx_buffer_sizes,
- KNAV_DMA_FDQ_PER_CHAN);
- if (ret) {
- dev_err(dev, "missing \"rx-buffer-size\" parameter\n");
- netcp->rx_buffer_sizes[0] = 1536;
- }
-
ret = of_property_read_u32_array(node_interface, "rx-pool", temp, 2);
if (ret < 0) {
dev_err(dev, "missing \"rx-pool\" parameter\n");
struct device_node *child, *interfaces;
struct netcp_device *netcp_device;
struct device *dev = &pdev->dev;
- struct netcp_module *module;
int ret;
if (!node) {
/* Add the device instance to the list */
list_add_tail(&netcp_device->device_list, &netcp_devices);
- /* Probe & attach any modules already registered */
- mutex_lock(&netcp_modules_lock);
- for_each_netcp_module(module) {
- ret = netcp_module_probe(netcp_device, module);
- if (ret < 0)
- dev_err(dev, "module(%s) probe failed\n", module->name);
- }
- mutex_unlock(&netcp_modules_lock);
return 0;
probe_quit_interface:
static struct platform_driver netcp_driver = {
.driver = {
.name = "netcp-1.0",
- .owner = THIS_MODULE,
.of_match_table = of_match,
},
.probe = netcp_probe,
system continues booting, and even probe devices on different
busses in parallel, leading to a significant speed-up.
- If you have built SCSI as modules, enabling this option can
- be a problem as the devices may not have been found by the
- time your system expects them to have been. You can load the
- scsi_wait_scan module to ensure that all scans have completed.
- If you build your SCSI drivers into the kernel, then everything
- will work fine if you say Y here.
-
You can override this choice by specifying "scsi_mod.scan=sync"
or async on the kernel's command line.
source "drivers/scsi/bnx2i/Kconfig"
source "drivers/scsi/bnx2fc/Kconfig"
source "drivers/scsi/be2iscsi/Kconfig"
+source "drivers/scsi/cxlflash/Kconfig"
config SGIWD93_SCSI
tristate "SGI WD93C93 SCSI Driver"
int depth, ext4_fsblk_t pblk)
{
const char *error_msg;
- int max = 0;
+ int max = 0, err = -EFSCORRUPTED;
if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
error_msg = "invalid magic";
if (ext_depth(inode) != depth &&
!ext4_extent_block_csum_verify(inode, eh)) {
error_msg = "extent tree corrupted";
+ err = -EFSBADCRC;
goto corrupted;
}
return 0;
le16_to_cpu(eh->eh_magic),
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
max, le16_to_cpu(eh->eh_depth), depth);
- return -EIO;
+ return err;
}
#define ext4_ext_check(inode, eh, depth, pblk) \
bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
flags);
- if (unlikely(IS_ERR(bh))) {
+ if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
goto err;
}
put_bh(bh);
EXT4_ERROR_INODE(inode,
"ppos %d > depth %d", ppos, depth);
- ret = -EIO;
+ ret = -EFSCORRUPTED;
goto err;
}
path[ppos].p_bh = bh;
EXT4_ERROR_INODE(inode,
"logical %d == ei_block %d!",
logical, le32_to_cpu(curp->p_idx->ei_block));
- return -EIO;
+ return -EFSCORRUPTED;
}
if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
"eh_entries %d >= eh_max %d!",
le16_to_cpu(curp->p_hdr->eh_entries),
le16_to_cpu(curp->p_hdr->eh_max));
- return -EIO;
+ return -EFSCORRUPTED;
}
if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
- return -EIO;
+ return -EFSCORRUPTED;
}
ix->ei_block = cpu_to_le32(logical);
if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
- return -EIO;
+ return -EFSCORRUPTED;
}
err = ext4_ext_dirty(handle, inode, curp);
* border from split point */
if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
- return -EIO;
+ return -EFSCORRUPTED;
}
if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
border = path[depth].p_ext[1].ee_block;
newblock = ablocks[--a];
if (unlikely(newblock == 0)) {
EXT4_ERROR_INODE(inode, "newblock == 0!");
- err = -EIO;
+ err = -EFSCORRUPTED;
goto cleanup;
}
bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
path[depth].p_hdr->eh_entries,
path[depth].p_hdr->eh_max);
- err = -EIO;
+ err = -EFSCORRUPTED;
goto cleanup;
}
/* start copy from next extent */
k = depth - at - 1;
if (unlikely(k < 0)) {
EXT4_ERROR_INODE(inode, "k %d < 0!", k);
- err = -EIO;
+ err = -EFSCORRUPTED;
goto cleanup;
}
if (k)
EXT4_ERROR_INODE(inode,
"EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
le32_to_cpu(path[i].p_ext->ee_block));
- err = -EIO;
+ err = -EFSCORRUPTED;
goto cleanup;
}
/* start copy indexes */
if (unlikely(path == NULL)) {
EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
- return -EIO;
+ return -EFSCORRUPTED;
}
depth = path->p_depth;
*phys = 0;
EXT4_ERROR_INODE(inode,
"EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
*logical, le32_to_cpu(ex->ee_block));
- return -EIO;
+ return -EFSCORRUPTED;
}
while (--depth >= 0) {
ix = path[depth].p_idx;
EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
depth);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
return 0;
EXT4_ERROR_INODE(inode,
"logical %d < ee_block %d + ee_len %d!",
*logical, le32_to_cpu(ex->ee_block), ee_len);
- return -EIO;
+ return -EFSCORRUPTED;
}
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
if (unlikely(path == NULL)) {
EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
- return -EIO;
+ return -EFSCORRUPTED;
}
depth = path->p_depth;
*phys = 0;
EXT4_ERROR_INODE(inode,
"first_extent(path[%d].p_hdr) != ex",
depth);
- return -EIO;
+ return -EFSCORRUPTED;
}
while (--depth >= 0) {
ix = path[depth].p_idx;
EXT4_ERROR_INODE(inode,
"ix != EXT_FIRST_INDEX *logical %d!",
*logical);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
goto found_extent;
EXT4_ERROR_INODE(inode,
"logical %d < ee_block %d + ee_len %d!",
*logical, le32_to_cpu(ex->ee_block), ee_len);
- return -EIO;
+ return -EFSCORRUPTED;
}
if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
if (unlikely(ex == NULL || eh == NULL)) {
EXT4_ERROR_INODE(inode,
"ex %p == NULL or eh %p == NULL", ex, eh);
- return -EIO;
+ return -EFSCORRUPTED;
}
if (depth == 0) {
mb_flags |= EXT4_MB_DELALLOC_RESERVED;
if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
- return -EIO;
+ return -EFSCORRUPTED;
}
depth = ext_depth(inode);
ex = path[depth].p_ext;
eh = path[depth].p_hdr;
if (unlikely(path[depth].p_hdr == NULL)) {
EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- return -EIO;
+ return -EFSCORRUPTED;
}
/* try to insert block into found extent and return */
if (unlikely(path[depth].p_hdr == NULL)) {
up_read(&EXT4_I(inode)->i_data_sem);
EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- err = -EIO;
+ err = -EFSCORRUPTED;
break;
}
ex = path[depth].p_ext;
if (unlikely(es.es_len == 0)) {
EXT4_ERROR_INODE(inode, "es.es_len == 0");
- err = -EIO;
+ err = -EFSCORRUPTED;
break;
}
"next extent == %u, next "
"delalloc extent = %u",
next, next_del);
- err = -EIO;
+ err = -EFSCORRUPTED;
break;
}
}
leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
- return -EIO;
+ return -EFSCORRUPTED;
}
err = ext4_ext_get_access(handle, inode, path);
if (err)
eh = path[depth].p_hdr;
if (unlikely(path[depth].p_hdr == NULL)) {
EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
- return -EIO;
+ return -EFSCORRUPTED;
}
/* find where to start removing */
ex = path[depth].p_ext;
"on extent %u:%u",
start, end, ex_ee_block,
ex_ee_block + ex_ee_len - 1);
- err = -EIO;
+ err = -EFSCORRUPTED;
goto out;
} else if (a != ex_ee_block) {
/* remove tail of the extent */
EXT4_ERROR_INODE(inode,
"path[%d].p_hdr == NULL",
depth);
- err = -EIO;
+ err = -EFSCORRUPTED;
}
goto out;
}
i = 0;
if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
- err = -EIO;
+ err = -EFSCORRUPTED;
goto out;
}
}
* Should be a no-op if we did IO above. */
cond_resched();
if (WARN_ON(i + 1 > depth)) {
- err = -EIO;
+ err = -EFSCORRUPTED;
break;
}
path[i + 1].p_bh = bh;
* possible initialization would be here
*/
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+ if (ext4_has_feature_extents(sb)) {
#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
printk(KERN_INFO "EXT4-fs: file extents enabled"
#ifdef AGGRESSIVE_TEST
*/
void ext4_ext_release(struct super_block *sb)
{
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+ if (!ext4_has_feature_extents(sb))
return;
#ifdef EXTENTS_STATS
if (!ex) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) map->m_lblk);
- return -EIO;
+ return -EFSCORRUPTED;
}
unwritten = ext4_ext_is_unwritten(ex);
split_flag1 = 0;
max_zeroout = sbi->s_extent_max_zeroout_kb >>
(inode->i_sb->s_blocksize_bits - 10);
+ if (ext4_encrypted_inode(inode))
+ max_zeroout = 0;
+
/* If extent is less than s_max_zeroout_kb, zeroout directly */
if (max_zeroout && (ee_len <= max_zeroout)) {
err = ext4_ext_zeroout(inode, ex);
if (!ex) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) map->m_lblk);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
"lblock: %lu, depth: %d pblock %lld",
(unsigned long) map->m_lblk, depth,
path[depth].p_block);
- err = -EIO;
+ err = -EFSCORRUPTED;
goto out2;
}
if (depth == path->p_depth) {
ex_start = path[depth].p_ext;
if (!ex_start)
- return -EIO;
+ return -EFSCORRUPTED;
ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
if (!extent) {
EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
(unsigned long) *iterator);
- return -EIO;
+ return -EFSCORRUPTED;
}
if (SHIFT == SHIFT_LEFT && *iterator >
le32_to_cpu(extent->ee_block)) {
int split = 0;
path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
- if (unlikely(IS_ERR(path1))) {
+ if (IS_ERR(path1)) {
*erp = PTR_ERR(path1);
path1 = NULL;
finish:
goto repeat;
}
path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
- if (unlikely(IS_ERR(path2))) {
+ if (IS_ERR(path2)) {
*erp = PTR_ERR(path2);
path2 = NULL;
goto finish;
if (!bh) {
ext4_error_inode(inode, func, line, block,
"Directory hole found");
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSCORRUPTED);
}
dirent = (struct ext4_dir_entry *) bh->b_data;
/* Determine whether or not we have an index block */
if (!is_dx_block && type == INDEX) {
ext4_error_inode(inode, func, line, block,
"directory leaf block found instead of index block");
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSCORRUPTED);
}
if (!ext4_has_metadata_csum(inode->i_sb) ||
buffer_verified(bh))
ext4_error_inode(inode, func, line, block,
"Directory index failed checksum");
brelse(bh);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSBADCRC);
}
}
if (!is_dx_block) {
ext4_error_inode(inode, func, line, block,
"Directory block failed checksum");
brelse(bh);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSBADCRC);
}
}
return bh;
}
num++;
bh = ext4_getblk(NULL, dir, b++, 0);
- if (unlikely(IS_ERR(bh))) {
+ if (IS_ERR(bh)) {
if (ra_max == 0) {
ret = bh;
goto cleanup_and_exit;
brelse(bh);
if (!ext4_valid_inum(dir->i_sb, ino)) {
EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSCORRUPTED);
}
if (unlikely(ino == dir->i_ino)) {
EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
dentry);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSCORRUPTED);
}
inode = ext4_iget_normal(dir->i_sb, ino);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
ino);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSCORRUPTED);
}
if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) {
EXT4_ERROR_INODE(d_inode(child),
"bad parent inode number: %u", ino);
- return ERR_PTR(-EIO);
+ return ERR_PTR(-EFSCORRUPTED);
}
return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino));
while ((char *) de <= top) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
buf, buf_size, offset)) {
- res = -EIO;
+ res = -EFSCORRUPTED;
goto return_result;
}
/* Provide crypto context and crypto buffer to ext4 match */
if ((char *) de >= (((char *) root) + blocksize)) {
EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
brelse(bh);
- return -EIO;
+ return -EFSCORRUPTED;
}
len = ((char *) root) + (blocksize - csum_size) - (char *) de;
goto out;
if (blocks == 1 && !dx_fallback &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ ext4_has_feature_dir_index(sb)) {
retval = make_indexed_dir(handle, &fname, dentry,
inode, bh);
bh = NULL; /* make_indexed_dir releases bh */
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
bh->b_data, bh->b_size, i))
- return -EIO;
+ return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
pde->rec_len = ext4_rec_len_to_disk(
/* limit is 16-bit i_links_count */
if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
set_nlink(inode, 1);
- EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
+ ext4_set_feature_dir_nlink(inode->i_sb);
}
}
}
struct inode *inode;
int err, credits, retries = 0;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
struct inode *inode;
int err, credits, retries = 0;
- if (!new_valid_dev(rdev))
- return -EINVAL;
-
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
struct inode *inode;
int err, retries = 0;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
retry:
inode = ext4_new_inode_start_handle(dir, mode,
if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
/* Initialize quotas before so that eventual writes go in
* separate transaction */
- dquot_initialize(dir);
- dquot_initialize(d_inode(dentry));
+ retval = dquot_initialize(dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(d_inode(dentry));
+ if (retval)
+ return retval;
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
inode = d_inode(dentry);
- retval = -EIO;
+ retval = -EFSCORRUPTED;
if (le32_to_cpu(de->inode) != inode->i_ino)
goto end_rmdir;
trace_ext4_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
- dquot_initialize(dir);
- dquot_initialize(d_inode(dentry));
+ retval = dquot_initialize(dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(d_inode(dentry));
+ if (retval)
+ return retval;
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
inode = d_inode(dentry);
- retval = -EIO;
+ retval = -EFSCORRUPTED;
if (le32_to_cpu(de->inode) != inode->i_ino)
goto end_unlink;
goto err_free_sd;
}
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ goto err_free_sd;
if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
/*
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
- dquot_initialize(dir);
+ err = dquot_initialize(dir);
+ if (err)
+ return err;
retry:
handle = ext4_journal_start(dir, EXT4_HT_DIR,
if (!ent->dir_bh)
return retval;
if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
- return -EIO;
+ return -EFSCORRUPTED;
BUFFER_TRACE(ent->dir_bh, "get_write_access");
return ext4_journal_get_write_access(handle, ent->dir_bh);
}
if (retval)
return retval;
ent->de->inode = cpu_to_le32(ino);
- if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb,
- EXT4_FEATURE_INCOMPAT_FILETYPE))
+ if (ext4_has_feature_filetype(ent->dir->i_sb))
ent->de->file_type = file_type;
ent->dir->i_version++;
ent->dir->i_ctime = ent->dir->i_mtime =
int credits;
u8 old_file_type;
- dquot_initialize(old.dir);
- dquot_initialize(new.dir);
+ retval = dquot_initialize(old.dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(new.dir);
+ if (retval)
+ return retval;
/* Initialize quotas before so that eventual writes go
* in separate transaction */
- if (new.inode)
- dquot_initialize(new.inode);
+ if (new.inode) {
+ retval = dquot_initialize(new.inode);
+ if (retval)
+ return retval;
+ }
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
if (IS_ERR(old.bh))
new.inode)))
return -EPERM;
- dquot_initialize(old.dir);
- dquot_initialize(new.dir);
+ retval = dquot_initialize(old.dir);
+ if (retval)
+ return retval;
+ retval = dquot_initialize(new.dir);
+ if (retval)
+ return retval;
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
&old.de, &old.inlined);
return 0;
}
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidate to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+ struct vfsmount *mnt = path->mnt;
+
+ /* Only bind mounts can have disconnected paths */
+ if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ return true;
+
+ return is_subdir(path->dentry, mnt->mnt_root);
+}
+
static inline int nd_alloc_stack(struct nameidata *nd)
{
if (likely(nd->depth != EMBEDDED_LEVELS))
return 0;
/* Allowed if parent directory not sticky and world-writable. */
- parent = nd->path.dentry->d_inode;
+ parent = nd->inode;
if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
return 0;
return -ECHILD;
nd->path.dentry = parent;
nd->seq = seq;
+ if (unlikely(!path_connected(&nd->path)))
+ return -ENOENT;
break;
} else {
struct mount *mnt = real_mount(nd->path.mnt);
}
}
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
{
if (!nd->root.mnt)
set_root(nd);
/* rare case of legitimate dget_parent()... */
nd->path.dentry = dget_parent(nd->path.dentry);
dput(old);
+ if (unlikely(!path_connected(&nd->path)))
+ return -ENOENT;
break;
}
if (!follow_up(&nd->path))
}
follow_mount(&nd->path);
nd->inode = nd->path.dentry->d_inode;
+ return 0;
}
/*
negative = d_is_negative(dentry);
if (read_seqcount_retry(&dentry->d_seq, seq))
return -ECHILD;
- if (negative)
- return -ENOENT;
/*
* This sequence count validates that the parent had no
goto unlazy;
}
}
+ /*
+ * Note: do negative dentry check after revalidation in
+ * case that drops it.
+ */
+ if (negative)
+ return -ENOENT;
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
if (nd->flags & LOOKUP_RCU) {
return follow_dotdot_rcu(nd);
} else
- follow_dotdot(nd);
+ return follow_dotdot(nd);
}
return 0;
}
if (err) {
const char *s = get_link(nd);
- if (unlikely(IS_ERR(s)))
+ if (IS_ERR(s))
return PTR_ERR(s);
err = 0;
if (unlikely(!s)) {
*
* Note that this routine is purely a helper for filesystem usage and should
* not be called by generic code.
+ *
+ * The caller must hold base->i_mutex.
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
}
EXPORT_SYMBOL(lookup_one_len);
+/**
+ * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
+ * @name: pathname component to lookup
+ * @base: base directory to lookup from
+ * @len: maximum length @len should be interpreted to
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * Unlike lookup_one_len, it should be called without the parent
+ * i_mutex held, and will take the i_mutex itself if necessary.
+ */
+struct dentry *lookup_one_len_unlocked(const char *name,
+ struct dentry *base, int len)
+{
+ struct qstr this;
+ unsigned int c;
+ int err;
+ struct dentry *ret;
+
+ this.name = name;
+ this.len = len;
+ this.hash = full_name_hash(name, len);
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ if (unlikely(name[0] == '.')) {
+ if (len < 2 || (len == 2 && name[1] == '.'))
+ return ERR_PTR(-EACCES);
+ }
+
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ }
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ err = inode_permission(base->d_inode, MAY_EXEC);
+ if (err)
+ return ERR_PTR(err);
+
+ ret = __d_lookup(base, &this);
+ if (ret)
+ return ret;
+ /*
+ * __d_lookup() is used to try to get a quick answer and avoid the
+ * mutex. A false-negative does no harm.
+ */
+ ret = __d_lookup(base, &this);
+ if (ret && ret->d_flags & DCACHE_OP_REVALIDATE) {
+ dput(ret);
+ ret = NULL;
+ }
+ if (ret)
+ return ret;
+
+ mutex_lock(&base->d_inode->i_mutex);
+ ret = __lookup_hash(&this, base, 0);
+ mutex_unlock(&base->d_inode->i_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(lookup_one_len_unlocked);
+
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
/**
* path_mountpoint - look up a path to be umounted
- * @nameidata: lookup context
+ * @nd: lookup context
* @flags: lookup flags
* @path: pointer to container for result
*
return ERR_PTR(-ELOOP);
filename = getname_kernel(name);
- if (unlikely(IS_ERR(filename)))
+ if (IS_ERR(filename))
return ERR_CAST(filename);
set_nameidata(&nd, -1, filename);
#include <linux/swap.h>
#include <linux/shrinker.h>
#include <linux/mm_inline.h>
+#include <linux/dax.h>
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/pagemap.h>
#include <linux/migrate.h>
#include <linux/hashtable.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/page_idle.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
};
-static int set_recommended_min_free_kbytes(void)
+static void set_recommended_min_free_kbytes(void)
{
struct zone *zone;
int nr_zones = 0;
min_free_kbytes = recommended_min;
}
setup_per_zone_wmarks();
- return 0;
}
static int start_stop_khugepaged(void)
if (!khugepaged_thread)
khugepaged_thread = kthread_run(khugepaged, NULL,
"khugepaged");
- if (unlikely(IS_ERR(khugepaged_thread))) {
+ if (IS_ERR(khugepaged_thread)) {
pr_err("khugepaged: kthread_run(khugepaged) failed\n");
err = PTR_ERR(khugepaged_thread);
khugepaged_thread = NULL;
static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
-static inline bool is_huge_zero_pmd(pmd_t pmd)
-{
- return is_huge_zero_page(pmd_page(pmd));
-}
-
-static struct page *get_huge_zero_page(void)
+struct page *get_huge_zero_page(void)
{
struct page *zero_page;
retry:
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct vm_area_struct *vma,
- unsigned long haddr, pmd_t *pmd,
- struct page *page, gfp_t gfp)
+ unsigned long address, pmd_t *pmd,
+ struct page *page, gfp_t gfp,
+ unsigned int flags)
{
struct mem_cgroup *memcg;
pgtable_t pgtable;
spinlock_t *ptl;
+ unsigned long haddr = address & HPAGE_PMD_MASK;
VM_BUG_ON_PAGE(!PageCompound(page), page);
- if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
- return VM_FAULT_OOM;
+ if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) {
+ put_page(page);
+ count_vm_event(THP_FAULT_FALLBACK);
+ return VM_FAULT_FALLBACK;
+ }
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable)) {
mem_cgroup_cancel_charge(page, memcg);
+ put_page(page);
return VM_FAULT_OOM;
}
pte_free(mm, pgtable);
} else {
pmd_t entry;
+
+ /* Deliver the page fault to userland */
+ if (userfaultfd_missing(vma)) {
+ int ret;
+
+ spin_unlock(ptl);
+ mem_cgroup_cancel_charge(page, memcg);
+ put_page(page);
+ pte_free(mm, pgtable);
+ ret = handle_userfault(vma, address, flags,
+ VM_UFFD_MISSING);
+ VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+ return ret;
+ }
+
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
atomic_long_inc(&mm->nr_ptes);
spin_unlock(ptl);
+ count_vm_event(THP_FAULT_ALLOC);
}
return 0;
pgtable_t pgtable;
struct page *zero_page;
bool set;
+ int ret;
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
return VM_FAULT_FALLBACK;
}
ptl = pmd_lock(mm, pmd);
- set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
- zero_page);
- spin_unlock(ptl);
+ ret = 0;
+ set = false;
+ if (pmd_none(*pmd)) {
+ if (userfaultfd_missing(vma)) {
+ spin_unlock(ptl);
+ ret = handle_userfault(vma, address, flags,
+ VM_UFFD_MISSING);
+ VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+ } else {
+ set_huge_zero_page(pgtable, mm, vma,
+ haddr, pmd,
+ zero_page);
+ spin_unlock(ptl);
+ set = true;
+ }
+ } else
+ spin_unlock(ptl);
if (!set) {
pte_free(mm, pgtable);
put_huge_zero_page();
}
- return 0;
+ return ret;
}
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
- put_page(page);
- count_vm_event(THP_FAULT_FALLBACK);
- return VM_FAULT_FALLBACK;
+ return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
+ flags);
+}
+
+static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned long pfn, pgprot_t prot, bool write)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pmd_t entry;
+ spinlock_t *ptl;
+
+ ptl = pmd_lock(mm, pmd);
+ if (pmd_none(*pmd)) {
+ entry = pmd_mkhuge(pfn_pmd(pfn, prot));
+ if (write) {
+ entry = pmd_mkyoung(pmd_mkdirty(entry));
+ entry = maybe_pmd_mkwrite(entry, vma);
+ }
+ set_pmd_at(mm, addr, pmd, entry);
+ update_mmu_cache_pmd(vma, addr, pmd);
}
+ spin_unlock(ptl);
+}
- count_vm_event(THP_FAULT_ALLOC);
- return 0;
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned long pfn, bool write)
+{
+ pgprot_t pgprot = vma->vm_page_prot;
+ /*
+ * If we had pmd_special, we could avoid all these restrictions,
+ * but we need to be consistent with PTEs and architectures that
+ * can't support a 'special' bit.
+ */
+ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+ BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
+ (VM_PFNMAP|VM_MIXEDMAP));
+ BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
+ BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
+
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return VM_FAULT_SIGBUS;
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return VM_FAULT_SIGBUS;
+ insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
+ return VM_FAULT_NOPAGE;
}
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
*/
if (is_huge_zero_pmd(pmd)) {
struct page *zero_page;
- bool set;
/*
* get_huge_zero_page() will never allocate a new page here,
* since we already have a zero page to copy. It just takes a
* reference.
*/
zero_page = get_huge_zero_page();
- set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
+ set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
zero_page);
- BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
ret = 0;
goto out_unlock;
}
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
+ pmd_t orig_pmd;
spinlock_t *ptl;
- int ret = 0;
- if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- struct page *page;
- pgtable_t pgtable;
- pmd_t orig_pmd;
- /*
- * For architectures like ppc64 we look at deposited pgtable
- * when calling pmdp_huge_get_and_clear. So do the
- * pgtable_trans_huge_withdraw after finishing pmdp related
- * operations.
- */
- orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
- tlb->fullmm);
- tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
- pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
- if (is_huge_zero_pmd(orig_pmd)) {
- atomic_long_dec(&tlb->mm->nr_ptes);
- spin_unlock(ptl);
+ if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
+ return 0;
+ /*
+ * For architectures like ppc64 we look at deposited pgtable
+ * when calling pmdp_huge_get_and_clear. So do the
+ * pgtable_trans_huge_withdraw after finishing pmdp related
+ * operations.
+ */
+ orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
+ tlb->fullmm);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+ if (vma_is_dax(vma)) {
+ spin_unlock(ptl);
+ if (is_huge_zero_pmd(orig_pmd))
put_huge_zero_page();
- } else {
- page = pmd_page(orig_pmd);
- page_remove_rmap(page);
- VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
- add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
- VM_BUG_ON_PAGE(!PageHead(page), page);
- atomic_long_dec(&tlb->mm->nr_ptes);
- spin_unlock(ptl);
- tlb_remove_page(tlb, page);
- }
- pte_free(tlb->mm, pgtable);
- ret = 1;
+ } else if (is_huge_zero_pmd(orig_pmd)) {
+ pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
+ atomic_long_dec(&tlb->mm->nr_ptes);
+ spin_unlock(ptl);
+ put_huge_zero_page();
+ } else {
+ struct page *page = pmd_page(orig_pmd);
+ page_remove_rmap(page);
+ VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+ add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+ pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
+ atomic_long_dec(&tlb->mm->nr_ptes);
+ spin_unlock(ptl);
+ tlb_remove_page(tlb, page);
}
- return ret;
+ return 1;
}
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
/* after clearing PageTail the gup refcount can be released */
smp_mb__after_atomic();
- /*
- * retain hwpoison flag of the poisoned tail page:
- * fix for the unsuitable process killed on Guest Machine(KVM)
- * by the memory-failure.
- */
- page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
+ page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
page_tail->flags |= (page->flags &
((1L << PG_referenced) |
(1L << PG_swapbacked) |
/* clear PageTail before overwriting first_page */
smp_wmb();
+ if (page_is_young(page))
+ set_page_young(page_tail);
+ if (page_is_idle(page))
+ set_page_idle(page_tail);
+
/*
* __split_huge_page_splitting() already set the
* splitting bit in all pmd that could map this
* here). But it is generally safer to never allow
* small and huge TLB entries for the same virtual
* address to be loaded simultaneously. So instead of
- * doing "pmd_populate(); flush_tlb_range();" we first
+ * doing "pmd_populate(); flush_pmd_tlb_range();" we first
* mark the current pmd notpresent (atomically because
* here the pmd_trans_huge and pmd_trans_splitting
* must remain set at all times on the pmd until the
for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
_pte++, address += PAGE_SIZE) {
pte_t pteval = *_pte;
- if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- if (++none_or_zero <= khugepaged_max_ptes_none)
+ if (pte_none(pteval) || (pte_present(pteval) &&
+ is_zero_pfn(pte_pfn(pteval)))) {
+ if (!userfaultfd_armed(vma) &&
+ ++none_or_zero <= khugepaged_max_ptes_none)
continue;
else
goto out;
VM_BUG_ON_PAGE(PageLRU(page), page);
/* If there is no mapped pte young don't collapse the page */
- if (pte_young(pteval) || PageReferenced(page) ||
+ if (pte_young(pteval) ||
+ page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
referenced = true;
}
static void khugepaged_alloc_sleep(void)
{
- wait_event_freezable_timeout(khugepaged_wait, false,
- msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
+ DEFINE_WAIT(wait);
+
+ add_wait_queue(&khugepaged_wait, &wait);
+ freezable_schedule_timeout_interruptible(
+ msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
+ remove_wait_queue(&khugepaged_wait, &wait);
}
static int khugepaged_node_load[MAX_NUMNODES];
*/
up_read(&mm->mmap_sem);
- *hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
+ *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM);
_pte++, _address += PAGE_SIZE) {
pte_t pteval = *_pte;
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- if (++none_or_zero <= khugepaged_max_ptes_none)
+ if (!userfaultfd_armed(vma) &&
+ ++none_or_zero <= khugepaged_max_ptes_none)
continue;
else
goto out_unmap;
*/
if (page_count(page) != 1 + !!PageSwapCache(page))
goto out_unmap;
- if (pte_young(pteval) || PageReferenced(page) ||
+ if (pte_young(pteval) ||
+ page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
referenced = true;
}
pmd_t *pmd)
{
spinlock_t *ptl;
- struct page *page;
+ struct page *page = NULL;
struct mm_struct *mm = vma->vm_mm;
unsigned long haddr = address & HPAGE_PMD_MASK;
unsigned long mmun_start; /* For mmu_notifiers */
again:
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
ptl = pmd_lock(mm, pmd);
- if (unlikely(!pmd_trans_huge(*pmd))) {
- spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- return;
- }
- if (is_huge_zero_pmd(*pmd)) {
+ if (unlikely(!pmd_trans_huge(*pmd)))
+ goto unlock;
+ if (vma_is_dax(vma)) {
+ pmd_t _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ if (is_huge_zero_pmd(_pmd))
+ put_huge_zero_page();
+ } else if (is_huge_zero_pmd(*pmd)) {
__split_huge_zero_page_pmd(vma, haddr, pmd);
- spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- return;
+ } else {
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!page_count(page), page);
+ get_page(page);
}
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!page_count(page), page);
- get_page(page);
+ unlock:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- split_huge_page(page);
+ if (!page)
+ return;
+ split_huge_page(page);
put_page(page);
/*
split_huge_page_pmd_mm(mm, address, pmd);
}
-void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
long adjust_next)
static void ovs_notify(struct genl_family *family,
struct sk_buff *skb, struct genl_info *info)
{
- genl_notify(family, skb, genl_info_net(info), info->snd_portid,
- 0, info->nlhdr, GFP_KERNEL);
+ genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
const char *ovs_dp_name(const struct datapath *dp)
{
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
- return vport->ops->get_name(vport);
+ return ovs_vport_name(vport);
}
static int get_dpifindex(const struct datapath *dp)
local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local)
- ifindex = netdev_vport_priv(local)->dev->ifindex;
+ ifindex = local->dev->ifindex;
else
ifindex = 0;
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+ upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
if (upcall_info->actions_len)
size += nla_total_size(upcall_info->actions_len);
+ /* OVS_PACKET_ATTR_MRU */
+ if (upcall_info->mru)
+ size += nla_total_size(sizeof(upcall_info->mru));
+
return size;
}
+static void pad_packet(struct datapath *dp, struct sk_buff *skb)
+{
+ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+ size_t plen = NLA_ALIGN(skb->len) - skb->len;
+
+ if (plen > 0)
+ memset(skb_put(skb, plen), 0, plen);
+ }
+}
+
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
- err = ovs_nla_put_egress_tunnel_key(user_skb,
- upcall_info->egress_tun_info);
+ err = ovs_nla_put_tunnel_info(user_skb,
+ upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
nla_nest_cancel(user_skb, nla);
}
+ /* Add OVS_PACKET_ATTR_MRU */
+ if (upcall_info->mru) {
+ if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
+ upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+ pad_packet(dp, user_skb);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
goto out;
/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
- if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
- size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
-
- if (plen > 0)
- memset(skb_put(user_skb, plen), 0, plen);
- }
+ pad_packet(dp, user_skb);
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
struct sk_buff *packet;
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
+ u16 mru = 0;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
else
packet->protocol = htons(ETH_P_802_2);
+ /* Set packet's mru */
+ if (a[OVS_PACKET_ATTR_MRU]) {
+ mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
+ packet->ignore_df = 1;
+ }
+ OVS_CB(packet)->mru = mru;
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
- &flow->key, log);
+ err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
+ packet, &flow->key, log);
if (err)
goto err_flow_free;
- err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+ err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, &acts, log);
if (err)
goto err_flow_free;
rcu_assign_pointer(flow->sf_acts, acts);
- OVS_CB(packet)->egress_tun_info = NULL;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
- dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(net, ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
if (!input_vport)
goto err_unlock;
+ packet->dev = input_vport->dev;
OVS_CB(packet)->input_vport = input_vport;
sf_acts = rcu_dereference(flow->sf_acts);
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
+ [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
/* OVS_FLOW_ATTR_ACTIONS */
if (should_fill_actions(ufid_flags))
- len += nla_total_size(acts->actions_len);
+ len += nla_total_size(acts->orig_len);
return len
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow *flow = NULL, *new_flow;
/* Extract key. */
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto err_kfree_flow;
- ovs_flow_mask_key(&new_flow->key, &key, &mask);
+ ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
/* Extract flow identifier. */
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
goto err_kfree_flow;
/* Validate actions. */
- error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
- &acts, log);
+ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
+ &new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
goto err_kfree_flow;
}
ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
}
ovs_unlock();
- ovs_nla_free_flow_actions(old_acts);
+ ovs_nla_free_flow_actions_rcu(old_acts);
ovs_flow_free(new_flow, false);
}
ovs_unlock();
kfree_skb(reply);
err_kfree_acts:
- kfree(acts);
+ ovs_nla_free_flow_actions(acts);
err_kfree_flow:
ovs_flow_free(new_flow, false);
error:
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
+static struct sw_flow_actions *get_flow_actions(struct net *net,
+ const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask,
bool log)
struct sw_flow_key masked_key;
int error;
- ovs_flow_mask_key(&masked_key, key, mask);
- error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
+ ovs_flow_mask_key(&masked_key, key, true, mask);
+ error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
if (error) {
OVS_NLERR(log,
"Actions may not be safe on all matching packets");
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto error;
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
- log);
+ acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
+ &mask, log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
}
ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
info, OVS_FLOW_CMD_NEW, false,
ufid_flags);
- if (unlikely(IS_ERR(reply))) {
+ if (IS_ERR(reply)) {
error = PTR_ERR(reply);
goto err_unlock_ovs;
}
if (reply)
ovs_notify(&dp_flow_genl_family, reply, info);
if (old_acts)
- ovs_nla_free_flow_actions(old_acts);
+ ovs_nla_free_flow_actions_rcu(old_acts);
return 0;
ovs_unlock();
kfree_skb(reply);
err_kfree_acts:
- kfree(acts);
+ ovs_nla_free_flow_actions(acts);
error:
return error;
}
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
struct sw_flow *flow;
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+ err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
log);
} else if (!ufid_present) {
OVS_NLERR(log,
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
struct sw_flow *flow = NULL;
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
- log);
+ err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
+ NULL, log);
if (unlikely(err))
return err;
}
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
nla_put_string(skb, OVS_VPORT_ATTR_NAME,
- vport->ops->get_name(vport)))
+ ovs_vport_name(vport)))
goto nla_put_failure;
ovs_vport_get_stats(vport, &vport_stats);
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+ ovs_ct_init(net);
return 0;
}
struct vport *vport;
hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
- struct netdev_vport *netdev_vport;
-
if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
continue;
- netdev_vport = netdev_vport_priv(vport);
- if (dev_net(netdev_vport->dev) == dnet)
+ if (dev_net(vport->dev) == dnet)
list_add(&vport->detach_list, head);
}
}
struct net *net;
LIST_HEAD(head);
+ ovs_ct_exit(dnet);
ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);