|
|
|
[PATCH 11/16] netvm: Propagate page->pfmemalloc from skb_alloc_page to skb | |
| [Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
|
|
The skb->pfmemalloc flag gets set to true iff during the slab
allocation of data in __alloc_skb that the the PFMEMALLOC reserves
were used. If page splitting is used, it is possible that pages will
be allocated from the PFMEMALLOC reserve without propagating this
information to the skb. This patch propagates page->pfmemalloc from
pages allocated for fragments to the skb.
It works by reintroducing and expanding the skb_alloc_page() API
to take an skb. If the page was allocated from pfmemalloc reserves,
it is automatically copied. If the driver allocates the page before
the skb, it should call skb_propagate_pfmemalloc() after the skb is
allocated to ensure the flag is copied properly.
Failure to do so is not critical. The resulting driver may perform
slower if it is used for swap-over-NBD or swap-over-NFS but it should
not result in failure.
[davem@xxxxxxxxxxxxx: API rename and consistency]
Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
Acked-by: David S. Miller <davem@xxxxxxxxxxxxx>
---
drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +-
drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 2 +-
drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +-
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +-
drivers/net/usb/cdc-phonet.c | 2 +-
drivers/usb/gadget/f_phonet.c | 2 +-
include/linux/skbuff.h | 55 +++++++++++++++++++++
8 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 8596aca..d49933e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -528,7 +528,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
#endif
while (n--) {
- pg = alloc_page(gfp);
+ pg = __skb_alloc_page(gfp, NULL);
if (unlikely(!pg)) {
q->alloc_failed++;
break;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index f2d1ecd..8877fbf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -653,7 +653,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
alloc_small_pages:
while (n--) {
- page = alloc_page(gfp | __GFP_NOWARN | __GFP_COLD);
+ page = __skb_alloc_page(gfp | __GFP_NOWARN, NULL);
if (unlikely(!page)) {
fl->alloc_failed++;
break;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 01ced68..16e0892 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6233,7 +6233,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
return true;
if (!page) {
- page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ page = __skb_alloc_page(GFP_ATOMIC, bi->skb);
bi->page = page;
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_failed++;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5afbb37..ced0ec7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1146,8 +1146,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
/* alloc new page for storage */
if (likely(!page)) {
- page = alloc_pages(GFP_ATOMIC | __GFP_COLD | __GFP_COMP,
- ixgbe_rx_pg_order(rx_ring));
+ page = __skb_alloc_pages(GFP_ATOMIC | __GFP_COLD | __GFP_COMP,
+ bi->skb, ixgbe_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index f69ec42..0e2d91d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -369,7 +369,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
if (!bi->page_dma &&
(adapter->flags & IXGBE_FLAG_RX_PS_ENABLED)) {
if (!bi->page) {
- bi->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+ bi->page = __skb_alloc_page(GFP_ATOMIC, NULL);
if (!bi->page) {
adapter->alloc_rx_page_failed++;
goto no_buffers;
@@ -403,6 +403,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
*/
skb_reserve(skb, NET_IP_ALIGN);
+ skb_propagate_pfmemalloc(bi->page, skb);
bi->skb = skb;
}
if (!bi->dma) {
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index 187c144..6461004 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -130,7 +130,7 @@ static int rx_submit(struct usbpn_dev *pnd, struct urb *req, gfp_t gfp_flags)
struct page *page;
int err;
- page = alloc_page(gfp_flags);
+ page = __skb_alloc_page(gfp_flags | __GFP_NOMEMALLOC, NULL);
if (!page)
return -ENOMEM;
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 965a629..8ee9268 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -301,7 +301,7 @@ pn_rx_submit(struct f_phonet *fp, struct usb_request *req, gfp_t gfp_flags)
struct page *page;
int err;
- page = alloc_page(gfp_flags);
+ page = __skb_alloc_page(gfp_flags | __GFP_NOMEMALLOC, NULL);
if (!page)
return -ENOMEM;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b814bb8..7632c87 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1774,6 +1774,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
}
+/*
+ * __skb_alloc_page - allocate pages for ps-rx on a skb and preserve pfmemalloc data
+ * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
+ * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ * @order: size of the allocation
+ *
+ * Allocate a new page.
+ *
+ * %NULL is returned if there is no free memory.
+*/
+static inline struct page *__skb_alloc_pages(gfp_t gfp_mask,
+ struct sk_buff *skb,
+ unsigned int order)
+{
+ struct page *page;
+
+ gfp_mask |= __GFP_COLD;
+
+ if (!(gfp_mask & __GFP_NOMEMALLOC))
+ gfp_mask |= __GFP_MEMALLOC;
+
+ page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+ if (skb && page && page->pfmemalloc)
+ skb->pfmemalloc = true;
+
+ return page;
+}
+
+/**
+ * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data
+ * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX
+ * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used
+ *
+ * Allocate a new page.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+static inline struct page *__skb_alloc_page(gfp_t gfp_mask,
+ struct sk_buff *skb)
+{
+ return __skb_alloc_pages(gfp_mask, skb, 0);
+}
+
+/**
+ * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
+ * @page: The page that was allocated from skb_alloc_page
+ * @skb: The skb that may need pfmemalloc set
+ */
+static inline void skb_propagate_pfmemalloc(struct page *page,
+ struct sk_buff *skb)
+{
+ if (page && page->pfmemalloc)
+ skb->pfmemalloc = true;
+}
+
/**
* skb_frag_page - retrieve the page refered to by a paged fragment
* @frag: the paged fragment
--
1.7.9.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
[Other Archives] [Linux Kernel Newbies] [Linux Driver Development] [Linux Kbuild] [Fedora Kernel] [Linux Kernel Testers] [Linux SH] [Linux Omap] [Linux Tape] [Linux Input] [Linux Kernel Janitors] [Linux Kernel Packagers] [Linux Doc] [Linux Man Pages] [Linux API] [Linux Memory Management] [Linux Modules] [Linux Standards] [Kernel Announce] [Netdev] [Git] [Linux PCI] Linux CAN Development [Linux I2C] [Linux RDMA] [Linux NUMA] [Netfilter] [Netfilter Devel] [SELinux] [Bugtraq] [FIO] [Linux Perf Users] [Linux Serial] [Linux PPP] [Linux ISDN] [Linux Next] [Kernel Stable Commits] [Linux Tip Commits] [Kernel MM Commits] [Linux Security Module] [AutoFS] [Filesystem Development] [Ext3 Filesystem] [Linux bcache] [Ext4 Filesystem] [Linux BTRFS] [Linux CEPH Filesystem] [Linux XFS] [XFS] [Linux NFS] [Linux CIFS] [Ecryptfs] [Linux NILFS] [Linux Cachefs] [Reiser FS] [Initramfs] [Linux FB Devel] [Linux OpenGL] [DRI Devel] [Fastboot] [Linux RT Users] [Linux RT Stable] [eCos] [Corosync] [Linux Clusters] [LVS Devel] [Hot Plug] [Linux Virtualization] [KVM] [KVM PPC] [KVM ia64] [Linux Containers] [Linux Hexagon] [Linux Cgroups] [Util Linux] [Wireless] [Linux Bluetooth] [Bluez Devel] [Ethernet Bridging] [Embedded Linux] [Barebox] [Linux MMC] [Linux IIO] [Sparse] [Smatch] [Linux Arch] [x86 Platform Driver] [Linux ACPI] [Linux IBM ACPI] [LM Sensors] [CPU Freq] [Linux Power Management] [Linmodems] [Linux DCCP] [Linux SCTP] [ALSA Devel] [Linux USB] [Linux PA RISC] [Linux Samsung SOC] [MIPS Linux] [IBM S/390 Linux] [ARM Linux] [ARM Kernel] [ARM MSM] [Tegra Devel] [Sparc Linux] [Linux Security] [Linux Sound] [Linux Media] [Video 4 Linux] [Linux IRDA Users] [Linux for the blind] [Linux RAID] [Linux ATA RAID] [Device Mapper] [Linux SCSI] [SCSI Target Devel] [Linux SCSI Target Infrastructure] [Linux IDE] [Linux SMP] [Linux AXP] [Linux Alpha] [Linux M68K] [Linux ia64] [Linux 8086] [Linux x86_64] [Linux Config] [Linux Apps] [Linux MSDOS] [Linux X.25] [Linux Crypto] [DM Crypt] [Linux Trace Users] [Linux Btrace] [Linux Watchdog] [Utrace Devel] [Linux C Programming] [Linux Assembly] [Dash] [DWARVES] [Hail Devel] [Linux Kernel Debugger] [Linux gcc] [Gcc Help] [X.Org] [Wine]
![]() |
![]() |
[Older Kernel Discussion] [Yosemite National Park Forum] [Large Format Photos] [Gimp] [Yosemite Photos] [Stuff]