The call chain __btrfs_map_block()->find_live_mirror() uses the
thread's %pid to determine the %mirror_num for a read when
mirror_num=0 is passed in the argument.
This pid-based mirror_num extrapolation has the following disadvantages:
 - A single-process large read IO will read from only one disk.
 - In the worst case, all processes reading from the FS could have
   either odd or even pids, so the read IO gets skewed.
 - There is no deterministic way of knowing/controlling which copy will
   be used for reading.
 - Performance may vary for a given multi-process workload run at
   different times.
So we need other types of readmirror policies.
This patch introduces a framework so that we can add more policies, and
converts the existing %pid behaviour into a configurable parameter set
through a property.
For example:
btrfs property set /btrfs readmirror pid
btrfs property set /btrfs readmirror ""
Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx>
---
fs/btrfs/props.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/volumes.c | 11 ++++++++++-
fs/btrfs/volumes.h | 7 +++++++
3 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 1a13f10a6ef5..776cdf099f93 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -11,6 +11,7 @@
#include "ctree.h"
#include "xattr.h"
#include "compression.h"
+#include "volumes.h"
#define BTRFS_PROP_HANDLERS_HT_BITS 8
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
@@ -326,6 +327,45 @@ static const char *prop_compression_extract(struct inode *inode)
return NULL;
}
+/*
+ * Validate a value for the "readmirror" property.
+ *
+ * @inode: inode the property is being set on
+ * @value: property value, @len bytes long (not assumed NUL-terminated)
+ * @len:   length of @value in bytes
+ *
+ * Returns 0 if @value is acceptable, -EINVAL otherwise.
+ */
+static int prop_readmirror_validate(struct inode *inode, const char *value,
+				    size_t len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	/* Only accepted on the root whose objectid is BTRFS_FS_TREE_OBJECTID. */
+	if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID)
+		return -EINVAL;
+
+	/* An empty value is always valid. */
+	if (!len)
+		return 0;
+
+	/*
+	 * Require an exact match. Checking @len first rejects values that
+	 * merely start with "pid" and keeps strncmp() from reading past the
+	 * end of a value shorter than 3 bytes.
+	 */
+	if (len == 3 && !strncmp("pid", value, 3))
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Apply the "readmirror" property value to the filesystem's
+ * btrfs_fs_devices.
+ *
+ * @inode: inode the property belongs to; only its superblock is used
+ * @value: property value, @len bytes long (not assumed NUL-terminated),
+ *         or NULL
+ * @len:   length of @value in bytes
+ *
+ * A NULL or empty value selects BTRFS_READMIRROR_DEFAULT, "pid" selects
+ * BTRFS_READMIRROR_PID.
+ *
+ * Returns 0 on success, -EINVAL if @value is not a known policy name (the
+ * current policy is left untouched in that case).
+ */
+static int prop_readmirror_apply(struct inode *inode, const char *value,
+				 size_t len)
+{
+	struct btrfs_fs_devices *fs_devices = btrfs_sb(inode->i_sb)->fs_devices;
+
+	if (!value || !len) {
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_DEFAULT;
+		return 0;
+	}
+
+	/* Exact match only; @len guards against reading beyond @value. */
+	if (len == 3 && !strncmp("pid", value, 3)) {
+		fs_devices->readmirror_policy = BTRFS_READMIRROR_PID;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Extract hook for the "readmirror" property.
+ *
+ * The readmirror policy applies to the whole FS, so inheritance is not
+ * applicable and there is never a value to report: always returns NULL.
+ */
+static const char *prop_readmirror_extract(struct inode *inode)
+{
+	return NULL;
+}
+
static struct prop_handler prop_handlers[] = {
{
.xattr_name = XATTR_BTRFS_PREFIX "compression",
@@ -334,6 +374,13 @@ static const char *prop_compression_extract(struct inode *inode)
.extract = prop_compression_extract,
.inheritable = 1
},
+ {
+ .xattr_name = XATTR_BTRFS_PREFIX "readmirror",
+ .validate = prop_readmirror_validate,
+ .apply = prop_readmirror_apply,
+ .extract = prop_readmirror_extract,
+ .inheritable = 0
+ },
};
static int inherit_props(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9024eee889b9..e5072d46e181 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5562,7 +5562,16 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
else
num_stripes = map->num_stripes;
- preferred_mirror = first + current->pid % num_stripes;
+ switch(fs_info->fs_devices->readmirror_policy) {
+ case BTRFS_READMIRROR_PID:
+ /* fall through */
+ case BTRFS_READMIRROR_DEFAULT:
+ /* fall through */
+ default:
+ /* readmirror as per thread pid */
+ preferred_mirror = first + current->pid % num_stripes;
+ break;
+ }
if (dev_replace_is_ongoing &&
fs_info->dev_replace.cont_reading_from_srcdev_mode ==
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3ad9d58d1b66..27dce9242b55 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -208,6 +208,11 @@ struct btrfs_device {
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
+enum btrfs_readmirror_policy { /* stored in btrfs_fs_devices::readmirror_policy */
+ BTRFS_READMIRROR_DEFAULT, /* set when the property value is NULL; find_live_mirror() treats it like PID */
+ BTRFS_READMIRROR_PID, /* set by the "pid" value; mirror picked from current->pid */
+};
+
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
u8 metadata_uuid[BTRFS_FSID_SIZE];
@@ -254,6 +259,8 @@ struct btrfs_fs_devices {
struct kobject fsid_kobj;
struct kobject *device_dir_kobj;
struct completion kobj_unregister;
+
+ int readmirror_policy;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
--
1.8.3.1