Btrfs tree lock performance analyser v2

Hi,

Here is the second version of the btrfs tree lock analyser.
https://gist.github.com/adam900710/766767dba5381b86534c9051780ed262

The script is also attached at the end of this mail.
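To use it, run the script as root (bcc is the only user space
dependency); it records until you press Ctrl-C, then dumps the result
as CSV to stdout, with status messages going to stderr.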

Please note that it needs a trace event that is not yet merged.

That patch can be found here:
https://patchwork.kernel.org/patch/10881305/

Unlike the previous version, this one is based on trace events.
As long as the btrfs_tree_lock() and btrfs_tree_read_lock() events are
available, we can use them to get all the needed info.
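
If you are not sure whether your running kernel already exports those
events with the fields the script reads (fsid, start_ns, end_ns and
owner), a rough sketch like this can check (the helper name is mine,
and it assumes tracefs is mounted at /sys/kernel/debug/tracing):

import os

def has_lock_trace_events(tracefs="/sys/kernel/debug/tracing"):
    # The analyser relies on fsid, start_ns, end_ns and owner being
    # exported by both tree lock events.
    needed = ("fsid", "start_ns", "end_ns", "owner")
    for event in ("btrfs_tree_lock", "btrfs_tree_read_lock"):
        fmt = os.path.join(tracefs, "events", "btrfs", event, "format")
        try:
            with open(fmt) as f:
                content = f.read()
        except IOError:
            return False
        if not all(field in content for field in needed):
            return False
    return True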

Compared to the previous in-kernel version, this one has better
resolution. For example, take an event with start_ns = 100 ns and
end_ns = 200,000,000 ns (200 ms), with a checkpoint interval of 100 ms.

The previous in-kernel version would put all of that 200 ms latency
into the 200 ms checkpoint.

This version instead puts (100 ms - 100 ns) into the 100 ms checkpoint
and the remaining 100 ms into the 200 ms checkpoint.
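
A minimal sketch of that splitting, essentially what process_event() in
the script below does minus the per-owner accounting (checkpoints are
keyed by the aligned start of each 100 ms interval):

def split_event(start_ns, end_ns, interval=100 * 1000 * 1000):
    # Distribute the latency of [start_ns, end_ns) over the
    # interval-aligned checkpoints it overlaps.
    parts = {}
    cur = (start_ns // interval) * interval
    while cur < end_ns:
        parts[cur] = min(end_ns, cur + interval) - max(cur, start_ns)
        cur += interval
    return parts

# For the example above:
# split_event(100, 200 * 1000 * 1000) == {0: 99999900, 100000000: 100000000}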

Some test results can be found in this Google doc:
https://docs.google.com/spreadsheets/d/1B5ETQNMq2plIcWBgEJZ5ap6NnXcAu2A4gTogGltlq4E/edit?usp=sharing


The script starts below:

#!/usr/bin/python2
# @lint-avoid-python-3-compatibility-imports

from __future__ import print_function
from bcc import BPF
from sys import stderr

text_bpf = '''
#include <uapi/linux/ptrace.h>
#define FSID_SIZE   16

struct data_t {
    u8  fsid[FSID_SIZE];
    u64 start_ns;
    u64 end_ns;
    u64 owner;
};

BPF_PERF_OUTPUT(events);

/*
 * Both probes simply copy the event fields (fsid, start/end timestamps
 * and tree owner) into a data_t and push it to user space through the
 * perf buffer.
 */
TRACEPOINT_PROBE(btrfs, btrfs_tree_read_lock)
{
    struct data_t data;

    bpf_probe_read(data.fsid, FSID_SIZE, args->fsid);
    data.start_ns = args->start_ns;
    data.end_ns = args->end_ns;
    data.owner = args->owner;
    events.perf_submit(args, &data, sizeof(data));
    return 0;
}

TRACEPOINT_PROBE(btrfs, btrfs_tree_lock)
{
    struct data_t data;

    bpf_probe_read(data.fsid, FSID_SIZE, args->fsid);
    data.start_ns = args->start_ns;
    data.end_ns = args->end_ns;
    data.owner = args->owner;
    events.perf_submit(args, &data, sizeof(data));
    return 0;
}
'''

# Subvolume trees are the default fs tree (BTRFS_FS_TREE_OBJECTID == 5)
# and any tree whose objectid falls into the regular subvolume range
# [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID], i.e. 256 to
# -256ULL (2^64 - 256).
def is_fstree(owner):
    if owner == 5 or (owner >= 256 and owner <= (2 ** 64 - 256)):
        return True
    return False

# TODO: Drop this coarse classification once there is a good way to
# output all the needed info
def get_owner_str(owner):
    if is_fstree(owner):
        return 'SUBVOL'
    if owner == 1:
        return 'TREE_ROOT'
    if owner == 2:
        return 'EXTENT_ROOT'
    return 'OTHER_ROOTS'


'''
Sparse dict where accessing a missing (non-existent) key returns 0
instead of raising an exception
'''
class sparse_dict:
    def __init__(self):
        self.data_dict = {}

    def __getitem__(self, key):
        if key not in self.data_dict:
            return 0
        return self.data_dict[key]

    def __setitem__(self, key, value):
        self.data_dict[key] = value

    def __contains__(self, key):
        return (key in self.data_dict)


def process_event(cpu, data, size):
    event = b["events"].event(data)

    global start_time_set
    global start_time
    global end_time
    global time_interval
    global results

    cur = int(event.start_ns / time_interval) * time_interval
    if start_time_set:
        start_time = min(cur, start_time)
    else:
        start_time = cur
        start_time_set = True
    end_time = max(event.end_ns, end_time)

    # Split the lock wait time across every time_interval-aligned
    # checkpoint that the event's [start_ns, end_ns) range overlaps.
    while cur < event.end_ns:
        end_ns = min(event.end_ns, cur + time_interval)
        start_ns = max(cur, event.start_ns)

        if cur not in results:
            results[cur] = sparse_dict()

        results[cur][get_owner_str(event.owner)] += end_ns - start_ns
        cur += time_interval

'''
output format (csv):
<aligned timestamp>, <tree root ns>, <extent root ns>, <subvol ns>,
<other ns>
'''
def print_results():
    print(file=stderr)
    if not start_time_set:
        print("no data", file=stderr)
        exit()
    cur = start_time
    print("%s,%s,%s,%s,%s" % ("timestamp", "root", "extent", "subvol",
"other"))
    while cur < end_time:
        if cur not in results:
            print("%d,%d,%d,%d,%d" % (cur - start_time, 0, 0, 0, 0))
        else:
            print("%d,%d,%d,%d,%d" % (cur - start_time,
                results[cur]['SUBVOL'],
                results[cur]['TREE_ROOT'],
                results[cur]['EXTENT_ROOT'],
                results[cur]['OTHER_ROOTS']))
        cur += time_interval

# default time interval is 100ms
time_interval = 100 * 1000 * 1000

# @results is a two-dimensional dict.
# Access it as results[<aligned_timestamp>][<owner_str>]; there is no
# need to worry about non-existent keys.
results = sparse_dict()

# Used to catch the time stamp of the first event
start_time_set = False
start_time = 0

end_time = 0


b = BPF(text = text_bpf)
b["events"].open_perf_buffer(process_event, page_cnt=64)
print("start recording", file=stderr)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        print_results()
        exit()
