On 2019/6/19 下午9:39, David Sterba wrote:
> On Wed, Jun 19, 2019 at 09:37:39AM +0800, Qu Wenruo wrote:
>>> struct btrfs_key {
>>> __u64 objectid;
>>> - __u8 type;
>>> __u64 offset;
>>> + __u8 type;
>>> } __attribute__ ((__packed__));
>>
>> And why not remove the packed attribute?
>
> Because of this (stack usage changes):
That's expected as long as we're using btrfs_key on stack.
But if we keep btrfs_key on the stack and honor the packed attribute,
then adjacent on-stack memory is not accessed aligned, which could cause
a (non-obvious) performance drop.
If the unaligned memory access is really causing some performance drop even
on stack memory, then I'd say the bump in stack memory usage is acceptable.
If not, then the idea of enabling -Waddress-of-packed-member by default makes no sense.
Thanks,
Qu
>
> do_relocation +8 (304 -> 312)
> btrfs_orphan_cleanup +16 (128 -> 144)
> btrfs_init_new_device -16 (288 -> 272)
> get_subvol_name_from_objectid +8 (136 -> 144)
> log_conflicting_inodes +8 (184 -> 192)
> btrfs_read_chunk_tree +16 (128 -> 144)
> btrfs_recover_relocation +8 (136 -> 144)
> scrub_stripe +16 (536 -> 552)
> btrfs_clone +16 (352 -> 368)
> free_space_next_bitmap +8 (80 -> 88)
> btrfs_find_orphan_roots +16 (112 -> 128)
> find_free_dev_extent_start +8 (160 -> 168)
> btrfs_lookup_extent_info +8 (160 -> 168)
> btrfs_find_item +8 (80 -> 88)
> btrfs_create_pending_block_groups +8 (120 -> 128)
> btrfs_read_block_groups -24 (176 -> 152)
> __remove_from_free_space_tree +8 (120 -> 128)
> check_committed_ref +8 (104 -> 112)
> replay_one_buffer +8 (152 -> 160)
> fixup_inode_link_counts +8 (96 -> 104)
> btrfs_del_csums +8 (192 -> 200)
> btrfs_search_path_in_tree_user +16 (192 -> 208)
> btrfs_lookup_dentry +8 (168 -> 176)
> __add_tree_block +8 (120 -> 128)
> __btrfs_balance +8 (264 -> 272)
> __lookup_free_space_inode +16 (112 -> 128)
> __readahead_hook +16 (168 -> 184)
> btrfs_get_parent +8 (96 -> 104)
> btrfs_run_dev_replace +8 (88 -> 96)
> add_qgroup_item +8 (80 -> 88)
> merge_reloc_root +16 (240 -> 256)
> link_to_fixup_dir +8 (80 -> 88)
> btrfs_insert_orphan_item +8 (64 -> 72)
> btrfs_delete_delayed_items +8 (184 -> 192)
> btrfs_read_qgroup_config +8 (136 -> 144)
> is_extent_unchanged +8 (152 -> 160)
> btrfs_recover_log_trees +24 (192 -> 216)
> btrfs_create_free_space_tree +8 (136 -> 144)
> send_subvol +8 (136 -> 144)
> btrfs_compare_trees +24 (176 -> 200)
> get_first_ref +8 (136 -> 144)
> qgroup_trace_new_subtree_blocks +8 (176 -> 184)
> qgroup_trace_extent_swap +16 (192 -> 208)
> generic_bin_search +8 (160 -> 168)
> replay_one_name +8 (152 -> 160)
> btrfs_print_tree +8 (160 -> 168)
> __create_free_space_inode +8 (112 -> 120)
> copy_items +24 (328 -> 352)
> walk_down_reloc_tree +8 (160 -> 168)
> btrfs_find_next_key +8 (184 -> 192)
> btrfs_del_inode_ref +8 (144 -> 152)
> btrfs_check_node +16 (144 -> 160)
> find_next_devid +8 (88 -> 96)
> btrfs_new_inode +8 (184 -> 192)
> __add_to_free_space_tree +16 (136 -> 152)
> __btrfs_run_delayed_refs +8 (168 -> 176)
> btrfs_qgroup_trace_subtree +8 (224 -> 232)
> btrfs_lookup_csums_range +8 (184 -> 192)
> __btrfs_update_delayed_inode +8 (112 -> 120)
> lookup_inline_extent_backref +8 (184 -> 192)
> find_data_references +8 (152 -> 160)
> replay_dir_deletes +16 (168 -> 184)
> check_leaf +8 (168 -> 176)
> btrfs_ioctl_get_subvol_info +16 (128 -> 144)
> btrfs_insert_inode_ref +8 (152 -> 160)
> btrfs_finish_sprout +8 (152 -> 160)
> build_backref_tree +8 (272 -> 280)
> insert_extent_data_ref +8 (96 -> 104)
> send_clone -24 (160 -> 136)
> insert_tree_block_ref +8 (56 -> 64)
> read_node_slot +8 (88 -> 96)
> btrfs_csum_file_blocks +8 (168 -> 176)
> replace_path +16 (352 -> 368)
> changed_inode +16 (144 -> 160)
> process_all_refs +16 (112 -> 128)
> btrfs_find_root +16 (144 -> 160)
> reada_walk_down +8 (176 -> 184)
> btrfs_insert_xattr_item +8 (136 -> 144)
> maybe_send_hole +8 (152 -> 160)
> btrfs_uuid_scan_kthread +8 (520 -> 528)
> modify_free_space_bitmap +8 (192 -> 200)
> lookup_extent_data_ref +8 (144 -> 152)
> btrfs_run_delayed_refs_for_head +8 (208 -> 216)
> btrfs_search_dir_index_item +8 (112 -> 120)
> btrfs_prev_leaf +16 (88 -> 104)
> may_destroy_subvol +8 (88 -> 96)
> convert_free_space_to_extents +8 (176 -> 184)
> btrfs_quota_enable +8 (128 -> 136)
> remove_block_group_free_space +8 (120 -> 128)
> __btrfs_drop_extents +24 (352 -> 376)
> btrfs_finish_chunk_alloc +16 (192 -> 208)
> log_dir_items +16 (192 -> 208)
> btrfs_log_changed_extents +8 (240 -> 248)
> update_cache_item +8 (112 -> 120)
> extent_from_logical +8 (104 -> 112)
> btrfs_drop_snapshot +8 (176 -> 184)
> btrfs_realloc_node +8 (208 -> 216)
> did_create_dir +8 (88 -> 96)
> add_qgroup_relation_item +8 (80 -> 88)
> find_dir_range +8 (112 -> 120)
> btrfs_verify_level_key +8 (128 -> 136)
> do_walk_down +8 (336 -> 344)
> get_last_extent +8 (88 -> 96)
> add_keyed_refs +8 (168 -> 176)
> read_block_for_search +8 (168 -> 176)
> can_rmdir +8 (120 -> 128)
> btrfs_insert_empty_inode +8 (40 -> 48)
> btrfs_add_root_ref +8 (144 -> 152)
> __btrfs_free_extent +8 (216 -> 224)
> add_all_parents +8 (160 -> 168)
> btrfs_search_path_in_tree +8 (120 -> 128)
> add_new_free_space_info +8 (72 -> 80)
> btrfs_set_inode_index +8 (96 -> 104)
> btrfs_search_forward +8 (128 -> 136)
> insert_balance_item +8 (232 -> 240)
> btrfs_find_one_extref +8 (104 -> 112)
> scrub_raid56_parity +8 (400 -> 408)
> btrfs_real_readdir +8 (192 -> 200)
> btrfs_log_inode_parent +8 (264 -> 272)
> btrfs_listxattr +8 (168 -> 176)
> convert_free_space_to_bitmaps +8 (168 -> 176)
> btrfs_verify_dev_extents +8 (152 -> 160)
> find_next_extent +8 (120 -> 128)
> btrfs_set_item_key_safe +8 (160 -> 168)
> btrfs_mark_extent_written +16 (296 -> 312)
> replay_xattr_deletes +8 (192 -> 200)
> caching_kthread +8 (128 -> 136)
> btrfs_remove_chunk +8 (184 -> 192)
> scrub_print_warning_inode +8 (208 -> 216)
> caching_thread +8 (184 -> 192)
> log_new_dir_dentries +16 (216 -> 232)
> drop_objectid_items +16 (112 -> 128)
> insert_dir_log_key +8 (56 -> 64)
> iterate_dir_item +8 (176 -> 184)
> btrfs_lookup_csum +8 (104 -> 112)
> relocate_tree_blocks +8 (168 -> 176)
> btrfs_build_ref_tree -8 (168 -> 160)
> btrfs_find_highest_objectid +8 (80 -> 88)
> btrfs_insert_dir_item +8 (136 -> 144)
> walk_down_log_tree +8 (176 -> 184)
> btrfs_insert_file_extent +8 (112 -> 120)
> setup_leaf_for_split +8 (120 -> 128)
> btrfs_shrink_device +8 (176 -> 184)
>
> LOST (0):
>
> NEW (544):
> btrfs_relocate_sys_chunks
> find_first_block_group
> add_delayed_refs
> process_leaf
>
> LOST/NEW DELTA: +544
> PRE/POST DELTA: +1840
>
Attachment:
signature.asc
Description: OpenPGP digital signature
