Re: cp -a leaves some compressed data.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, May 16, 2020 at 5:51 PM A L <mail@xxxxxxxxxxxxxx> wrote:
>
> Dear all,
>
> I did some testing on copying files with the +c (compression) xattrs set.
>
> As far as I can tell, 'cp -a' only sets any xattrs after copying the data. This means that a compressed file should end up without compression, but still with the +c xattr set. However, this is not entirely true: a small amount of data still ends up compressed.
>
> I would like to understand why.

As discussed on the mailing list:

cp copies the xattr only after copying the file data. Since the data
is written to the destination using buffered IO, it is possible that
while copying the data the system flushes dirty pages for whatever
reason (due to memory pressure, someone called sync(2), etc) - this
data will not be compressed since the file doesn't yet have the
compression xattr. If the remaining data is flushed after cp finishes,
then that data can end up compressed, since the file has the
compression xattr at that point. Typically for small files, all the
data ends up getting flushed after cp finishes, so we don't see any
surprising behaviour.

I'll look into changing cp's behaviour to copy xattrs before file
data next week, unless you or someone else is interested in doing it.

Thanks.

>
> Here is a small test case:
>
> File test-comp.sh:
> #!/bin/bash
> mkdir -p test test/a test/b
> chattr +c test/a
> touch test/a/foo
> dd if=/dev/zero of=test/a/foo bs=1024 count=1M
> cp -a test/a test/b/
>
> Now check the output with the compsize tool:
>
> # compsize test/a
> Type       Perc     Disk Usage   Uncompressed Referenced
> TOTAL        3%       32M         1.0G         1.0G
> zlib         3%       32M         1.0G         1.0G
>
> # compsize test/b
> Type       Perc     Disk Usage   Uncompressed Referenced
> TOTAL       63%      652M         1.0G         1.0G
> none       100%      640M         640M         640M
> zlib         3%       12M         384M         384M
> /mnt/test #
>
>
> As you see, the copy ended up with 384M compressed data. When running this test several times, the amount changes between runs.
>
> I did an strace to see what was going on. It is clear that fsetxattr() is called only after all the data has been written to the file.
>
>
>
> # strace -s8 -xx cp -av a/foo b/
> execve("\x2f\x62\x69\x6e\x2f\x63\x70", ["\x63\x70", "\x2d\x61\x76", "\x61\x2f
> \x66\x6f\x6f", "\x62\x2f"], 0x7fff9d6acb68 /* 44 vars */) = 0
> brk(NULL)                               = 0x556e7cf7c000
> access("\x2f\x65\x74\x63\x2f\x6c\x64\x2e\x73\x6f\x2e\x70\x72\x65\x6c\x6f\x61\
> x64", R_OK) = -1 ENOENT (No such file or directory)
> openat(AT_FDCWD, "\x2f\x65\x74\x63\x2f\x6c\x64\x2e\x73\x6f\x2e\x63\x61\x63\x6
> 8\x65", O_RDONLY|O_CLOEXEC) = 3
> fstat(3, {st_mode=S_IFREG|0644, st_size=122526, ...}) = 0
> mmap(NULL, 122526, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f5f04d8b000
> close(3)                                = 0
> openat(AT_FDCWD, "\x2f\x6c\x69\x62\x36\x34\x2f\x6c\x69\x62\x61\x63\x6c\x2e\x7
> 3\x6f\x2e\x31", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\x7f\x45\x4c\x46\x02\x01\x01\x00"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=39240, ...}) = 0
> mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x
> 7f5f04d89000
> mmap(NULL, 41568, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f5f04d7e00
> 0
> mmap(0x7f5f04d80000, 20480, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DE
> NYWRITE, 3, 0x2000) = 0x7f5f04d80000
> mmap(0x7f5f04d85000, 8192, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3,
>  0x7000) = 0x7f5f04d85000
> mmap(0x7f5f04d87000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DE
> NYWRITE, 3, 0x8000) = 0x7f5f04d87000
> close(3)                                = 0
> openat(AT_FDCWD, "\x2f\x6c\x69\x62\x36\x34\x2f\x6c\x69\x62\x61\x74\x74\x72\x2
> e\x73\x6f\x2e\x31", O_RDONLY|O_CLOEXEC) = 3
> read(3, "\x7f\x45\x4c\x46\x02\x01\x01\x00"..., 832) = 832
> fstat(3, {st_mode=S_IFREG|0755, st_size=26720, ...}) = 0
> mmap(NULL, 29016, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f5f04d7600
> 0
> mmap(0x7f5f04d78000, 12288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DE
> NYWRITE, 3, 0x2000) = 0x7f5f04d78000
> write(1, "\x27\x61\x2f\x66\x6f\x6f\x27\x20"..., 19'a/foo' -> 'b/foo'
> ) = 19
> openat(AT_FDCWD, "\x61\x2f\x66\x6f\x6f", O_RDONLY|O_NOFOLLOW) = 3
> fstat(3, {st_mode=S_IFREG|0644, st_size=1048576, ...}) = 0
> openat(AT_FDCWD, "\x62\x2f\x66\x6f\x6f", O_WRONLY|O_CREAT|O_EXCL, 0600) = 4
> fstat(4, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
> fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0
> mmap(NULL, 139264, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
> 0x7f5f0440f000
> read(3, "\x00\x00\x00\x00\x00\x00\x00\x00"..., 131072) = 131072
> write(4, "\x00\x00\x00\x00\x00\x00\x00\x00"..., 131072) = 131072
> ...
> snip
> ...
> read(3, "", 131072)                     = 0
> utimensat(4, NULL, [{tv_sec=1589642969, tv_nsec=260647830} /* 2020-05-16T17:2
> 9:29.260647830+0200 */, {tv_sec=1589643713, tv_nsec=971537549} /* 2020-05-16T
> 17:41:53.971537549+0200 */], 0) = 0
> flistxattr(3, NULL, 0)                  = 18
> flistxattr(3, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., 18) = 18
> openat(AT_FDCWD, "\x2f\x65\x74\x63\x2f\x78\x61\x74\x74\x72\x2e\x63\x6f\x6e\x6
> 6", O_RDONLY) = 5
> fstat(5, {st_mode=S_IFREG|0644, st_size=642, ...}) = 0
> read(5, "\x23\x20\x2f\x65\x74\x63\x2f\x78"..., 4096) = 642
> read(5, "", 4096)                       = 0
> close(5)                                = 0
> openat(AT_FDCWD, "\x2f\x75\x73\x72\x2f\x6c\x69\x62\x36\x34\x2f\x67\x63\x6f\x6
> e\x76\x2f\x67\x63\x6f\x6e\x76\x2d\x6d\x6f\x64\x75\x6c\x65\x73\x2e\x63\x61\x63
> \x68\x65", O_RDONLY) = 5
> fstat(5, {st_mode=S_IFREG|0644, st_size=26988, ...}) = 0
> mmap(NULL, 26988, PROT_READ, MAP_SHARED, 5, 0) = 0x7f5f04da2000
> close(5)                                = 0
> fgetxattr(3, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., NULL, 0) = 4
> fgetxattr(3, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., "\x7a\x6c\x69\x62", 4) =
> 4
> fsetxattr(4, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., "\x7a\x6c\x69\x62", 4, 0)
>  = 0
> fgetxattr(3, "\x73\x79\x73\x74\x65\x6d\x2e\x70"..., 0x7fff8daf2580, 132) = -1
>  ENODATA (No data available)
> fstat(3, {st_mode=S_IFREG|0644, st_size=1048576, ...}) = 0
> fsetxattr(4, "\x73\x79\x73\x74\x65\x6d\x2e\x70"..., "\x02\x00\x00\x00\x01\x00
> \x06\x00"..., 28, 0) = 0
> close(4)                                = 0
> close(3)                                = 0
> munmap(0x7f5f0440f000, 139264)          = 0
> lseek(0, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
> close(0)                                = 0
> close(1)                                = 0
> close(2)                                = 0
> exit_group(0)                           = ?
> +++ exited with 0 +++



-- 
Filipe David Manana,

“Whether you think you can, or you think you can't — you're right.”




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux