Dear all,
I did some testing on copying files with the +c (compression) xattrs set.
As far as I can tell, 'cp -a' only sets xattrs after copying the data. This means that the copy of a compressed file should end up without compression, but still with the +c xattr set. However, this is not entirely true: some amount of data is still getting compressed.
I would like to understand why.
Here is a small test case:
File test-comp.sh:
#!/bin/bash
mkdir -p test test/a test/b
chattr +c test/a
touch test/a/foo
dd if=/dev/zero of=test/a/foo bs=1024 count=1M
cp -a test/a test/b/
Now check the output with the compsize tool:
# compsize test/a
Type Perc Disk Usage Uncompressed Referenced
TOTAL 3% 32M 1.0G 1.0G
zlib 3% 32M 1.0G 1.0G
# compsize test/b
Type Perc Disk Usage Uncompressed Referenced
TOTAL 63% 652M 1.0G 1.0G
none 100% 640M 640M 640M
zlib 3% 12M 384M 384M
/mnt/test #
As you can see, the copy ended up with 384M of compressed data. When running this test several times, the amount changes between runs.
I ran strace to see what was going on. It is clear that fsetxattr() is called after all the data has been written to the file.
# strace -s8 -xx cp -av a/foo b/
execve("\x2f\x62\x69\x6e\x2f\x63\x70", ["\x63\x70", "\x2d\x61\x76", "\x61\x2f
\x66\x6f\x6f", "\x62\x2f"], 0x7fff9d6acb68 /* 44 vars */) = 0
brk(NULL) = 0x556e7cf7c000
access("\x2f\x65\x74\x63\x2f\x6c\x64\x2e\x73\x6f\x2e\x70\x72\x65\x6c\x6f\x61\
x64", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "\x2f\x65\x74\x63\x2f\x6c\x64\x2e\x73\x6f\x2e\x63\x61\x63\x6
8\x65", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=122526, ...}) = 0
mmap(NULL, 122526, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f5f04d8b000
close(3) = 0
openat(AT_FDCWD, "\x2f\x6c\x69\x62\x36\x34\x2f\x6c\x69\x62\x61\x63\x6c\x2e\x7
3\x6f\x2e\x31", O_RDONLY|O_CLOEXEC) = 3
read(3, "\x7f\x45\x4c\x46\x02\x01\x01\x00"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=39240, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x
7f5f04d89000
mmap(NULL, 41568, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f5f04d7e00
0
mmap(0x7f5f04d80000, 20480, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DE
NYWRITE, 3, 0x2000) = 0x7f5f04d80000
mmap(0x7f5f04d85000, 8192, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3,
0x7000) = 0x7f5f04d85000
mmap(0x7f5f04d87000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DE
NYWRITE, 3, 0x8000) = 0x7f5f04d87000
close(3) = 0
openat(AT_FDCWD, "\x2f\x6c\x69\x62\x36\x34\x2f\x6c\x69\x62\x61\x74\x74\x72\x2
e\x73\x6f\x2e\x31", O_RDONLY|O_CLOEXEC) = 3
read(3, "\x7f\x45\x4c\x46\x02\x01\x01\x00"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=26720, ...}) = 0
mmap(NULL, 29016, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f5f04d7600
0
mmap(0x7f5f04d78000, 12288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DE
NYWRITE, 3, 0x2000) = 0x7f5f04d78000
write(1, "\x27\x61\x2f\x66\x6f\x6f\x27\x20"..., 19'a/foo' -> 'b/foo'
) = 19
openat(AT_FDCWD, "\x61\x2f\x66\x6f\x6f", O_RDONLY|O_NOFOLLOW) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=1048576, ...}) = 0
openat(AT_FDCWD, "\x62\x2f\x66\x6f\x6f", O_WRONLY|O_CREAT|O_EXCL, 0600) = 4
fstat(4, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0
mmap(NULL, 139264, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x7f5f0440f000
read(3, "\x00\x00\x00\x00\x00\x00\x00\x00"..., 131072) = 131072
write(4, "\x00\x00\x00\x00\x00\x00\x00\x00"..., 131072) = 131072
...
snip
...
read(3, "", 131072) = 0
utimensat(4, NULL, [{tv_sec=1589642969, tv_nsec=260647830} /* 2020-05-16T17:2
9:29.260647830+0200 */, {tv_sec=1589643713, tv_nsec=971537549} /* 2020-05-16T
17:41:53.971537549+0200 */], 0) = 0
flistxattr(3, NULL, 0) = 18
flistxattr(3, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., 18) = 18
openat(AT_FDCWD, "\x2f\x65\x74\x63\x2f\x78\x61\x74\x74\x72\x2e\x63\x6f\x6e\x6
6", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=642, ...}) = 0
read(5, "\x23\x20\x2f\x65\x74\x63\x2f\x78"..., 4096) = 642
read(5, "", 4096) = 0
close(5) = 0
openat(AT_FDCWD, "\x2f\x75\x73\x72\x2f\x6c\x69\x62\x36\x34\x2f\x67\x63\x6f\x6
e\x76\x2f\x67\x63\x6f\x6e\x76\x2d\x6d\x6f\x64\x75\x6c\x65\x73\x2e\x63\x61\x63
\x68\x65", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0644, st_size=26988, ...}) = 0
mmap(NULL, 26988, PROT_READ, MAP_SHARED, 5, 0) = 0x7f5f04da2000
close(5) = 0
fgetxattr(3, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., NULL, 0) = 4
fgetxattr(3, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., "\x7a\x6c\x69\x62", 4) =
4
fsetxattr(4, "\x62\x74\x72\x66\x73\x2e\x63\x6f"..., "\x7a\x6c\x69\x62", 4, 0)
= 0
fgetxattr(3, "\x73\x79\x73\x74\x65\x6d\x2e\x70"..., 0x7fff8daf2580, 132) = -1
ENODATA (No data available)
fstat(3, {st_mode=S_IFREG|0644, st_size=1048576, ...}) = 0
fsetxattr(4, "\x73\x79\x73\x74\x65\x6d\x2e\x70"..., "\x02\x00\x00\x00\x01\x00
\x06\x00"..., 28, 0) = 0
close(4) = 0
close(3) = 0
munmap(0x7f5f0440f000, 139264) = 0
lseek(0, 0, SEEK_CUR) = -1 ESPIPE (Illegal seek)
close(0) = 0
close(1) = 0
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++