From 1e6a4de9afe59098213e233e344f634ab805f990 Mon Sep 17 00:00:00 2001 From: Mikhail Novosyolov Date: Thu, 27 Oct 2022 09:46:15 +0300 Subject: [PATCH] aufs: rediff patch Just rediff without changes. It did not apply to 5.15.75. --- 0001-Apply-AUFS-5.patch | 11374 +++++++++++++++++++------------------- 1 file changed, 5800 insertions(+), 5574 deletions(-) diff --git a/0001-Apply-AUFS-5.patch b/0001-Apply-AUFS-5.patch index 2921f9f..8b3aae2 100644 --- a/0001-Apply-AUFS-5.patch +++ b/0001-Apply-AUFS-5.patch @@ -1,6 +1,8 @@ -diff -ruN a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs ---- a/Documentation/ABI/testing/debugfs-aufs 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/ABI/testing/debugfs-aufs 2022-03-09 15:19:00.000000000 +0300 +diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs +new file mode 100644 +index 000000000000..45b739879d76 +--- /dev/null ++++ b/Documentation/ABI/testing/debugfs-aufs @@ -0,0 +1,55 @@ +What: /debug/aufs/si_/ +Date: March 2009 @@ -57,9 +59,11 @@ diff -ruN a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/d + be created. + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. -diff -ruN a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs ---- a/Documentation/ABI/testing/sysfs-aufs 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/ABI/testing/sysfs-aufs 2022-03-09 15:19:00.000000000 +0300 +diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs +new file mode 100644 +index 000000000000..48500c0569e6 +--- /dev/null ++++ b/Documentation/ABI/testing/sysfs-aufs @@ -0,0 +1,31 @@ +What: /sys/fs/aufs/si_/ +Date: March 2009 @@ -92,1408 +96,11 @@ diff -ruN a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sys + even if it is the default path. + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. -diff -ruN a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt ---- a/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/01intro.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,171 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Introduction -+---------------------------------------- -+ -+aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s] -+1. abbrev. for "advanced multi-layered unification filesystem". -+2. abbrev. for "another unionfs". -+3. abbrev. for "auf das" in German which means "on the" in English. -+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). -+ But "Filesystem aufs Filesystem" is hard to understand. -+4. abbrev. for "African Urban Fashion Show". -+ -+AUFS is a filesystem with features: -+- multi layered stackable unification filesystem, the member directory -+ is called as a branch. -+- branch permission and attribute, 'readonly', 'real-readonly', -+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their -+ combination. -+- internal "file copy-on-write". -+- logical deletion, whiteout. -+- dynamic branch manipulation, adding, deleting and changing permission. -+- allow bypassing aufs, user's direct branch access. -+- external inode number translation table and bitmap which maintains the -+ persistent aufs inode number. -+- seekable directory, including NFS readdir. -+- file mapping, mmap and sharing pages. -+- pseudo-link, hardlink over branches. -+- loopback mounted filesystem as a branch. -+- several policies to select one among multiple writable branches. -+- revert a single systemcall when an error occurs in aufs. -+- and more... -+ -+ -+Multi Layered Stackable Unification Filesystem -+---------------------------------------------------------------------- -+Most people already knows what it is. -+It is a filesystem which unifies several directories and provides a -+merged single directory. When users access a file, the access will be -+passed/re-directed/converted (sorry, I am not sure which English word is -+correct) to the real file on the member filesystem. The member -+filesystem is called 'lower filesystem' or 'branch' and has a mode -+'readonly' and 'readwrite.' And the deletion for a file on the lower -+readonly branch is handled by creating 'whiteout' on the upper writable -+branch. -+ -+On LKML, there have been discussions about UnionMount (Jan Blunck, -+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took -+different approaches to implement the merged-view. -+The former tries putting it into VFS, and the latter implements as a -+separate filesystem. -+(If I misunderstand about these implementations, please let me know and -+I shall correct it. Because it is a long time ago when I read their -+source files last time). -+ -+UnionMount's approach will be able to small, but may be hard to share -+branches between several UnionMount since the whiteout in it is -+implemented in the inode on branch filesystem and always -+shared. According to Bharata's post, readdir does not seems to be -+finished yet. -+There are several missing features known in this implementations such as -+- for users, the inode number may change silently. eg. copy-up. -+- link(2) may break by copy-up. -+- read(2) may get an obsoleted filedata (fstat(2) too). -+- fcntl(F_SETLK) may be broken by copy-up. -+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after -+ open(O_RDWR). -+ -+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was -+merged into mainline. This is another implementation of UnionMount as a -+separated filesystem. All the limitations and known problems which -+UnionMount are equally inherited to "overlay" filesystem. -+ -+Unionfs has a longer history. When I started implementing a stackable -+filesystem (Aug 2005), it already existed. It has virtual super_block, -+inode, dentry and file objects and they have an array pointing lower -+same kind objects. After contributing many patches for Unionfs, I -+re-started my project AUFS (Jun 2006). -+ -+In AUFS, the structure of filesystem resembles to Unionfs, but I -+implemented my own ideas, approaches and enhancements and it became -+totally different one. -+ -+Comparing DM snapshot and fs based implementation -+- the number of bytes to be copied between devices is much smaller. -+- the type of filesystem must be one and only. -+- the fs must be writable, no readonly fs, even for the lower original -+ device. so the compression fs will not be usable. but if we use -+ loopback mount, we may address this issue. -+ for instance, -+ mount /cdrom/squashfs.img /sq -+ losetup /sq/ext2.img -+ losetup /somewhere/cow -+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..." -+- it will be difficult (or needs more operations) to extract the -+ difference between the original device and COW. -+- DM snapshot-merge may help a lot when users try merging. in the -+ fs-layer union, users will use rsync(1). -+ -+You may want to read my old paper "Filesystems in LiveCD" -+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf). -+ -+ -+Several characters/aspects/persona of aufs -+---------------------------------------------------------------------- -+ -+Aufs has several characters, aspects or persona. -+1. a filesystem, callee of VFS helper -+2. sub-VFS, caller of VFS helper for branches -+3. a virtual filesystem which maintains persistent inode number -+4. reader/writer of files on branches such like an application -+ -+1. Callee of VFS Helper -+As an ordinary linux filesystem, aufs is a callee of VFS. For instance, -+unlink(2) from an application reaches sys_unlink() kernel function and -+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it -+calls filesystem specific unlink operation. Actually aufs implements the -+unlink operation but it behaves like a redirector. -+ -+2. Caller of VFS Helper for Branches -+aufs_unlink() passes the unlink request to the branch filesystem as if -+it were called from VFS. So the called unlink operation of the branch -+filesystem acts as usual. As a caller of VFS helper, aufs should handle -+every necessary pre/post operation for the branch filesystem. -+- acquire the lock for the parent dir on a branch -+- lookup in a branch -+- revalidate dentry on a branch -+- mnt_want_write() for a branch -+- vfs_unlink() for a branch -+- mnt_drop_write() for a branch -+- release the lock on a branch -+ -+3. Persistent Inode Number -+One of the most important issue for a filesystem is to maintain inode -+numbers. This is particularly important to support exporting a -+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a -+backend block device for its own. But some storage is necessary to -+keep and maintain the inode numbers. It may be a large space and may not -+suit to keep in memory. Aufs rents some space from its first writable -+branch filesystem (by default) and creates file(s) on it. These files -+are created by aufs internally and removed soon (currently) keeping -+opened. -+Note: Because these files are removed, they are totally gone after -+ unmounting aufs. It means the inode numbers are not persistent -+ across unmount or reboot. I have a plan to make them really -+ persistent which will be important for aufs on NFS server. -+ -+4. Read/Write Files Internally (copy-on-write) -+Because a branch can be readonly, when you write a file on it, aufs will -+"copy-up" it to the upper writable branch internally. And then write the -+originally requested thing to the file. Generally kernel doesn't -+open/read/write file actively. In aufs, even a single write may cause a -+internal "file copy". This behaviour is very similar to cp(1) command. -+ -+Some people may think it is better to pass such work to user space -+helper, instead of doing in kernel space. Actually I am still thinking -+about it. But currently I have implemented it in kernel space. -diff -ruN a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt ---- a/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/02struct.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,258 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Basic Aufs Internal Structure -+ -+Superblock/Inode/Dentry/File Objects -+---------------------------------------------------------------------- -+As like an ordinary filesystem, aufs has its own -+superblock/inode/dentry/file objects. All these objects have a -+dynamically allocated array and store the same kind of pointers to the -+lower filesystem, branch. -+For example, when you build a union with one readwrite branch and one -+readonly, mounted /au, /rw and /ro respectively. -+- /au = /rw + /ro -+- /ro/fileA exists but /rw/fileA -+ -+Aufs lookup operation finds /ro/fileA and gets dentry for that. These -+pointers are stored in a aufs dentry. The array in aufs dentry will be, -+- [0] = NULL (because /rw/fileA doesn't exist) -+- [1] = /ro/fileA -+ -+This style of an array is essentially same to the aufs -+superblock/inode/dentry/file objects. -+ -+Because aufs supports manipulating branches, ie. add/delete/change -+branches dynamically, these objects has its own generation. When -+branches are changed, the generation in aufs superblock is -+incremented. And a generation in other object are compared when it is -+accessed. When a generation in other objects are obsoleted, aufs -+refreshes the internal array. -+ -+ -+Superblock -+---------------------------------------------------------------------- -+Additionally aufs superblock has some data for policies to select one -+among multiple writable branches, XIB files, pseudo-links and kobject. -+See below in detail. -+About the policies which supports copy-down a directory, see -+wbr_policy.txt too. -+ -+ -+Branch and XINO(External Inode Number Translation Table) -+---------------------------------------------------------------------- -+Every branch has its own xino (external inode number translation table) -+file. The xino file is created and unlinked by aufs internally. When two -+members of a union exist on the same filesystem, they share the single -+xino file. -+The struct of a xino file is simple, just a sequence of aufs inode -+numbers which is indexed by the lower inode number. -+In the above sample, assume the inode number of /ro/fileA is i111 and -+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as -+4(8) bytes at 111 * 4(8) bytes offset in the xino file. -+ -+When the inode numbers are not contiguous, the xino file will be sparse -+which has a hole in it and doesn't consume as much disk space as it -+might appear. If your branch filesystem consumes disk space for such -+holes, then you should specify 'xino=' option at mounting aufs. -+ -+Aufs has a mount option to free the disk blocks for such holes in XINO -+files on tmpfs or ramdisk. But it is not so effective actually. If you -+meet a problem of disk shortage due to XINO files, then you should try -+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git. -+The patch localizes the assignment inumbers per tmpfs-mount and avoid -+the holes in XINO files. -+ -+Also a writable branch has three kinds of "whiteout bases". All these -+are existed when the branch is joined to aufs, and their names are -+whiteout-ed doubly, so that users will never see their names in aufs -+hierarchy. -+1. a regular file which will be hardlinked to all whiteouts. -+2. a directory to store a pseudo-link. -+3. a directory to store an "orphan"-ed file temporary. -+ -+1. Whiteout Base -+ When you remove a file on a readonly branch, aufs handles it as a -+ logical deletion and creates a whiteout on the upper writable branch -+ as a hardlink of this file in order not to consume inode on the -+ writable branch. -+2. Pseudo-link Dir -+ See below, Pseudo-link. -+3. Step-Parent Dir -+ When "fileC" exists on the lower readonly branch only and it is -+ opened and removed with its parent dir, and then user writes -+ something into it, then aufs copies-up fileC to this -+ directory. Because there is no other dir to store fileC. After -+ creating a file under this dir, the file is unlinked. -+ -+Because aufs supports manipulating branches, ie. add/delete/change -+dynamically, a branch has its own id. When the branch order changes, -+aufs finds the new index by searching the branch id. -+ -+ -+Pseudo-link -+---------------------------------------------------------------------- -+Assume "fileA" exists on the lower readonly branch only and it is -+hardlinked to "fileB" on the branch. When you write something to fileA, -+aufs copies-up it to the upper writable branch. Additionally aufs -+creates a hardlink under the Pseudo-link Directory of the writable -+branch. The inode of a pseudo-link is kept in aufs super_block as a -+simple list. If fileB is read after unlinking fileA, aufs returns -+filedata from the pseudo-link instead of the lower readonly -+branch. Because the pseudo-link is based upon the inode, to keep the -+inode number by xino (see above) is essentially necessary. -+ -+All the hardlinks under the Pseudo-link Directory of the writable branch -+should be restored in a proper location later. Aufs provides a utility -+to do this. The userspace helpers executed at remounting and unmounting -+aufs by default. -+During this utility is running, it puts aufs into the pseudo-link -+maintenance mode. In this mode, only the process which began the -+maintenance mode (and its child processes) is allowed to operate in -+aufs. Some other processes which are not related to the pseudo-link will -+be allowed to run too, but the rest have to return an error or wait -+until the maintenance mode ends. If a process already acquires an inode -+mutex (in VFS), it has to return an error. -+ -+ -+XIB(external inode number bitmap) -+---------------------------------------------------------------------- -+Addition to the xino file per a branch, aufs has an external inode number -+bitmap in a superblock object. It is also an internal file such like a -+xino file. -+It is a simple bitmap to mark whether the aufs inode number is in-use or -+not. -+To reduce the file I/O, aufs prepares a single memory page to cache xib. -+ -+As well as XINO files, aufs has a feature to truncate/refresh XIB to -+reduce the number of consumed disk blocks for these files. -+ -+ -+Virtual or Vertical Dir, and Readdir in Userspace -+---------------------------------------------------------------------- -+In order to support multiple layers (branches), aufs readdir operation -+constructs a virtual dir block on memory. For readdir, aufs calls -+vfs_readdir() internally for each dir on branches, merges their entries -+with eliminating the whiteout-ed ones, and sets it to file (dir) -+object. So the file object has its entry list until it is closed. The -+entry list will be updated when the file position is zero and becomes -+obsoleted. This decision is made in aufs automatically. -+ -+The dynamically allocated memory block for the name of entries has a -+unit of 512 bytes (by default) and stores the names contiguously (no -+padding). Another block for each entry is handled by kmem_cache too. -+During building dir blocks, aufs creates hash list and judging whether -+the entry is whiteouted by its upper branch or already listed. -+The merged result is cached in the corresponding inode object and -+maintained by a customizable life-time option. -+ -+Some people may call it can be a security hole or invite DoS attack -+since the opened and once readdir-ed dir (file object) holds its entry -+list and becomes a pressure for system memory. But I'd say it is similar -+to files under /proc or /sys. The virtual files in them also holds a -+memory page (generally) while they are opened. When an idea to reduce -+memory for them is introduced, it will be applied to aufs too. -+For those who really hate this situation, I've developed readdir(3) -+library which operates this merging in userspace. You just need to set -+LD_PRELOAD environment variable, and aufs will not consume no memory in -+kernel space for readdir(3). -+ -+ -+Workqueue -+---------------------------------------------------------------------- -+Aufs sometimes requires privilege access to a branch. For instance, -+in copy-up/down operation. When a user process is going to make changes -+to a file which exists in the lower readonly branch only, and the mode -+of one of ancestor directories may not be writable by a user -+process. Here aufs copy-up the file with its ancestors and they may -+require privilege to set its owner/group/mode/etc. -+This is a typical case of a application character of aufs (see -+Introduction). -+ -+Aufs uses workqueue synchronously for this case. It creates its own -+workqueue. The workqueue is a kernel thread and has privilege. Aufs -+passes the request to call mkdir or write (for example), and wait for -+its completion. This approach solves a problem of a signal handler -+simply. -+If aufs didn't adopt the workqueue and changed the privilege of the -+process, then the process may receive the unexpected SIGXFSZ or other -+signals. -+ -+Also aufs uses the system global workqueue ("events" kernel thread) too -+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a -+whiteout base and etc. This is unrelated to a privilege. -+Most of aufs operation tries acquiring a rw_semaphore for aufs -+superblock at the beginning, at the same time waits for the completion -+of all queued asynchronous tasks. -+ -+ -+Whiteout -+---------------------------------------------------------------------- -+The whiteout in aufs is very similar to Unionfs's. That is represented -+by its filename. UnionMount takes an approach of a file mode, but I am -+afraid several utilities (find(1) or something) will have to support it. -+ -+Basically the whiteout represents "logical deletion" which stops aufs to -+lookup further, but also it represents "dir is opaque" which also stop -+further lookup. -+ -+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively. -+In order to make several functions in a single systemcall to be -+revertible, aufs adopts an approach to rename a directory to a temporary -+unique whiteouted name. -+For example, in rename(2) dir where the target dir already existed, aufs -+renames the target dir to a temporary unique whiteouted name before the -+actual rename on a branch, and then handles other actions (make it opaque, -+update the attributes, etc). If an error happens in these actions, aufs -+simply renames the whiteouted name back and returns an error. If all are -+succeeded, aufs registers a function to remove the whiteouted unique -+temporary name completely and asynchronously to the system global -+workqueue. -+ -+ -+Copy-up -+---------------------------------------------------------------------- -+It is a well-known feature or concept. -+When user modifies a file on a readonly branch, aufs operate "copy-up" -+internally and makes change to the new file on the upper writable branch. -+When the trigger systemcall does not update the timestamps of the parent -+dir, aufs reverts it after copy-up. -+ -+ -+Move-down (aufs3.9 and later) -+---------------------------------------------------------------------- -+"Copy-up" is one of the essential feature in aufs. It copies a file from -+the lower readonly branch to the upper writable branch when a user -+changes something about the file. -+"Move-down" is an opposite action of copy-up. Basically this action is -+ran manually instead of automatically and internally. -+For desgin and implementation, aufs has to consider these issues. -+- whiteout for the file may exist on the lower branch. -+- ancestor directories may not exist on the lower branch. -+- diropq for the ancestor directories may exist on the upper branch. -+- free space on the lower branch will reduce. -+- another access to the file may happen during moving-down, including -+ UDBA (see "Revalidate Dentry and UDBA"). -+- the file should not be hard-linked nor pseudo-linked. they should be -+ handled by auplink utility later. -+ -+Sometimes users want to move-down a file from the upper writable branch -+to the lower readonly or writable branch. For instance, -+- the free space of the upper writable branch is going to run out. -+- create a new intermediate branch between the upper and lower branch. -+- etc. -+ -+For this purpose, use "aumvdown" command in aufs-util.git. -diff -ruN a/Documentation/filesystems/aufs/design/03atomic_open.txt b/Documentation/filesystems/aufs/design/03atomic_open.txt ---- a/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/03atomic_open.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,85 @@ -+ -+# Copyright (C) 2015-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Support for a branch who has its ->atomic_open() -+---------------------------------------------------------------------- -+The filesystems who implement its ->atomic_open() are not majority. For -+example NFSv4 does, and aufs should call NFSv4 ->atomic_open, -+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than -+->atomic_open(), NFSv4 returns an error for this open(2). While I am not -+sure whether all filesystems who have ->atomic_open() behave like this, -+but NFSv4 surely returns the error. -+ -+In order to support ->atomic_open() for aufs, there are a few -+approaches. -+ -+A. Introduce aufs_atomic_open() -+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for -+ branch fs. -+B. Introduce aufs_atomic_open() calling create, open and chmod. this is -+ an aufs user Pip Cet's approach -+ - calls aufs_create(), VFS finish_open() and notify_change(). -+ - pass fake-mode to finish_open(), and then correct the mode by -+ notify_change(). -+C. Extend aufs_open() to call branch fs's ->atomic_open() -+ - no aufs_atomic_open(). -+ - aufs_lookup() registers the TID to an aufs internal object. -+ - aufs_create() does nothing when the matching TID is registered, but -+ registers the mode. -+ - aufs_open() calls branch fs's ->atomic_open() when the matching -+ TID is registered. -+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's -+ credential -+ - no aufs_atomic_open(). -+ - aufs_create() registers the TID to an internal object. this info -+ represents "this process created this file just now." -+ - when aufs gets EACCES from branch fs's ->open(), then confirm the -+ registered TID and re-try open() with superuser's credential. -+ -+Pros and cons for each approach. -+ -+A. -+ - straightforward but highly depends upon VFS internal. -+ - the atomic behavaiour is kept. -+ - some of parameters such as nameidata are hard to reproduce for -+ branch fs. -+ - large overhead. -+B. -+ - easy to implement. -+ - the atomic behavaiour is lost. -+C. -+ - the atomic behavaiour is kept. -+ - dirty and tricky. -+ - VFS checks whether the file is created correctly after calling -+ ->create(), which means this approach doesn't work. -+D. -+ - easy to implement. -+ - the atomic behavaiour is lost. -+ - to open a file with superuser's credential and give it to a user -+ process is a bad idea, since the file object keeps the credential -+ in it. It may affect LSM or something. This approach doesn't work -+ either. -+ -+The approach A is ideal, but it hard to implement. So here is a -+variation of A, which is to be implemented. -+ -+A-1. Introduce aufs_atomic_open() -+ - calls branch fs ->atomic_open() if exists. otherwise calls -+ vfs_create() and finish_open(). -+ - the demerit is that the several checks after branch fs -+ ->atomic_open() are lost. in the ordinary case, the checks are -+ done by VFS:do_last(), lookup_open() and atomic_open(). some can -+ be implemented in aufs, but not all I am afraid. -diff -ruN a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt ---- a/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/03lookup.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,113 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Lookup in a Branch -+---------------------------------------------------------------------- -+Since aufs has a character of sub-VFS (see Introduction), it operates -+lookup for branches as VFS does. It may be a heavy work. But almost all -+lookup operation in aufs is the simplest case, ie. lookup only an entry -+directly connected to its parent. Digging down the directory hierarchy -+is unnecessary. VFS has a function lookup_one_len() for that use, and -+aufs calls it. -+ -+When a branch is a remote filesystem, aufs basically relies upon its -+->d_revalidate(), also aufs forces the hardest revalidate tests for -+them. -+For d_revalidate, aufs implements three levels of revalidate tests. See -+"Revalidate Dentry and UDBA" in detail. -+ -+ -+Test Only the Highest One for the Directory Permission (dirperm1 option) -+---------------------------------------------------------------------- -+Let's try case study. -+- aufs has two branches, upper readwrite and lower readonly. -+ /au = /rw + /ro -+- "dirA" exists under /ro, but /rw. and its mode is 0700. -+- user invoked "chmod a+rx /au/dirA" -+- the internal copy-up is activated and "/rw/dirA" is created and its -+ permission bits are set to world readable. -+- then "/au/dirA" becomes world readable? -+ -+In this case, /ro/dirA is still 0700 since it exists in readonly branch, -+or it may be a natively readonly filesystem. If aufs respects the lower -+branch, it should not respond readdir request from other users. But user -+allowed it by chmod. Should really aufs rejects showing the entries -+under /ro/dirA? -+ -+To be honest, I don't have a good solution for this case. So aufs -+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to -+users. -+When dirperm1 is specified, aufs checks only the highest one for the -+directory permission, and shows the entries. Otherwise, as usual, checks -+every dir existing on all branches and rejects the request. -+ -+As a side effect, dirperm1 option improves the performance of aufs -+because the number of permission check is reduced when the number of -+branch is many. -+ -+ -+Revalidate Dentry and UDBA (User's Direct Branch Access) -+---------------------------------------------------------------------- -+Generally VFS helpers re-validate a dentry as a part of lookup. -+0. digging down the directory hierarchy. -+1. lock the parent dir by its i_mutex. -+2. lookup the final (child) entry. -+3. revalidate it. -+4. call the actual operation (create, unlink, etc.) -+5. unlock the parent dir -+ -+If the filesystem implements its ->d_revalidate() (step 3), then it is -+called. Actually aufs implements it and checks the dentry on a branch is -+still valid. -+But it is not enough. Because aufs has to release the lock for the -+parent dir on a branch at the end of ->lookup() (step 2) and -+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still -+held by VFS. -+If the file on a branch is changed directly, eg. bypassing aufs, after -+aufs released the lock, then the subsequent operation may cause -+something unpleasant result. -+ -+This situation is a result of VFS architecture, ->lookup() and -+->d_revalidate() is separated. But I never say it is wrong. It is a good -+design from VFS's point of view. It is just not suitable for sub-VFS -+character in aufs. -+ -+Aufs supports such case by three level of revalidation which is -+selectable by user. -+1. Simple Revalidate -+ Addition to the native flow in VFS's, confirm the child-parent -+ relationship on the branch just after locking the parent dir on the -+ branch in the "actual operation" (step 4). When this validation -+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still -+ checks the validation of the dentry on branches. -+2. Monitor Changes Internally by Inotify/Fsnotify -+ Addition to above, in the "actual operation" (step 4) aufs re-lookup -+ the dentry on the branch, and returns EBUSY if it finds different -+ dentry. -+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches -+ during it is in cache. When the event is notified, aufs registers a -+ function to kernel 'events' thread by schedule_work(). And the -+ function sets some special status to the cached aufs dentry and inode -+ private data. If they are not cached, then aufs has nothing to -+ do. When the same file is accessed through aufs (step 0-3) later, -+ aufs will detect the status and refresh all necessary data. -+ In this mode, aufs has to ignore the event which is fired by aufs -+ itself. -+3. No Extra Validation -+ This is the simplest test and doesn't add any additional revalidation -+ test, and skip the revalidation in step 4. It is useful and improves -+ aufs performance when system surely hide the aufs branches from user, -+ by over-mounting something (or another method). -diff -ruN a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt ---- a/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/04branch.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,74 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Branch Manipulation -+ -+Since aufs supports dynamic branch manipulation, ie. add/remove a branch -+and changing its permission/attribute, there are a lot of works to do. -+ -+ -+Add a Branch -+---------------------------------------------------------------------- -+o Confirm the adding dir exists outside of aufs, including loopback -+ mount, and its various attributes. -+o Initialize the xino file and whiteout bases if necessary. -+ See struct.txt. -+ -+o Check the owner/group/mode of the directory -+ When the owner/group/mode of the adding directory differs from the -+ existing branch, aufs issues a warning because it may impose a -+ security risk. -+ For example, when a upper writable branch has a world writable empty -+ top directory, a malicious user can create any files on the writable -+ branch directly, like copy-up and modify manually. If something like -+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper -+ writable branch, and the writable branch is world-writable, then a -+ malicious guy may create /etc/passwd on the writable branch directly -+ and the infected file will be valid in aufs. -+ I am afraid it can be a security issue, but aufs can do nothing except -+ producing a warning. -+ -+ -+Delete a Branch -+---------------------------------------------------------------------- -+o Confirm the deleting branch is not busy -+ To be general, there is one merit to adopt "remount" interface to -+ manipulate branches. It is to discard caches. At deleting a branch, -+ aufs checks the still cached (and connected) dentries and inodes. If -+ there are any, then they are all in-use. An inode without its -+ corresponding dentry can be alive alone (for example, inotify/fsnotify case). -+ -+ For the cached one, aufs checks whether the same named entry exists on -+ other branches. -+ If the cached one is a directory, because aufs provides a merged view -+ to users, as long as one dir is left on any branch aufs can show the -+ dir to users. In this case, the branch can be removed from aufs. -+ Otherwise aufs rejects deleting the branch. -+ -+ If any file on the deleting branch is opened by aufs, then aufs -+ rejects deleting. -+ -+ -+Modify the Permission of a Branch -+---------------------------------------------------------------------- -+o Re-initialize or remove the xino file and whiteout bases if necessary. -+ See struct.txt. -+ -+o rw --> ro: Confirm the modifying branch is not busy -+ Aufs rejects the request if any of these conditions are true. -+ - a file on the branch is mmap-ed. -+ - a regular file on the branch is opened for write and there is no -+ same named entry on the upper branch. -diff -ruN a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt ---- a/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,64 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Policies to Select One among Multiple Writable Branches -+---------------------------------------------------------------------- -+When the number of writable branch is more than one, aufs has to decide -+the target branch for file creation or copy-up. By default, the highest -+writable branch which has the parent (or ancestor) dir of the target -+file is chosen (top-down-parent policy). -+By user's request, aufs implements some other policies to select the -+writable branch, for file creation several policies, round-robin, -+most-free-space, and other policies. For copy-up, top-down-parent, -+bottom-up-parent, bottom-up and others. -+ -+As expected, the round-robin policy selects the branch in circular. When -+you have two writable branches and creates 10 new files, 5 files will be -+created for each branch. mkdir(2) systemcall is an exception. When you -+create 10 new directories, all will be created on the same branch. -+And the most-free-space policy selects the one which has most free -+space among the writable branches. The amount of free space will be -+checked by aufs internally, and users can specify its time interval. -+ -+The policies for copy-up is more simple, -+top-down-parent is equivalent to the same named on in create policy, -+bottom-up-parent selects the writable branch where the parent dir -+exists and the nearest upper one from the copyup-source, -+bottom-up selects the nearest upper writable branch from the -+copyup-source, regardless the existence of the parent dir. -+ -+There are some rules or exceptions to apply these policies. -+- If there is a readonly branch above the policy-selected branch and -+ the parent dir is marked as opaque (a variation of whiteout), or the -+ target (creating) file is whiteout-ed on the upper readonly branch, -+ then the result of the policy is ignored and the target file will be -+ created on the nearest upper writable branch than the readonly branch. -+- If there is a writable branch above the policy-selected branch and -+ the parent dir is marked as opaque or the target file is whiteouted -+ on the branch, then the result of the policy is ignored and the target -+ file will be created on the highest one among the upper writable -+ branches who has diropq or whiteout. In case of whiteout, aufs removes -+ it as usual. -+- link(2) and rename(2) systemcalls are exceptions in every policy. -+ They try selecting the branch where the source exists as possible -+ since copyup a large file will take long time. If it can't be, -+ ie. the branch where the source exists is readonly, then they will -+ follow the copyup policy. -+- There is an exception for rename(2) when the target exists. -+ If the rename target exists, aufs compares the index of the branches -+ where the source and the target exists and selects the higher -+ one. If the selected branch is readonly, then aufs follows the -+ copyup policy. -diff -ruN a/Documentation/filesystems/aufs/design/06dirren.dot b/Documentation/filesystems/aufs/design/06dirren.dot ---- a/Documentation/filesystems/aufs/design/06dirren.dot 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/06dirren.dot 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,31 @@ -+ -+// to view this graph, run dot(1) command in GRAPHVIZ. -+ -+digraph G { -+node [shape=box]; -+whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"]; -+ -+node [shape=oval]; -+ -+aufs_rename -> whinfo [label="store/remove"]; -+ -+node [shape=oval]; -+inode_list [label="h_inum list in branch\ncache"]; -+ -+node [shape=box]; -+whinode [label="h_inum list file"]; -+ -+node [shape=oval]; -+brmgmt [label="br_add/del/mod/umount"]; -+ -+brmgmt -> inode_list [label="create/remove"]; -+brmgmt -> whinode [label="load/store"]; -+ -+inode_list -> whinode [style=dashed,dir=both]; -+ -+aufs_rename -> inode_list [label="add/del"]; -+ -+aufs_lookup -> inode_list [label="search"]; -+ -+aufs_lookup -> whinfo [label="load/remove"]; -+} -diff -ruN a/Documentation/filesystems/aufs/design/06dirren.txt b/Documentation/filesystems/aufs/design/06dirren.txt ---- a/Documentation/filesystems/aufs/design/06dirren.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/06dirren.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,102 @@ -+ -+# Copyright (C) 2017-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Special handling for renaming a directory (DIRREN) -+---------------------------------------------------------------------- -+First, let's assume we have a simple usecase. -+ -+- /u = /rw + /ro -+- /rw/dirA exists -+- /ro/dirA and /ro/dirA/file exist too -+- there is no dirB on both branches -+- a user issues rename("dirA", "dirB") -+ -+Now, what should aufs behave against this rename(2)? -+There are a few possible cases. -+ -+A. returns EROFS. -+ since dirA exists on a readonly branch which cannot be renamed. -+B. returns EXDEV. -+ it is possible to copy-up dirA (only the dir itself), but the child -+ entries ("file" in this case) should not be. it must be a bad -+ approach to copy-up recursively. -+C. returns a success. -+ even the branch /ro is readonly, aufs tries renaming it. Obviously it -+ is a violation of aufs' policy. -+D. construct an extra information which indicates that /ro/dirA should -+ be handled as the name of dirB. -+ overlayfs has a similar feature called REDIRECT. -+ -+Until now, aufs implements the case B only which returns EXDEV, and -+expects the userspace application behaves like mv(1) which tries -+issueing rename(2) recursively. -+ -+A new aufs feature called DIRREN is introduced which implements the case -+D. There are several "extra information" added. -+ -+1. detailed info per renamed directory -+ path: /rw/dirB/$AUFS_WH_DR_INFO_PFX. -+2. the inode-number list of directories on a branch -+ path: /rw/dirB/$AUFS_WH_DR_BRHINO -+ -+The filename of "detailed info per directory" represents the lower -+branch, and its format is -+- a type of the branch id -+ one of these. -+ + uuid (not implemented yet) -+ + fsid -+ + dev -+- the inode-number of the branch root dir -+ -+And it contains these info in a single regular file. -+- magic number -+- branch's inode-number of the logically renamed dir -+- the name of the before-renamed dir -+ -+The "detailed info per directory" file is created in aufs rename(2), and -+loaded in any lookup. -+The info is considered in lookup for the matching case only. Here -+"matching" means that the root of branch (in the info filename) is same -+to the current looking-up branch. After looking-up the before-renamed -+name, the inode-number is compared. And the matched dentry is used. -+ -+The "inode-number list of directories" is a regular file which contains -+simply the inode-numbers on the branch. The file is created or updated -+in removing the branch, and loaded in adding the branch. Its lifetime is -+equal to the branch. -+The list is refered in lookup, and when the current target inode is -+found in the list, the aufs tries loading the "detailed info per -+directory" and get the changed and valid name of the dir. -+ -+Theoretically these "extra informaiton" may be able to be put into XATTR -+in the dir inode. But aufs doesn't choose this way because -+1. XATTR may not be supported by the branch (or its configuration) -+2. XATTR may have its size limit. -+3. XATTR may be less easy to convert than a regular file, when the -+ format of the info is changed in the future. -+At the same time, I agree that the regular file approach is much slower -+than XATTR approach. So, in the future, aufs may take the XATTR or other -+better approach. -+ -+This DIRREN feature is enabled by aufs configuration, and is activated -+by a new mount option. -+ -+For the more complicated case, there is a work with UDBA option, which -+is to dected the direct access to the branches (by-passing aufs) and to -+maintain the cashes in aufs. Since a single cached aufs dentry may -+contains two names, before- and after-rename, the name comparision in -+UDBA handler may not work correctly. In this case, the behaviour will be -+equivalen to udba=reval case. -diff -ruN a/Documentation/filesystems/aufs/design/06fhsm.txt b/Documentation/filesystems/aufs/design/06fhsm.txt ---- a/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/06fhsm.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,120 @@ -+ -+# Copyright (C) 2011-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, write to the Free Software -+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ -+ -+File-based Hierarchical Storage Management (FHSM) -+---------------------------------------------------------------------- -+Hierarchical Storage Management (or HSM) is a well-known feature in the -+storage world. Aufs provides this feature as file-based with multiple -+writable branches, based upon the principle of "Colder, the Lower". -+Here the word "colder" means that the less used files, and "lower" means -+that the position in the order of the stacked branches vertically. -+These multiple writable branches are prioritized, ie. the topmost one -+should be the fastest drive and be used heavily. -+ -+o Characters in aufs FHSM story -+- aufs itself and a new branch attribute. -+- a new ioctl interface to move-down and to establish a connection with -+ the daemon ("move-down" is a converse of "copy-up"). -+- userspace tool and daemon. -+ -+The userspace daemon establishes a connection with aufs and waits for -+the notification. The notified information is very similar to struct -+statfs containing the number of consumed blocks and inodes. -+When the consumed blocks/inodes of a branch exceeds the user-specified -+upper watermark, the daemon activates its move-down process until the -+consumed blocks/inodes reaches the user-specified lower watermark. -+ -+The actual move-down is done by aufs based upon the request from -+user-space since we need to maintain the inode number and the internal -+pointer arrays in aufs. -+ -+Currently aufs FHSM handles the regular files only. Additionally they -+must not be hard-linked nor pseudo-linked. -+ -+ -+o Cowork of aufs and the user-space daemon -+ During the userspace daemon established the connection, aufs sends a -+ small notification to it whenever aufs writes something into the -+ writable branch. But it may cost high since aufs issues statfs(2) -+ internally. So user can specify a new option to cache the -+ info. Actually the notification is controlled by these factors. -+ + the specified cache time. -+ + classified as "force" by aufs internally. -+ Until the specified time expires, aufs doesn't send the info -+ except the forced cases. When aufs decide forcing, the info is always -+ notified to userspace. -+ For example, the number of free inodes is generally large enough and -+ the shortage of it happens rarely. So aufs doesn't force the -+ notification when creating a new file, directory and others. This is -+ the typical case which aufs doesn't force. -+ When aufs writes the actual filedata and the files consumes any of new -+ blocks, the aufs forces notifying. -+ -+ -+o Interfaces in aufs -+- New branch attribute. -+ + fhsm -+ Specifies that the branch is managed by FHSM feature. In other word, -+ participant in the FHSM. -+ When nofhsm is set to the branch, it will not be the source/target -+ branch of the move-down operation. This attribute is set -+ independently from coo and moo attributes, and if you want full -+ FHSM, you should specify them as well. -+- New mount option. -+ + fhsm_sec -+ Specifies a second to suppress many less important info to be -+ notified. -+- New ioctl. -+ + AUFS_CTL_FHSM_FD -+ create a new file descriptor which userspace can read the notification -+ (a subset of struct statfs) from aufs. -+- Module parameter 'brs' -+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not -+ be set. -+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs -+ When there are two or more branches with fhsm attributes, -+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs -+ terminates it. As a result of remounting and branch-manipulation, the -+ number of branches with fhsm attribute can be one. In this case, -+ /sbin/mount.aufs will terminate the user-space daemon. -+ -+ -+Finally the operation is done as these steps in kernel-space. -+- make sure that, -+ + no one else is using the file. -+ + the file is not hard-linked. -+ + the file is not pseudo-linked. -+ + the file is a regular file. -+ + the parent dir is not opaqued. -+- find the target writable branch. -+- make sure the file is not whiteout-ed by the upper (than the target) -+ branch. -+- make the parent dir on the target branch. -+- mutex lock the inode on the branch. -+- unlink the whiteout on the target branch (if exists). -+- lookup and create the whiteout-ed temporary name on the target branch. -+- copy the file as the whiteout-ed temporary name on the target branch. -+- rename the whiteout-ed temporary name to the original name. -+- unlink the file on the source branch. -+- maintain the internal pointer array and the external inode number -+ table (XINO). -+- maintain the timestamps and other attributes of the parent dir and the -+ file. -+ -+And of course, in every step, an error may happen. So the operation -+should restore the original file state after an error happens. -diff -ruN a/Documentation/filesystems/aufs/design/06mmap.txt b/Documentation/filesystems/aufs/design/06mmap.txt ---- a/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/06mmap.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,72 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+mmap(2) -- File Memory Mapping -+---------------------------------------------------------------------- -+In aufs, the file-mapped pages are handled by a branch fs directly, no -+interaction with aufs. It means aufs_mmap() calls the branch fs's -+->mmap(). -+This approach is simple and good, but there is one problem. -+Under /proc, several entries show the mmapped files by its path (with -+device and inode number), and the printed path will be the path on the -+branch fs's instead of virtual aufs's. -+This is not a problem in most cases, but some utilities lsof(1) (and its -+user) may expect the path on aufs. -+ -+To address this issue, aufs adds a new member called vm_prfile in struct -+vm_area_struct (and struct vm_region). The original vm_file points to -+the file on the branch fs in order to handle everything correctly as -+usual. The new vm_prfile points to a virtual file in aufs, and the -+show-functions in procfs refers to vm_prfile if it is set. -+Also we need to maintain several other places where touching vm_file -+such like -+- fork()/clone() copies vma and the reference count of vm_file is -+ incremented. -+- merging vma maintains the ref count too. -+ -+This is not a good approach. It just fakes the printed path. But it -+leaves all behaviour around f_mapping unchanged. This is surely an -+advantage. -+Actually aufs had adopted another complicated approach which calls -+generic_file_mmap() and handles struct vm_operations_struct. In this -+approach, aufs met a hard problem and I could not solve it without -+switching the approach. -+ -+There may be one more another approach which is -+- bind-mount the branch-root onto the aufs-root internally -+- grab the new vfsmount (ie. struct mount) -+- lazy-umount the branch-root internally -+- in open(2) the aufs-file, open the branch-file with the hidden -+ vfsmount (instead of the original branch's vfsmount) -+- ideally this "bind-mount and lazy-umount" should be done atomically, -+ but it may be possible from userspace by the mount helper. -+ -+Adding the internal hidden vfsmount and using it in opening a file, the -+file path under /proc will be printed correctly. This approach looks -+smarter, but is not possible I am afraid. -+- aufs-root may be bind-mount later. when it happens, another hidden -+ vfsmount will be required. -+- it is hard to get the chance to bind-mount and lazy-umount -+ + in kernel-space, FS can have vfsmount in open(2) via -+ file->f_path, and aufs can know its vfsmount. But several locks are -+ already acquired, and if aufs tries to bind-mount and lazy-umount -+ here, then it may cause a deadlock. -+ + in user-space, bind-mount doesn't invoke the mount helper. -+- since /proc shows dev and ino, aufs has to give vma these info. it -+ means a new member vm_prinode will be necessary. this is essentially -+ equivalent to vm_prfile described above. -+ -+I have to give up this "looks-smater" approach. -diff -ruN a/Documentation/filesystems/aufs/design/06xattr.txt b/Documentation/filesystems/aufs/design/06xattr.txt ---- a/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/06xattr.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,96 @@ -+ -+# Copyright (C) 2014-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; if not, write to the Free Software -+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -+ -+ -+Listing XATTR/EA and getting the value -+---------------------------------------------------------------------- -+For the inode standard attributes (owner, group, timestamps, etc.), aufs -+shows the values from the topmost existing file. This behaviour is good -+for the non-dir entries since the bahaviour exactly matches the shown -+information. But for the directories, aufs considers all the same named -+entries on the lower branches. Which means, if one of the lower entry -+rejects readdir call, then aufs returns an error even if the topmost -+entry allows it. This behaviour is necessary to respect the branch fs's -+security, but can make users confused since the user-visible standard -+attributes don't match the behaviour. -+To address this issue, aufs has a mount option called dirperm1 which -+checks the permission for the topmost entry only, and ignores the lower -+entry's permission. -+ -+A similar issue can happen around XATTR. -+getxattr(2) and listxattr(2) families behave as if dirperm1 option is -+always set. Otherwise these very unpleasant situation would happen. -+- listxattr(2) may return the duplicated entries. -+- users may not be able to remove or reset the XATTR forever, -+ -+ -+XATTR/EA support in the internal (copy,move)-(up,down) -+---------------------------------------------------------------------- -+Generally the extended attributes of inode are categorized as these. -+- "security" for LSM and capability. -+- "system" for posix ACL, 'acl' mount option is required for the branch -+ fs generally. -+- "trusted" for userspace, CAP_SYS_ADMIN is required. -+- "user" for userspace, 'user_xattr' mount option is required for the -+ branch fs generally. -+ -+Moreover there are some other categories. Aufs handles these rather -+unpopular categories as the ordinary ones, ie. there is no special -+condition nor exception. -+ -+In copy-up, the support for XATTR on the dst branch may differ from the -+src branch. In this case, the copy-up operation will get an error and -+the original user operation which triggered the copy-up will fail. It -+can happen that even all copy-up will fail. -+When both of src and dst branches support XATTR and if an error occurs -+during copying XATTR, then the copy-up should fail obviously. That is a -+good reason and aufs should return an error to userspace. But when only -+the src branch support that XATTR, aufs should not return an error. -+For example, the src branch supports ACL but the dst branch doesn't -+because the dst branch may natively un-support it or temporary -+un-support it due to "noacl" mount option. Of course, the dst branch fs -+may NOT return an error even if the XATTR is not supported. It is -+totally up to the branch fs. -+ -+Anyway when the aufs internal copy-up gets an error from the dst branch -+fs, then aufs tries removing the just copied entry and returns the error -+to the userspace. The worst case of this situation will be all copy-up -+will fail. -+ -+For the copy-up operation, there two basic approaches. -+- copy the specified XATTR only (by category above), and return the -+ error unconditionally if it happens. -+- copy all XATTR, and ignore the error on the specified category only. -+ -+In order to support XATTR and to implement the correct behaviour, aufs -+chooses the latter approach and introduces some new branch attributes, -+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth". -+They correspond to the XATTR namespaces (see above). Additionally, to be -+convenient, "icex" is also provided which means all "icex*" attributes -+are set (here the word "icex" stands for "ignore copy-error on XATTR"). -+ -+The meaning of these attributes is to ignore the error from setting -+XATTR on that branch. -+Note that aufs tries copying all XATTR unconditionally, and ignores the -+error from the dst branch according to the specified attributes. -+ -+Some XATTR may have its default value. The default value may come from -+the parent dir or the environment. If the default value is set at the -+file creating-time, it will be overwritten by copy-up. -+Some contradiction may happen I am afraid. -+Do we need another attribute to stop copying XATTR? I am unsure. For -+now, aufs implements the branch attributes to ignore the error. -diff -ruN a/Documentation/filesystems/aufs/design/07export.txt b/Documentation/filesystems/aufs/design/07export.txt ---- a/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/07export.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,58 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Export Aufs via NFS -+---------------------------------------------------------------------- -+Here is an approach. -+- like xino/xib, add a new file 'xigen' which stores aufs inode -+ generation. -+- iget_locked(): initialize aufs inode generation for a new inode, and -+ store it in xigen file. -+- destroy_inode(): increment aufs inode generation and store it in xigen -+ file. it is necessary even if it is not unlinked, because any data of -+ inode may be changed by UDBA. -+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise -+ build file handle by -+ + branch id (4 bytes) -+ + superblock generation (4 bytes) -+ + inode number (4 or 8 bytes) -+ + parent dir inode number (4 or 8 bytes) -+ + inode generation (4 bytes)) -+ + return value of exportfs_encode_fh() for the parent on a branch (4 -+ bytes) -+ + file handle for a branch (by exportfs_encode_fh()) -+- fh_to_dentry(): -+ + find the index of a branch from its id in handle, and check it is -+ still exist in aufs. -+ + 1st level: get the inode number from handle and search it in cache. -+ + 2nd level: if not found in cache, get the parent inode number from -+ the handle and search it in cache. and then open the found parent -+ dir, find the matching inode number by vfs_readdir() and get its -+ name, and call lookup_one_len() for the target dentry. -+ + 3rd level: if the parent dir is not cached, call -+ exportfs_decode_fh() for a branch and get the parent on a branch, -+ build a pathname of it, convert it a pathname in aufs, call -+ path_lookup(). now aufs gets a parent dir dentry, then handle it as -+ the 2nd level. -+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount -+ for every branch, but not itself. to get this, (currently) aufs -+ searches in current->nsproxy->mnt_ns list. it may not be a good -+ idea, but I didn't get other approach. -+ + test the generation of the gotten inode. -+- every inode operation: they may get EBUSY due to UDBA. in this case, -+ convert it into ESTALE for NFSD. -+- readdir(): call lockdep_on/off() because filldir in NFSD calls -+ lookup_one_len(), vfs_getattr(), encode_fh() and others. -diff -ruN a/Documentation/filesystems/aufs/design/08shwh.txt b/Documentation/filesystems/aufs/design/08shwh.txt ---- a/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/08shwh.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,52 @@ -+ -+# Copyright (C) 2005-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Show Whiteout Mode (shwh) -+---------------------------------------------------------------------- -+Generally aufs hides the name of whiteouts. But in some cases, to show -+them is very useful for users. For instance, creating a new middle layer -+(branch) by merging existing layers. -+ -+(borrowing aufs1 HOW-TO from a user, Michael Towers) -+When you have three branches, -+- Bottom: 'system', squashfs (underlying base system), read-only -+- Middle: 'mods', squashfs, read-only -+- Top: 'overlay', ram (tmpfs), read-write -+ -+The top layer is loaded at boot time and saved at shutdown, to preserve -+the changes made to the system during the session. -+When larger changes have been made, or smaller changes have accumulated, -+the size of the saved top layer data grows. At this point, it would be -+nice to be able to merge the two overlay branches ('mods' and 'overlay') -+and rewrite the 'mods' squashfs, clearing the top layer and thus -+restoring save and load speed. -+ -+This merging is simplified by the use of another aufs mount, of just the -+two overlay branches using the 'shwh' option. -+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \ -+ aufs /livesys/merge_union -+ -+A merged view of these two branches is then available at -+/livesys/merge_union, and the new feature is that the whiteouts are -+visible! -+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable -+writing to all branches. Also the default mode for all branches is 'ro'. -+It is now possible to save the combined contents of the two overlay -+branches to a new squashfs, e.g.: -+# mksquashfs /livesys/merge_union /path/to/newmods.squash -+ -+This new squashfs archive can be stored on the boot device and the -+initramfs will use it to replace the old one at the next boot. -diff -ruN a/Documentation/filesystems/aufs/design/10dynop.txt b/Documentation/filesystems/aufs/design/10dynop.txt ---- a/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/design/10dynop.txt 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,47 @@ -+ -+# Copyright (C) 2010-2021 Junjiro R. Okajima -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+ -+Dynamically customizable FS operations -+---------------------------------------------------------------------- -+Generally FS operations (struct inode_operations, struct -+address_space_operations, struct file_operations, etc.) are defined as -+"static const", but it never means that FS have only one set of -+operation. Some FS have multiple sets of them. For instance, ext2 has -+three sets, one for XIP, for NOBH, and for normal. -+Since aufs overrides and redirects these operations, sometimes aufs has -+to change its behaviour according to the branch FS type. More importantly -+VFS acts differently if a function (member in the struct) is set or -+not. It means aufs should have several sets of operations and select one -+among them according to the branch FS definition. -+ -+In order to solve this problem and not to affect the behaviour of VFS, -+aufs defines these operations dynamically. For instance, aufs defines -+dummy direct_IO function for struct address_space_operations, but it may -+not be set to the address_space_operations actually. When the branch FS -+doesn't have it, aufs doesn't set it to its address_space_operations -+while the function definition itself is still alive. So the behaviour -+itself will not change, and it will return an error when direct_IO is -+not set. -+ -+The lifetime of these dynamically generated operation object is -+maintained by aufs branch object. When the branch is removed from aufs, -+the reference counter of the object is decremented. When it reaches -+zero, the dynamically generated operation object will be freed. -+ -+This approach is designed to support AIO (io_submit), Direct I/O and -+XIP (DAX) mainly. -+Currently this approach is applied to address_space_operations for -+regular files only. -diff -ruN a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README ---- a/Documentation/filesystems/aufs/README 1970-01-01 03:00:00.000000000 +0300 -+++ b/Documentation/filesystems/aufs/README 2022-03-09 15:19:00.000000000 +0300 +diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README +new file mode 100644 +index 000000000000..9c05340b9dda +--- /dev/null ++++ b/Documentation/filesystems/aufs/README @@ -0,0 +1,396 @@ + +Aufs5 -- advanced multi layered unification filesystem version 5.x @@ -1891,10 +498,1462 @@ diff -ruN a/Documentation/filesystems/aufs/README b/Documentation/filesystems/au +# Local variables: ; +# mode: text; +# End: ; -diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c ---- a/drivers/block/loop.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/drivers/block/loop.c 2022-03-30 02:32:42.071758101 +0300 -@@ -685,6 +685,15 @@ +diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt +new file mode 100644 +index 000000000000..ca1e93a99915 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/01intro.txt +@@ -0,0 +1,171 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Introduction ++---------------------------------------- ++ ++aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s] ++1. abbrev. for "advanced multi-layered unification filesystem". ++2. abbrev. for "another unionfs". ++3. abbrev. for "auf das" in German which means "on the" in English. ++ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). ++ But "Filesystem aufs Filesystem" is hard to understand. ++4. abbrev. for "African Urban Fashion Show". ++ ++AUFS is a filesystem with features: ++- multi layered stackable unification filesystem, the member directory ++ is called as a branch. ++- branch permission and attribute, 'readonly', 'real-readonly', ++ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their ++ combination. ++- internal "file copy-on-write". ++- logical deletion, whiteout. ++- dynamic branch manipulation, adding, deleting and changing permission. ++- allow bypassing aufs, user's direct branch access. ++- external inode number translation table and bitmap which maintains the ++ persistent aufs inode number. ++- seekable directory, including NFS readdir. ++- file mapping, mmap and sharing pages. ++- pseudo-link, hardlink over branches. ++- loopback mounted filesystem as a branch. ++- several policies to select one among multiple writable branches. ++- revert a single systemcall when an error occurs in aufs. ++- and more... ++ ++ ++Multi Layered Stackable Unification Filesystem ++---------------------------------------------------------------------- ++Most people already knows what it is. ++It is a filesystem which unifies several directories and provides a ++merged single directory. When users access a file, the access will be ++passed/re-directed/converted (sorry, I am not sure which English word is ++correct) to the real file on the member filesystem. The member ++filesystem is called 'lower filesystem' or 'branch' and has a mode ++'readonly' and 'readwrite.' And the deletion for a file on the lower ++readonly branch is handled by creating 'whiteout' on the upper writable ++branch. ++ ++On LKML, there have been discussions about UnionMount (Jan Blunck, ++Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took ++different approaches to implement the merged-view. ++The former tries putting it into VFS, and the latter implements as a ++separate filesystem. ++(If I misunderstand about these implementations, please let me know and ++I shall correct it. Because it is a long time ago when I read their ++source files last time). ++ ++UnionMount's approach will be able to small, but may be hard to share ++branches between several UnionMount since the whiteout in it is ++implemented in the inode on branch filesystem and always ++shared. According to Bharata's post, readdir does not seems to be ++finished yet. ++There are several missing features known in this implementations such as ++- for users, the inode number may change silently. eg. copy-up. ++- link(2) may break by copy-up. ++- read(2) may get an obsoleted filedata (fstat(2) too). ++- fcntl(F_SETLK) may be broken by copy-up. ++- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after ++ open(O_RDWR). ++ ++In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was ++merged into mainline. This is another implementation of UnionMount as a ++separated filesystem. All the limitations and known problems which ++UnionMount are equally inherited to "overlay" filesystem. ++ ++Unionfs has a longer history. When I started implementing a stackable ++filesystem (Aug 2005), it already existed. It has virtual super_block, ++inode, dentry and file objects and they have an array pointing lower ++same kind objects. After contributing many patches for Unionfs, I ++re-started my project AUFS (Jun 2006). ++ ++In AUFS, the structure of filesystem resembles to Unionfs, but I ++implemented my own ideas, approaches and enhancements and it became ++totally different one. ++ ++Comparing DM snapshot and fs based implementation ++- the number of bytes to be copied between devices is much smaller. ++- the type of filesystem must be one and only. ++- the fs must be writable, no readonly fs, even for the lower original ++ device. so the compression fs will not be usable. but if we use ++ loopback mount, we may address this issue. ++ for instance, ++ mount /cdrom/squashfs.img /sq ++ losetup /sq/ext2.img ++ losetup /somewhere/cow ++ dmsetup "snapshot /dev/loop0 /dev/loop1 ..." ++- it will be difficult (or needs more operations) to extract the ++ difference between the original device and COW. ++- DM snapshot-merge may help a lot when users try merging. in the ++ fs-layer union, users will use rsync(1). ++ ++You may want to read my old paper "Filesystems in LiveCD" ++(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf). ++ ++ ++Several characters/aspects/persona of aufs ++---------------------------------------------------------------------- ++ ++Aufs has several characters, aspects or persona. ++1. a filesystem, callee of VFS helper ++2. sub-VFS, caller of VFS helper for branches ++3. a virtual filesystem which maintains persistent inode number ++4. reader/writer of files on branches such like an application ++ ++1. Callee of VFS Helper ++As an ordinary linux filesystem, aufs is a callee of VFS. For instance, ++unlink(2) from an application reaches sys_unlink() kernel function and ++then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it ++calls filesystem specific unlink operation. Actually aufs implements the ++unlink operation but it behaves like a redirector. ++ ++2. Caller of VFS Helper for Branches ++aufs_unlink() passes the unlink request to the branch filesystem as if ++it were called from VFS. So the called unlink operation of the branch ++filesystem acts as usual. As a caller of VFS helper, aufs should handle ++every necessary pre/post operation for the branch filesystem. ++- acquire the lock for the parent dir on a branch ++- lookup in a branch ++- revalidate dentry on a branch ++- mnt_want_write() for a branch ++- vfs_unlink() for a branch ++- mnt_drop_write() for a branch ++- release the lock on a branch ++ ++3. Persistent Inode Number ++One of the most important issue for a filesystem is to maintain inode ++numbers. This is particularly important to support exporting a ++filesystem via NFS. Aufs is a virtual filesystem which doesn't have a ++backend block device for its own. But some storage is necessary to ++keep and maintain the inode numbers. It may be a large space and may not ++suit to keep in memory. Aufs rents some space from its first writable ++branch filesystem (by default) and creates file(s) on it. These files ++are created by aufs internally and removed soon (currently) keeping ++opened. ++Note: Because these files are removed, they are totally gone after ++ unmounting aufs. It means the inode numbers are not persistent ++ across unmount or reboot. I have a plan to make them really ++ persistent which will be important for aufs on NFS server. ++ ++4. Read/Write Files Internally (copy-on-write) ++Because a branch can be readonly, when you write a file on it, aufs will ++"copy-up" it to the upper writable branch internally. And then write the ++originally requested thing to the file. Generally kernel doesn't ++open/read/write file actively. In aufs, even a single write may cause a ++internal "file copy". This behaviour is very similar to cp(1) command. ++ ++Some people may think it is better to pass such work to user space ++helper, instead of doing in kernel space. Actually I am still thinking ++about it. But currently I have implemented it in kernel space. +diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt +new file mode 100644 +index 000000000000..3599289bf320 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/02struct.txt +@@ -0,0 +1,258 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Basic Aufs Internal Structure ++ ++Superblock/Inode/Dentry/File Objects ++---------------------------------------------------------------------- ++As like an ordinary filesystem, aufs has its own ++superblock/inode/dentry/file objects. All these objects have a ++dynamically allocated array and store the same kind of pointers to the ++lower filesystem, branch. ++For example, when you build a union with one readwrite branch and one ++readonly, mounted /au, /rw and /ro respectively. ++- /au = /rw + /ro ++- /ro/fileA exists but /rw/fileA ++ ++Aufs lookup operation finds /ro/fileA and gets dentry for that. These ++pointers are stored in a aufs dentry. The array in aufs dentry will be, ++- [0] = NULL (because /rw/fileA doesn't exist) ++- [1] = /ro/fileA ++ ++This style of an array is essentially same to the aufs ++superblock/inode/dentry/file objects. ++ ++Because aufs supports manipulating branches, ie. add/delete/change ++branches dynamically, these objects has its own generation. When ++branches are changed, the generation in aufs superblock is ++incremented. And a generation in other object are compared when it is ++accessed. When a generation in other objects are obsoleted, aufs ++refreshes the internal array. ++ ++ ++Superblock ++---------------------------------------------------------------------- ++Additionally aufs superblock has some data for policies to select one ++among multiple writable branches, XIB files, pseudo-links and kobject. ++See below in detail. ++About the policies which supports copy-down a directory, see ++wbr_policy.txt too. ++ ++ ++Branch and XINO(External Inode Number Translation Table) ++---------------------------------------------------------------------- ++Every branch has its own xino (external inode number translation table) ++file. The xino file is created and unlinked by aufs internally. When two ++members of a union exist on the same filesystem, they share the single ++xino file. ++The struct of a xino file is simple, just a sequence of aufs inode ++numbers which is indexed by the lower inode number. ++In the above sample, assume the inode number of /ro/fileA is i111 and ++aufs assigns the inode number i999 for fileA. Then aufs writes 999 as ++4(8) bytes at 111 * 4(8) bytes offset in the xino file. ++ ++When the inode numbers are not contiguous, the xino file will be sparse ++which has a hole in it and doesn't consume as much disk space as it ++might appear. If your branch filesystem consumes disk space for such ++holes, then you should specify 'xino=' option at mounting aufs. ++ ++Aufs has a mount option to free the disk blocks for such holes in XINO ++files on tmpfs or ramdisk. But it is not so effective actually. If you ++meet a problem of disk shortage due to XINO files, then you should try ++"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git. ++The patch localizes the assignment inumbers per tmpfs-mount and avoid ++the holes in XINO files. ++ ++Also a writable branch has three kinds of "whiteout bases". All these ++are existed when the branch is joined to aufs, and their names are ++whiteout-ed doubly, so that users will never see their names in aufs ++hierarchy. ++1. a regular file which will be hardlinked to all whiteouts. ++2. a directory to store a pseudo-link. ++3. a directory to store an "orphan"-ed file temporary. ++ ++1. Whiteout Base ++ When you remove a file on a readonly branch, aufs handles it as a ++ logical deletion and creates a whiteout on the upper writable branch ++ as a hardlink of this file in order not to consume inode on the ++ writable branch. ++2. Pseudo-link Dir ++ See below, Pseudo-link. ++3. Step-Parent Dir ++ When "fileC" exists on the lower readonly branch only and it is ++ opened and removed with its parent dir, and then user writes ++ something into it, then aufs copies-up fileC to this ++ directory. Because there is no other dir to store fileC. After ++ creating a file under this dir, the file is unlinked. ++ ++Because aufs supports manipulating branches, ie. add/delete/change ++dynamically, a branch has its own id. When the branch order changes, ++aufs finds the new index by searching the branch id. ++ ++ ++Pseudo-link ++---------------------------------------------------------------------- ++Assume "fileA" exists on the lower readonly branch only and it is ++hardlinked to "fileB" on the branch. When you write something to fileA, ++aufs copies-up it to the upper writable branch. Additionally aufs ++creates a hardlink under the Pseudo-link Directory of the writable ++branch. The inode of a pseudo-link is kept in aufs super_block as a ++simple list. If fileB is read after unlinking fileA, aufs returns ++filedata from the pseudo-link instead of the lower readonly ++branch. Because the pseudo-link is based upon the inode, to keep the ++inode number by xino (see above) is essentially necessary. ++ ++All the hardlinks under the Pseudo-link Directory of the writable branch ++should be restored in a proper location later. Aufs provides a utility ++to do this. The userspace helpers executed at remounting and unmounting ++aufs by default. ++During this utility is running, it puts aufs into the pseudo-link ++maintenance mode. In this mode, only the process which began the ++maintenance mode (and its child processes) is allowed to operate in ++aufs. Some other processes which are not related to the pseudo-link will ++be allowed to run too, but the rest have to return an error or wait ++until the maintenance mode ends. If a process already acquires an inode ++mutex (in VFS), it has to return an error. ++ ++ ++XIB(external inode number bitmap) ++---------------------------------------------------------------------- ++Addition to the xino file per a branch, aufs has an external inode number ++bitmap in a superblock object. It is also an internal file such like a ++xino file. ++It is a simple bitmap to mark whether the aufs inode number is in-use or ++not. ++To reduce the file I/O, aufs prepares a single memory page to cache xib. ++ ++As well as XINO files, aufs has a feature to truncate/refresh XIB to ++reduce the number of consumed disk blocks for these files. ++ ++ ++Virtual or Vertical Dir, and Readdir in Userspace ++---------------------------------------------------------------------- ++In order to support multiple layers (branches), aufs readdir operation ++constructs a virtual dir block on memory. For readdir, aufs calls ++vfs_readdir() internally for each dir on branches, merges their entries ++with eliminating the whiteout-ed ones, and sets it to file (dir) ++object. So the file object has its entry list until it is closed. The ++entry list will be updated when the file position is zero and becomes ++obsoleted. This decision is made in aufs automatically. ++ ++The dynamically allocated memory block for the name of entries has a ++unit of 512 bytes (by default) and stores the names contiguously (no ++padding). Another block for each entry is handled by kmem_cache too. ++During building dir blocks, aufs creates hash list and judging whether ++the entry is whiteouted by its upper branch or already listed. ++The merged result is cached in the corresponding inode object and ++maintained by a customizable life-time option. ++ ++Some people may call it can be a security hole or invite DoS attack ++since the opened and once readdir-ed dir (file object) holds its entry ++list and becomes a pressure for system memory. But I'd say it is similar ++to files under /proc or /sys. The virtual files in them also holds a ++memory page (generally) while they are opened. When an idea to reduce ++memory for them is introduced, it will be applied to aufs too. ++For those who really hate this situation, I've developed readdir(3) ++library which operates this merging in userspace. You just need to set ++LD_PRELOAD environment variable, and aufs will not consume no memory in ++kernel space for readdir(3). ++ ++ ++Workqueue ++---------------------------------------------------------------------- ++Aufs sometimes requires privilege access to a branch. For instance, ++in copy-up/down operation. When a user process is going to make changes ++to a file which exists in the lower readonly branch only, and the mode ++of one of ancestor directories may not be writable by a user ++process. Here aufs copy-up the file with its ancestors and they may ++require privilege to set its owner/group/mode/etc. ++This is a typical case of a application character of aufs (see ++Introduction). ++ ++Aufs uses workqueue synchronously for this case. It creates its own ++workqueue. The workqueue is a kernel thread and has privilege. Aufs ++passes the request to call mkdir or write (for example), and wait for ++its completion. This approach solves a problem of a signal handler ++simply. ++If aufs didn't adopt the workqueue and changed the privilege of the ++process, then the process may receive the unexpected SIGXFSZ or other ++signals. ++ ++Also aufs uses the system global workqueue ("events" kernel thread) too ++for asynchronous tasks, such like handling inotify/fsnotify, re-creating a ++whiteout base and etc. This is unrelated to a privilege. ++Most of aufs operation tries acquiring a rw_semaphore for aufs ++superblock at the beginning, at the same time waits for the completion ++of all queued asynchronous tasks. ++ ++ ++Whiteout ++---------------------------------------------------------------------- ++The whiteout in aufs is very similar to Unionfs's. That is represented ++by its filename. UnionMount takes an approach of a file mode, but I am ++afraid several utilities (find(1) or something) will have to support it. ++ ++Basically the whiteout represents "logical deletion" which stops aufs to ++lookup further, but also it represents "dir is opaque" which also stop ++further lookup. ++ ++In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively. ++In order to make several functions in a single systemcall to be ++revertible, aufs adopts an approach to rename a directory to a temporary ++unique whiteouted name. ++For example, in rename(2) dir where the target dir already existed, aufs ++renames the target dir to a temporary unique whiteouted name before the ++actual rename on a branch, and then handles other actions (make it opaque, ++update the attributes, etc). If an error happens in these actions, aufs ++simply renames the whiteouted name back and returns an error. If all are ++succeeded, aufs registers a function to remove the whiteouted unique ++temporary name completely and asynchronously to the system global ++workqueue. ++ ++ ++Copy-up ++---------------------------------------------------------------------- ++It is a well-known feature or concept. ++When user modifies a file on a readonly branch, aufs operate "copy-up" ++internally and makes change to the new file on the upper writable branch. ++When the trigger systemcall does not update the timestamps of the parent ++dir, aufs reverts it after copy-up. ++ ++ ++Move-down (aufs3.9 and later) ++---------------------------------------------------------------------- ++"Copy-up" is one of the essential feature in aufs. It copies a file from ++the lower readonly branch to the upper writable branch when a user ++changes something about the file. ++"Move-down" is an opposite action of copy-up. Basically this action is ++ran manually instead of automatically and internally. ++For desgin and implementation, aufs has to consider these issues. ++- whiteout for the file may exist on the lower branch. ++- ancestor directories may not exist on the lower branch. ++- diropq for the ancestor directories may exist on the upper branch. ++- free space on the lower branch will reduce. ++- another access to the file may happen during moving-down, including ++ UDBA (see "Revalidate Dentry and UDBA"). ++- the file should not be hard-linked nor pseudo-linked. they should be ++ handled by auplink utility later. ++ ++Sometimes users want to move-down a file from the upper writable branch ++to the lower readonly or writable branch. For instance, ++- the free space of the upper writable branch is going to run out. ++- create a new intermediate branch between the upper and lower branch. ++- etc. ++ ++For this purpose, use "aumvdown" command in aufs-util.git. +diff --git a/Documentation/filesystems/aufs/design/03atomic_open.txt b/Documentation/filesystems/aufs/design/03atomic_open.txt +new file mode 100644 +index 000000000000..2050faf901f5 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/03atomic_open.txt +@@ -0,0 +1,85 @@ ++ ++# Copyright (C) 2015-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Support for a branch who has its ->atomic_open() ++---------------------------------------------------------------------- ++The filesystems who implement its ->atomic_open() are not majority. For ++example NFSv4 does, and aufs should call NFSv4 ->atomic_open, ++particularly for open(O_CREAT|O_EXCL, 0400) case. Other than ++->atomic_open(), NFSv4 returns an error for this open(2). While I am not ++sure whether all filesystems who have ->atomic_open() behave like this, ++but NFSv4 surely returns the error. ++ ++In order to support ->atomic_open() for aufs, there are a few ++approaches. ++ ++A. Introduce aufs_atomic_open() ++ - calls one of VFS:do_last(), lookup_open() or atomic_open() for ++ branch fs. ++B. Introduce aufs_atomic_open() calling create, open and chmod. this is ++ an aufs user Pip Cet's approach ++ - calls aufs_create(), VFS finish_open() and notify_change(). ++ - pass fake-mode to finish_open(), and then correct the mode by ++ notify_change(). ++C. Extend aufs_open() to call branch fs's ->atomic_open() ++ - no aufs_atomic_open(). ++ - aufs_lookup() registers the TID to an aufs internal object. ++ - aufs_create() does nothing when the matching TID is registered, but ++ registers the mode. ++ - aufs_open() calls branch fs's ->atomic_open() when the matching ++ TID is registered. ++D. Extend aufs_open() to re-try branch fs's ->open() with superuser's ++ credential ++ - no aufs_atomic_open(). ++ - aufs_create() registers the TID to an internal object. this info ++ represents "this process created this file just now." ++ - when aufs gets EACCES from branch fs's ->open(), then confirm the ++ registered TID and re-try open() with superuser's credential. ++ ++Pros and cons for each approach. ++ ++A. ++ - straightforward but highly depends upon VFS internal. ++ - the atomic behavaiour is kept. ++ - some of parameters such as nameidata are hard to reproduce for ++ branch fs. ++ - large overhead. ++B. ++ - easy to implement. ++ - the atomic behavaiour is lost. ++C. ++ - the atomic behavaiour is kept. ++ - dirty and tricky. ++ - VFS checks whether the file is created correctly after calling ++ ->create(), which means this approach doesn't work. ++D. ++ - easy to implement. ++ - the atomic behavaiour is lost. ++ - to open a file with superuser's credential and give it to a user ++ process is a bad idea, since the file object keeps the credential ++ in it. It may affect LSM or something. This approach doesn't work ++ either. ++ ++The approach A is ideal, but it hard to implement. So here is a ++variation of A, which is to be implemented. ++ ++A-1. Introduce aufs_atomic_open() ++ - calls branch fs ->atomic_open() if exists. otherwise calls ++ vfs_create() and finish_open(). ++ - the demerit is that the several checks after branch fs ++ ->atomic_open() are lost. in the ordinary case, the checks are ++ done by VFS:do_last(), lookup_open() and atomic_open(). some can ++ be implemented in aufs, but not all I am afraid. +diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt +new file mode 100644 +index 000000000000..bcfa51170ebd +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/03lookup.txt +@@ -0,0 +1,113 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Lookup in a Branch ++---------------------------------------------------------------------- ++Since aufs has a character of sub-VFS (see Introduction), it operates ++lookup for branches as VFS does. It may be a heavy work. But almost all ++lookup operation in aufs is the simplest case, ie. lookup only an entry ++directly connected to its parent. Digging down the directory hierarchy ++is unnecessary. VFS has a function lookup_one_len() for that use, and ++aufs calls it. ++ ++When a branch is a remote filesystem, aufs basically relies upon its ++->d_revalidate(), also aufs forces the hardest revalidate tests for ++them. ++For d_revalidate, aufs implements three levels of revalidate tests. See ++"Revalidate Dentry and UDBA" in detail. ++ ++ ++Test Only the Highest One for the Directory Permission (dirperm1 option) ++---------------------------------------------------------------------- ++Let's try case study. ++- aufs has two branches, upper readwrite and lower readonly. ++ /au = /rw + /ro ++- "dirA" exists under /ro, but /rw. and its mode is 0700. ++- user invoked "chmod a+rx /au/dirA" ++- the internal copy-up is activated and "/rw/dirA" is created and its ++ permission bits are set to world readable. ++- then "/au/dirA" becomes world readable? ++ ++In this case, /ro/dirA is still 0700 since it exists in readonly branch, ++or it may be a natively readonly filesystem. If aufs respects the lower ++branch, it should not respond readdir request from other users. But user ++allowed it by chmod. Should really aufs rejects showing the entries ++under /ro/dirA? ++ ++To be honest, I don't have a good solution for this case. So aufs ++implements 'dirperm1' and 'nodirperm1' mount options, and leave it to ++users. ++When dirperm1 is specified, aufs checks only the highest one for the ++directory permission, and shows the entries. Otherwise, as usual, checks ++every dir existing on all branches and rejects the request. ++ ++As a side effect, dirperm1 option improves the performance of aufs ++because the number of permission check is reduced when the number of ++branch is many. ++ ++ ++Revalidate Dentry and UDBA (User's Direct Branch Access) ++---------------------------------------------------------------------- ++Generally VFS helpers re-validate a dentry as a part of lookup. ++0. digging down the directory hierarchy. ++1. lock the parent dir by its i_mutex. ++2. lookup the final (child) entry. ++3. revalidate it. ++4. call the actual operation (create, unlink, etc.) ++5. unlock the parent dir ++ ++If the filesystem implements its ->d_revalidate() (step 3), then it is ++called. Actually aufs implements it and checks the dentry on a branch is ++still valid. ++But it is not enough. Because aufs has to release the lock for the ++parent dir on a branch at the end of ->lookup() (step 2) and ++->d_revalidate() (step 3) while the i_mutex of the aufs dir is still ++held by VFS. ++If the file on a branch is changed directly, eg. bypassing aufs, after ++aufs released the lock, then the subsequent operation may cause ++something unpleasant result. ++ ++This situation is a result of VFS architecture, ->lookup() and ++->d_revalidate() is separated. But I never say it is wrong. It is a good ++design from VFS's point of view. It is just not suitable for sub-VFS ++character in aufs. ++ ++Aufs supports such case by three level of revalidation which is ++selectable by user. ++1. Simple Revalidate ++ Addition to the native flow in VFS's, confirm the child-parent ++ relationship on the branch just after locking the parent dir on the ++ branch in the "actual operation" (step 4). When this validation ++ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still ++ checks the validation of the dentry on branches. ++2. Monitor Changes Internally by Inotify/Fsnotify ++ Addition to above, in the "actual operation" (step 4) aufs re-lookup ++ the dentry on the branch, and returns EBUSY if it finds different ++ dentry. ++ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches ++ during it is in cache. When the event is notified, aufs registers a ++ function to kernel 'events' thread by schedule_work(). And the ++ function sets some special status to the cached aufs dentry and inode ++ private data. If they are not cached, then aufs has nothing to ++ do. When the same file is accessed through aufs (step 0-3) later, ++ aufs will detect the status and refresh all necessary data. ++ In this mode, aufs has to ignore the event which is fired by aufs ++ itself. ++3. No Extra Validation ++ This is the simplest test and doesn't add any additional revalidation ++ test, and skip the revalidation in step 4. It is useful and improves ++ aufs performance when system surely hide the aufs branches from user, ++ by over-mounting something (or another method). +diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt +new file mode 100644 +index 000000000000..3bcef8eff62b +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/04branch.txt +@@ -0,0 +1,74 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Branch Manipulation ++ ++Since aufs supports dynamic branch manipulation, ie. add/remove a branch ++and changing its permission/attribute, there are a lot of works to do. ++ ++ ++Add a Branch ++---------------------------------------------------------------------- ++o Confirm the adding dir exists outside of aufs, including loopback ++ mount, and its various attributes. ++o Initialize the xino file and whiteout bases if necessary. ++ See struct.txt. ++ ++o Check the owner/group/mode of the directory ++ When the owner/group/mode of the adding directory differs from the ++ existing branch, aufs issues a warning because it may impose a ++ security risk. ++ For example, when a upper writable branch has a world writable empty ++ top directory, a malicious user can create any files on the writable ++ branch directly, like copy-up and modify manually. If something like ++ /etc/{passwd,shadow} exists on the lower readonly branch but the upper ++ writable branch, and the writable branch is world-writable, then a ++ malicious guy may create /etc/passwd on the writable branch directly ++ and the infected file will be valid in aufs. ++ I am afraid it can be a security issue, but aufs can do nothing except ++ producing a warning. ++ ++ ++Delete a Branch ++---------------------------------------------------------------------- ++o Confirm the deleting branch is not busy ++ To be general, there is one merit to adopt "remount" interface to ++ manipulate branches. It is to discard caches. At deleting a branch, ++ aufs checks the still cached (and connected) dentries and inodes. If ++ there are any, then they are all in-use. An inode without its ++ corresponding dentry can be alive alone (for example, inotify/fsnotify case). ++ ++ For the cached one, aufs checks whether the same named entry exists on ++ other branches. ++ If the cached one is a directory, because aufs provides a merged view ++ to users, as long as one dir is left on any branch aufs can show the ++ dir to users. In this case, the branch can be removed from aufs. ++ Otherwise aufs rejects deleting the branch. ++ ++ If any file on the deleting branch is opened by aufs, then aufs ++ rejects deleting. ++ ++ ++Modify the Permission of a Branch ++---------------------------------------------------------------------- ++o Re-initialize or remove the xino file and whiteout bases if necessary. ++ See struct.txt. ++ ++o rw --> ro: Confirm the modifying branch is not busy ++ Aufs rejects the request if any of these conditions are true. ++ - a file on the branch is mmap-ed. ++ - a regular file on the branch is opened for write and there is no ++ same named entry on the upper branch. +diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt +new file mode 100644 +index 000000000000..1361c74fc591 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt +@@ -0,0 +1,64 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Policies to Select One among Multiple Writable Branches ++---------------------------------------------------------------------- ++When the number of writable branch is more than one, aufs has to decide ++the target branch for file creation or copy-up. By default, the highest ++writable branch which has the parent (or ancestor) dir of the target ++file is chosen (top-down-parent policy). ++By user's request, aufs implements some other policies to select the ++writable branch, for file creation several policies, round-robin, ++most-free-space, and other policies. For copy-up, top-down-parent, ++bottom-up-parent, bottom-up and others. ++ ++As expected, the round-robin policy selects the branch in circular. When ++you have two writable branches and creates 10 new files, 5 files will be ++created for each branch. mkdir(2) systemcall is an exception. When you ++create 10 new directories, all will be created on the same branch. ++And the most-free-space policy selects the one which has most free ++space among the writable branches. The amount of free space will be ++checked by aufs internally, and users can specify its time interval. ++ ++The policies for copy-up is more simple, ++top-down-parent is equivalent to the same named on in create policy, ++bottom-up-parent selects the writable branch where the parent dir ++exists and the nearest upper one from the copyup-source, ++bottom-up selects the nearest upper writable branch from the ++copyup-source, regardless the existence of the parent dir. ++ ++There are some rules or exceptions to apply these policies. ++- If there is a readonly branch above the policy-selected branch and ++ the parent dir is marked as opaque (a variation of whiteout), or the ++ target (creating) file is whiteout-ed on the upper readonly branch, ++ then the result of the policy is ignored and the target file will be ++ created on the nearest upper writable branch than the readonly branch. ++- If there is a writable branch above the policy-selected branch and ++ the parent dir is marked as opaque or the target file is whiteouted ++ on the branch, then the result of the policy is ignored and the target ++ file will be created on the highest one among the upper writable ++ branches who has diropq or whiteout. In case of whiteout, aufs removes ++ it as usual. ++- link(2) and rename(2) systemcalls are exceptions in every policy. ++ They try selecting the branch where the source exists as possible ++ since copyup a large file will take long time. If it can't be, ++ ie. the branch where the source exists is readonly, then they will ++ follow the copyup policy. ++- There is an exception for rename(2) when the target exists. ++ If the rename target exists, aufs compares the index of the branches ++ where the source and the target exists and selects the higher ++ one. If the selected branch is readonly, then aufs follows the ++ copyup policy. +diff --git a/Documentation/filesystems/aufs/design/06dirren.dot b/Documentation/filesystems/aufs/design/06dirren.dot +new file mode 100644 +index 000000000000..2d62bb6dd55f +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/06dirren.dot +@@ -0,0 +1,31 @@ ++ ++// to view this graph, run dot(1) command in GRAPHVIZ. ++ ++digraph G { ++node [shape=box]; ++whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"]; ++ ++node [shape=oval]; ++ ++aufs_rename -> whinfo [label="store/remove"]; ++ ++node [shape=oval]; ++inode_list [label="h_inum list in branch\ncache"]; ++ ++node [shape=box]; ++whinode [label="h_inum list file"]; ++ ++node [shape=oval]; ++brmgmt [label="br_add/del/mod/umount"]; ++ ++brmgmt -> inode_list [label="create/remove"]; ++brmgmt -> whinode [label="load/store"]; ++ ++inode_list -> whinode [style=dashed,dir=both]; ++ ++aufs_rename -> inode_list [label="add/del"]; ++ ++aufs_lookup -> inode_list [label="search"]; ++ ++aufs_lookup -> whinfo [label="load/remove"]; ++} +diff --git a/Documentation/filesystems/aufs/design/06dirren.txt b/Documentation/filesystems/aufs/design/06dirren.txt +new file mode 100644 +index 000000000000..81f8efedb56a +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/06dirren.txt +@@ -0,0 +1,102 @@ ++ ++# Copyright (C) 2017-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Special handling for renaming a directory (DIRREN) ++---------------------------------------------------------------------- ++First, let's assume we have a simple usecase. ++ ++- /u = /rw + /ro ++- /rw/dirA exists ++- /ro/dirA and /ro/dirA/file exist too ++- there is no dirB on both branches ++- a user issues rename("dirA", "dirB") ++ ++Now, what should aufs behave against this rename(2)? ++There are a few possible cases. ++ ++A. returns EROFS. ++ since dirA exists on a readonly branch which cannot be renamed. ++B. returns EXDEV. ++ it is possible to copy-up dirA (only the dir itself), but the child ++ entries ("file" in this case) should not be. it must be a bad ++ approach to copy-up recursively. ++C. returns a success. ++ even the branch /ro is readonly, aufs tries renaming it. Obviously it ++ is a violation of aufs' policy. ++D. construct an extra information which indicates that /ro/dirA should ++ be handled as the name of dirB. ++ overlayfs has a similar feature called REDIRECT. ++ ++Until now, aufs implements the case B only which returns EXDEV, and ++expects the userspace application behaves like mv(1) which tries ++issueing rename(2) recursively. ++ ++A new aufs feature called DIRREN is introduced which implements the case ++D. There are several "extra information" added. ++ ++1. detailed info per renamed directory ++ path: /rw/dirB/$AUFS_WH_DR_INFO_PFX. ++2. the inode-number list of directories on a branch ++ path: /rw/dirB/$AUFS_WH_DR_BRHINO ++ ++The filename of "detailed info per directory" represents the lower ++branch, and its format is ++- a type of the branch id ++ one of these. ++ + uuid (not implemented yet) ++ + fsid ++ + dev ++- the inode-number of the branch root dir ++ ++And it contains these info in a single regular file. ++- magic number ++- branch's inode-number of the logically renamed dir ++- the name of the before-renamed dir ++ ++The "detailed info per directory" file is created in aufs rename(2), and ++loaded in any lookup. ++The info is considered in lookup for the matching case only. Here ++"matching" means that the root of branch (in the info filename) is same ++to the current looking-up branch. After looking-up the before-renamed ++name, the inode-number is compared. And the matched dentry is used. ++ ++The "inode-number list of directories" is a regular file which contains ++simply the inode-numbers on the branch. The file is created or updated ++in removing the branch, and loaded in adding the branch. Its lifetime is ++equal to the branch. ++The list is refered in lookup, and when the current target inode is ++found in the list, the aufs tries loading the "detailed info per ++directory" and get the changed and valid name of the dir. ++ ++Theoretically these "extra informaiton" may be able to be put into XATTR ++in the dir inode. But aufs doesn't choose this way because ++1. XATTR may not be supported by the branch (or its configuration) ++2. XATTR may have its size limit. ++3. XATTR may be less easy to convert than a regular file, when the ++ format of the info is changed in the future. ++At the same time, I agree that the regular file approach is much slower ++than XATTR approach. So, in the future, aufs may take the XATTR or other ++better approach. ++ ++This DIRREN feature is enabled by aufs configuration, and is activated ++by a new mount option. ++ ++For the more complicated case, there is a work with UDBA option, which ++is to dected the direct access to the branches (by-passing aufs) and to ++maintain the cashes in aufs. Since a single cached aufs dentry may ++contains two names, before- and after-rename, the name comparision in ++UDBA handler may not work correctly. In this case, the behaviour will be ++equivalen to udba=reval case. +diff --git a/Documentation/filesystems/aufs/design/06fhsm.txt b/Documentation/filesystems/aufs/design/06fhsm.txt +new file mode 100644 +index 000000000000..75ba61f47494 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/06fhsm.txt +@@ -0,0 +1,120 @@ ++ ++# Copyright (C) 2011-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++ ++File-based Hierarchical Storage Management (FHSM) ++---------------------------------------------------------------------- ++Hierarchical Storage Management (or HSM) is a well-known feature in the ++storage world. Aufs provides this feature as file-based with multiple ++writable branches, based upon the principle of "Colder, the Lower". ++Here the word "colder" means that the less used files, and "lower" means ++that the position in the order of the stacked branches vertically. ++These multiple writable branches are prioritized, ie. the topmost one ++should be the fastest drive and be used heavily. ++ ++o Characters in aufs FHSM story ++- aufs itself and a new branch attribute. ++- a new ioctl interface to move-down and to establish a connection with ++ the daemon ("move-down" is a converse of "copy-up"). ++- userspace tool and daemon. ++ ++The userspace daemon establishes a connection with aufs and waits for ++the notification. The notified information is very similar to struct ++statfs containing the number of consumed blocks and inodes. ++When the consumed blocks/inodes of a branch exceeds the user-specified ++upper watermark, the daemon activates its move-down process until the ++consumed blocks/inodes reaches the user-specified lower watermark. ++ ++The actual move-down is done by aufs based upon the request from ++user-space since we need to maintain the inode number and the internal ++pointer arrays in aufs. ++ ++Currently aufs FHSM handles the regular files only. Additionally they ++must not be hard-linked nor pseudo-linked. ++ ++ ++o Cowork of aufs and the user-space daemon ++ During the userspace daemon established the connection, aufs sends a ++ small notification to it whenever aufs writes something into the ++ writable branch. But it may cost high since aufs issues statfs(2) ++ internally. So user can specify a new option to cache the ++ info. Actually the notification is controlled by these factors. ++ + the specified cache time. ++ + classified as "force" by aufs internally. ++ Until the specified time expires, aufs doesn't send the info ++ except the forced cases. When aufs decide forcing, the info is always ++ notified to userspace. ++ For example, the number of free inodes is generally large enough and ++ the shortage of it happens rarely. So aufs doesn't force the ++ notification when creating a new file, directory and others. This is ++ the typical case which aufs doesn't force. ++ When aufs writes the actual filedata and the files consumes any of new ++ blocks, the aufs forces notifying. ++ ++ ++o Interfaces in aufs ++- New branch attribute. ++ + fhsm ++ Specifies that the branch is managed by FHSM feature. In other word, ++ participant in the FHSM. ++ When nofhsm is set to the branch, it will not be the source/target ++ branch of the move-down operation. This attribute is set ++ independently from coo and moo attributes, and if you want full ++ FHSM, you should specify them as well. ++- New mount option. ++ + fhsm_sec ++ Specifies a second to suppress many less important info to be ++ notified. ++- New ioctl. ++ + AUFS_CTL_FHSM_FD ++ create a new file descriptor which userspace can read the notification ++ (a subset of struct statfs) from aufs. ++- Module parameter 'brs' ++ It has to be set to 1. Otherwise the new mount option 'fhsm' will not ++ be set. ++- mount helpers /sbin/mount.aufs and /sbin/umount.aufs ++ When there are two or more branches with fhsm attributes, ++ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs ++ terminates it. As a result of remounting and branch-manipulation, the ++ number of branches with fhsm attribute can be one. In this case, ++ /sbin/mount.aufs will terminate the user-space daemon. ++ ++ ++Finally the operation is done as these steps in kernel-space. ++- make sure that, ++ + no one else is using the file. ++ + the file is not hard-linked. ++ + the file is not pseudo-linked. ++ + the file is a regular file. ++ + the parent dir is not opaqued. ++- find the target writable branch. ++- make sure the file is not whiteout-ed by the upper (than the target) ++ branch. ++- make the parent dir on the target branch. ++- mutex lock the inode on the branch. ++- unlink the whiteout on the target branch (if exists). ++- lookup and create the whiteout-ed temporary name on the target branch. ++- copy the file as the whiteout-ed temporary name on the target branch. ++- rename the whiteout-ed temporary name to the original name. ++- unlink the file on the source branch. ++- maintain the internal pointer array and the external inode number ++ table (XINO). ++- maintain the timestamps and other attributes of the parent dir and the ++ file. ++ ++And of course, in every step, an error may happen. So the operation ++should restore the original file state after an error happens. +diff --git a/Documentation/filesystems/aufs/design/06mmap.txt b/Documentation/filesystems/aufs/design/06mmap.txt +new file mode 100644 +index 000000000000..71f4b9695503 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/06mmap.txt +@@ -0,0 +1,72 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++mmap(2) -- File Memory Mapping ++---------------------------------------------------------------------- ++In aufs, the file-mapped pages are handled by a branch fs directly, no ++interaction with aufs. It means aufs_mmap() calls the branch fs's ++->mmap(). ++This approach is simple and good, but there is one problem. ++Under /proc, several entries show the mmapped files by its path (with ++device and inode number), and the printed path will be the path on the ++branch fs's instead of virtual aufs's. ++This is not a problem in most cases, but some utilities lsof(1) (and its ++user) may expect the path on aufs. ++ ++To address this issue, aufs adds a new member called vm_prfile in struct ++vm_area_struct (and struct vm_region). The original vm_file points to ++the file on the branch fs in order to handle everything correctly as ++usual. The new vm_prfile points to a virtual file in aufs, and the ++show-functions in procfs refers to vm_prfile if it is set. ++Also we need to maintain several other places where touching vm_file ++such like ++- fork()/clone() copies vma and the reference count of vm_file is ++ incremented. ++- merging vma maintains the ref count too. ++ ++This is not a good approach. It just fakes the printed path. But it ++leaves all behaviour around f_mapping unchanged. This is surely an ++advantage. ++Actually aufs had adopted another complicated approach which calls ++generic_file_mmap() and handles struct vm_operations_struct. In this ++approach, aufs met a hard problem and I could not solve it without ++switching the approach. ++ ++There may be one more another approach which is ++- bind-mount the branch-root onto the aufs-root internally ++- grab the new vfsmount (ie. struct mount) ++- lazy-umount the branch-root internally ++- in open(2) the aufs-file, open the branch-file with the hidden ++ vfsmount (instead of the original branch's vfsmount) ++- ideally this "bind-mount and lazy-umount" should be done atomically, ++ but it may be possible from userspace by the mount helper. ++ ++Adding the internal hidden vfsmount and using it in opening a file, the ++file path under /proc will be printed correctly. This approach looks ++smarter, but is not possible I am afraid. ++- aufs-root may be bind-mount later. when it happens, another hidden ++ vfsmount will be required. ++- it is hard to get the chance to bind-mount and lazy-umount ++ + in kernel-space, FS can have vfsmount in open(2) via ++ file->f_path, and aufs can know its vfsmount. But several locks are ++ already acquired, and if aufs tries to bind-mount and lazy-umount ++ here, then it may cause a deadlock. ++ + in user-space, bind-mount doesn't invoke the mount helper. ++- since /proc shows dev and ino, aufs has to give vma these info. it ++ means a new member vm_prinode will be necessary. this is essentially ++ equivalent to vm_prfile described above. ++ ++I have to give up this "looks-smater" approach. +diff --git a/Documentation/filesystems/aufs/design/06xattr.txt b/Documentation/filesystems/aufs/design/06xattr.txt +new file mode 100644 +index 000000000000..4b112bc733db +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/06xattr.txt +@@ -0,0 +1,96 @@ ++ ++# Copyright (C) 2014-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; if not, write to the Free Software ++# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ ++ ++Listing XATTR/EA and getting the value ++---------------------------------------------------------------------- ++For the inode standard attributes (owner, group, timestamps, etc.), aufs ++shows the values from the topmost existing file. This behaviour is good ++for the non-dir entries since the bahaviour exactly matches the shown ++information. But for the directories, aufs considers all the same named ++entries on the lower branches. Which means, if one of the lower entry ++rejects readdir call, then aufs returns an error even if the topmost ++entry allows it. This behaviour is necessary to respect the branch fs's ++security, but can make users confused since the user-visible standard ++attributes don't match the behaviour. ++To address this issue, aufs has a mount option called dirperm1 which ++checks the permission for the topmost entry only, and ignores the lower ++entry's permission. ++ ++A similar issue can happen around XATTR. ++getxattr(2) and listxattr(2) families behave as if dirperm1 option is ++always set. Otherwise these very unpleasant situation would happen. ++- listxattr(2) may return the duplicated entries. ++- users may not be able to remove or reset the XATTR forever, ++ ++ ++XATTR/EA support in the internal (copy,move)-(up,down) ++---------------------------------------------------------------------- ++Generally the extended attributes of inode are categorized as these. ++- "security" for LSM and capability. ++- "system" for posix ACL, 'acl' mount option is required for the branch ++ fs generally. ++- "trusted" for userspace, CAP_SYS_ADMIN is required. ++- "user" for userspace, 'user_xattr' mount option is required for the ++ branch fs generally. ++ ++Moreover there are some other categories. Aufs handles these rather ++unpopular categories as the ordinary ones, ie. there is no special ++condition nor exception. ++ ++In copy-up, the support for XATTR on the dst branch may differ from the ++src branch. In this case, the copy-up operation will get an error and ++the original user operation which triggered the copy-up will fail. It ++can happen that even all copy-up will fail. ++When both of src and dst branches support XATTR and if an error occurs ++during copying XATTR, then the copy-up should fail obviously. That is a ++good reason and aufs should return an error to userspace. But when only ++the src branch support that XATTR, aufs should not return an error. ++For example, the src branch supports ACL but the dst branch doesn't ++because the dst branch may natively un-support it or temporary ++un-support it due to "noacl" mount option. Of course, the dst branch fs ++may NOT return an error even if the XATTR is not supported. It is ++totally up to the branch fs. ++ ++Anyway when the aufs internal copy-up gets an error from the dst branch ++fs, then aufs tries removing the just copied entry and returns the error ++to the userspace. The worst case of this situation will be all copy-up ++will fail. ++ ++For the copy-up operation, there two basic approaches. ++- copy the specified XATTR only (by category above), and return the ++ error unconditionally if it happens. ++- copy all XATTR, and ignore the error on the specified category only. ++ ++In order to support XATTR and to implement the correct behaviour, aufs ++chooses the latter approach and introduces some new branch attributes, ++"icexsec", "icexsys", "icextr", "icexusr", and "icexoth". ++They correspond to the XATTR namespaces (see above). Additionally, to be ++convenient, "icex" is also provided which means all "icex*" attributes ++are set (here the word "icex" stands for "ignore copy-error on XATTR"). ++ ++The meaning of these attributes is to ignore the error from setting ++XATTR on that branch. ++Note that aufs tries copying all XATTR unconditionally, and ignores the ++error from the dst branch according to the specified attributes. ++ ++Some XATTR may have its default value. The default value may come from ++the parent dir or the environment. If the default value is set at the ++file creating-time, it will be overwritten by copy-up. ++Some contradiction may happen I am afraid. ++Do we need another attribute to stop copying XATTR? I am unsure. For ++now, aufs implements the branch attributes to ignore the error. +diff --git a/Documentation/filesystems/aufs/design/07export.txt b/Documentation/filesystems/aufs/design/07export.txt +new file mode 100644 +index 000000000000..09e648e7e24a +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/07export.txt +@@ -0,0 +1,58 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Export Aufs via NFS ++---------------------------------------------------------------------- ++Here is an approach. ++- like xino/xib, add a new file 'xigen' which stores aufs inode ++ generation. ++- iget_locked(): initialize aufs inode generation for a new inode, and ++ store it in xigen file. ++- destroy_inode(): increment aufs inode generation and store it in xigen ++ file. it is necessary even if it is not unlinked, because any data of ++ inode may be changed by UDBA. ++- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise ++ build file handle by ++ + branch id (4 bytes) ++ + superblock generation (4 bytes) ++ + inode number (4 or 8 bytes) ++ + parent dir inode number (4 or 8 bytes) ++ + inode generation (4 bytes)) ++ + return value of exportfs_encode_fh() for the parent on a branch (4 ++ bytes) ++ + file handle for a branch (by exportfs_encode_fh()) ++- fh_to_dentry(): ++ + find the index of a branch from its id in handle, and check it is ++ still exist in aufs. ++ + 1st level: get the inode number from handle and search it in cache. ++ + 2nd level: if not found in cache, get the parent inode number from ++ the handle and search it in cache. and then open the found parent ++ dir, find the matching inode number by vfs_readdir() and get its ++ name, and call lookup_one_len() for the target dentry. ++ + 3rd level: if the parent dir is not cached, call ++ exportfs_decode_fh() for a branch and get the parent on a branch, ++ build a pathname of it, convert it a pathname in aufs, call ++ path_lookup(). now aufs gets a parent dir dentry, then handle it as ++ the 2nd level. ++ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount ++ for every branch, but not itself. to get this, (currently) aufs ++ searches in current->nsproxy->mnt_ns list. it may not be a good ++ idea, but I didn't get other approach. ++ + test the generation of the gotten inode. ++- every inode operation: they may get EBUSY due to UDBA. in this case, ++ convert it into ESTALE for NFSD. ++- readdir(): call lockdep_on/off() because filldir in NFSD calls ++ lookup_one_len(), vfs_getattr(), encode_fh() and others. +diff --git a/Documentation/filesystems/aufs/design/08shwh.txt b/Documentation/filesystems/aufs/design/08shwh.txt +new file mode 100644 +index 000000000000..f7b6619433e8 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/08shwh.txt +@@ -0,0 +1,52 @@ ++ ++# Copyright (C) 2005-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Show Whiteout Mode (shwh) ++---------------------------------------------------------------------- ++Generally aufs hides the name of whiteouts. But in some cases, to show ++them is very useful for users. For instance, creating a new middle layer ++(branch) by merging existing layers. ++ ++(borrowing aufs1 HOW-TO from a user, Michael Towers) ++When you have three branches, ++- Bottom: 'system', squashfs (underlying base system), read-only ++- Middle: 'mods', squashfs, read-only ++- Top: 'overlay', ram (tmpfs), read-write ++ ++The top layer is loaded at boot time and saved at shutdown, to preserve ++the changes made to the system during the session. ++When larger changes have been made, or smaller changes have accumulated, ++the size of the saved top layer data grows. At this point, it would be ++nice to be able to merge the two overlay branches ('mods' and 'overlay') ++and rewrite the 'mods' squashfs, clearing the top layer and thus ++restoring save and load speed. ++ ++This merging is simplified by the use of another aufs mount, of just the ++two overlay branches using the 'shwh' option. ++# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \ ++ aufs /livesys/merge_union ++ ++A merged view of these two branches is then available at ++/livesys/merge_union, and the new feature is that the whiteouts are ++visible! ++Note that in 'shwh' mode the aufs mount must be 'ro', which will disable ++writing to all branches. Also the default mode for all branches is 'ro'. ++It is now possible to save the combined contents of the two overlay ++branches to a new squashfs, e.g.: ++# mksquashfs /livesys/merge_union /path/to/newmods.squash ++ ++This new squashfs archive can be stored on the boot device and the ++initramfs will use it to replace the old one at the next boot. +diff --git a/Documentation/filesystems/aufs/design/10dynop.txt b/Documentation/filesystems/aufs/design/10dynop.txt +new file mode 100644 +index 000000000000..0f1ce3ce9086 +--- /dev/null ++++ b/Documentation/filesystems/aufs/design/10dynop.txt +@@ -0,0 +1,47 @@ ++ ++# Copyright (C) 2010-2021 Junjiro R. Okajima ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++Dynamically customizable FS operations ++---------------------------------------------------------------------- ++Generally FS operations (struct inode_operations, struct ++address_space_operations, struct file_operations, etc.) are defined as ++"static const", but it never means that FS have only one set of ++operation. Some FS have multiple sets of them. For instance, ext2 has ++three sets, one for XIP, for NOBH, and for normal. ++Since aufs overrides and redirects these operations, sometimes aufs has ++to change its behaviour according to the branch FS type. More importantly ++VFS acts differently if a function (member in the struct) is set or ++not. It means aufs should have several sets of operations and select one ++among them according to the branch FS definition. ++ ++In order to solve this problem and not to affect the behaviour of VFS, ++aufs defines these operations dynamically. For instance, aufs defines ++dummy direct_IO function for struct address_space_operations, but it may ++not be set to the address_space_operations actually. When the branch FS ++doesn't have it, aufs doesn't set it to its address_space_operations ++while the function definition itself is still alive. So the behaviour ++itself will not change, and it will return an error when direct_IO is ++not set. ++ ++The lifetime of these dynamically generated operation object is ++maintained by aufs branch object. When the branch is removed from aufs, ++the reference counter of the object is decremented. When it reaches ++zero, the dynamically generated operation object will be freed. ++ ++This approach is designed to support AIO (io_submit), Direct I/O and ++XIP (DAX) mainly. ++Currently this approach is applied to address_space_operations for ++regular files only. +diff --git a/MAINTAINERS b/MAINTAINERS +index edc32575828b..d80625ae4c12 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -3124,6 +3124,19 @@ F: include/uapi/linux/audit.h + F: kernel/audit* + F: lib/*audit.c + ++AUFS (advanced multi layered unification filesystem) FILESYSTEM ++M: "J. R. Okajima" ++L: aufs-users@lists.sourceforge.net (members only) ++L: linux-unionfs@vger.kernel.org ++S: Supported ++W: http://aufs.sourceforge.net ++T: git://github.com/sfjro/aufs4-linux.git ++F: Documentation/ABI/testing/debugfs-aufs ++F: Documentation/ABI/testing/sysfs-aufs ++F: Documentation/filesystems/aufs/ ++F: fs/aufs/ ++F: include/uapi/linux/aufs_type.h ++ + AUXILIARY DISPLAY DRIVERS + M: Miguel Ojeda + S: Maintained +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index 79e485949b60..44fe18055d44 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -673,6 +673,15 @@ static inline void loop_update_dio(struct loop_device *lo) lo->use_dio); } @@ -1910,7 +1969,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c static void loop_reread_partitions(struct loop_device *lo) { int rc; -@@ -742,6 +751,7 @@ +@@ -730,6 +739,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, { struct file *file = fget(arg); struct file *old_file; @@ -1918,7 +1977,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c int error; bool partscan; bool is_loop; -@@ -761,11 +771,19 @@ +@@ -749,11 +759,19 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; @@ -1938,7 +1997,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c error = -EINVAL; -@@ -778,6 +796,7 @@ +@@ -766,6 +784,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); lo->lo_backing_file = file; @@ -1946,7 +2005,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); mapping_set_gfp_mask(file->f_mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); -@@ -800,6 +819,8 @@ +@@ -788,6 +807,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, * dependency. */ fput(old_file); @@ -1955,7 +2014,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c if (partscan) loop_reread_partitions(lo); return 0; -@@ -808,9 +829,29 @@ +@@ -796,9 +817,29 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_global_unlock(lo, is_loop); out_putf: fput(file); @@ -1985,7 +2044,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c /* loop sysfs attributes */ static ssize_t loop_attr_show(struct device *dev, char *page, -@@ -1190,6 +1231,7 @@ +@@ -1188,6 +1229,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, const struct loop_config *config) { struct file *file = fget(config->fd); @@ -1993,7 +2052,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c struct inode *inode; struct address_space *mapping; int error; -@@ -1205,6 +1247,13 @@ +@@ -1203,6 +1245,13 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); @@ -2007,7 +2066,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. -@@ -1270,6 +1319,7 @@ +@@ -1268,6 +1317,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; lo->lo_backing_file = file; @@ -2015,7 +2074,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); -@@ -1320,6 +1370,8 @@ +@@ -1318,6 +1368,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); @@ -2024,7 +2083,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; -@@ -1328,6 +1380,7 @@ +@@ -1326,6 +1378,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, static int __loop_clr_fd(struct loop_device *lo, bool release) { struct file *filp = NULL; @@ -2032,7 +2091,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c gfp_t gfp = lo->old_gfp_mask; struct block_device *bdev = lo->lo_device; int err = 0; -@@ -1379,6 +1432,7 @@ +@@ -1377,6 +1430,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) spin_lock_irq(&lo->lo_lock); lo->lo_backing_file = NULL; @@ -2040,7 +2099,7 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c spin_unlock_irq(&lo->lo_lock); loop_release_xfer(lo); -@@ -1459,6 +1513,8 @@ +@@ -1457,6 +1511,8 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) */ if (filp) fput(filp); @@ -2049,10 +2108,11 @@ diff -ruN a/drivers/block/loop.c b/drivers/block/loop.c return err; } -diff -ruN a/drivers/block/loop.h b/drivers/block/loop.h ---- a/drivers/block/loop.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/drivers/block/loop.h 2022-03-30 02:32:42.071758101 +0300 -@@ -46,7 +46,7 @@ +diff --git a/drivers/block/loop.h b/drivers/block/loop.h +index 04c88dd6eabd..0ff3ba22ee17 100644 +--- a/drivers/block/loop.h ++++ b/drivers/block/loop.h +@@ -46,7 +46,7 @@ struct loop_device { int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); @@ -2061,9 +2121,289 @@ diff -ruN a/drivers/block/loop.h b/drivers/block/loop.h struct block_device *lo_device; void *key_data; -diff -ruN a/fs/aufs/aufs.h b/fs/aufs/aufs.h ---- a/fs/aufs/aufs.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/aufs.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/Kconfig b/fs/Kconfig +index a6313a969bc5..aca4b89d41a1 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -312,6 +312,7 @@ source "fs/sysv/Kconfig" + source "fs/ufs/Kconfig" + source "fs/erofs/Kconfig" + source "fs/vboxsf/Kconfig" ++source "fs/aufs/Kconfig" + + endif # MISC_FILESYSTEMS + +diff --git a/fs/Makefile b/fs/Makefile +index 84c5e4cdfee5..b4fcdad8412e 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -138,3 +138,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ + obj-$(CONFIG_EROFS_FS) += erofs/ + obj-$(CONFIG_VBOXSF_FS) += vboxsf/ + obj-$(CONFIG_ZONEFS_FS) += zonefs/ ++obj-$(CONFIG_AUFS_FS) += aufs/ +diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig +new file mode 100644 +index 000000000000..9f436425716a +--- /dev/null ++++ b/fs/aufs/Kconfig +@@ -0,0 +1,199 @@ ++# SPDX-License-Identifier: GPL-2.0 ++config AUFS_FS ++ tristate "Aufs (Advanced multi layered unification filesystem) support" ++ help ++ Aufs is a stackable unification filesystem such as Unionfs, ++ which unifies several directories and provides a merged single ++ directory. ++ In the early days, aufs was entirely re-designed and ++ re-implemented Unionfs Version 1.x series. Introducing many ++ original ideas, approaches and improvements, it becomes totally ++ different from Unionfs while keeping the basic features. ++ ++if AUFS_FS ++choice ++ prompt "Maximum number of branches" ++ default AUFS_BRANCH_MAX_127 ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_127 ++ bool "127" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_511 ++ bool "511" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_1023 ++ bool "1023" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++config AUFS_BRANCH_MAX_32767 ++ bool "32767" ++ help ++ Specifies the maximum number of branches (or member directories) ++ in a single aufs. The larger value consumes more system ++ resources and has a minor impact to performance. ++endchoice ++ ++config AUFS_SBILIST ++ bool ++ depends on AUFS_MAGIC_SYSRQ || PROC_FS ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq or /proc, enabled automatically. ++ ++config AUFS_HNOTIFY ++ bool "Detect direct branch access (bypassing aufs)" ++ help ++ If you want to modify files on branches directly, eg. bypassing aufs, ++ and want aufs to detect the changes of them fully, then enable this ++ option and use 'udba=notify' mount option. ++ Currently there is only one available configuration, "fsnotify". ++ It will have a negative impact to the performance. ++ See detail in aufs.5. ++ ++choice ++ prompt "method" if AUFS_HNOTIFY ++ default AUFS_HFSNOTIFY ++config AUFS_HFSNOTIFY ++ bool "fsnotify" ++ select FSNOTIFY ++endchoice ++ ++config AUFS_EXPORT ++ bool "NFS-exportable aufs" ++ depends on EXPORTFS ++ help ++ If you want to export your mounted aufs via NFS, then enable this ++ option. There are several requirements for this configuration. ++ See detail in aufs.5. ++ ++config AUFS_INO_T_64 ++ bool ++ depends on AUFS_EXPORT ++ depends on 64BIT && !(ALPHA || S390) ++ default y ++ help ++ Automatic configuration for internal use. ++ /* typedef unsigned long/int __kernel_ino_t */ ++ /* alpha and s390x are int */ ++ ++config AUFS_XATTR ++ bool "support for XATTR/EA (including Security Labels)" ++ help ++ If your branch fs supports XATTR/EA and you want to make them ++ available in aufs too, then enable this opsion and specify the ++ branch attributes for EA. ++ See detail in aufs.5. ++ ++config AUFS_FHSM ++ bool "File-based Hierarchical Storage Management" ++ help ++ Hierarchical Storage Management (or HSM) is a well-known feature ++ in the storage world. Aufs provides this feature as file-based. ++ with multiple branches. ++ These multiple branches are prioritized, ie. the topmost one ++ should be the fastest drive and be used heavily. ++ ++config AUFS_RDU ++ bool "Readdir in userspace" ++ help ++ Aufs has two methods to provide a merged view for a directory, ++ by a user-space library and by kernel-space natively. The latter ++ is always enabled but sometimes large and slow. ++ If you enable this option, install the library in aufs2-util ++ package, and set some environment variables for your readdir(3), ++ then the work will be handled in user-space which generally ++ shows better performance in most cases. ++ See detail in aufs.5. ++ ++config AUFS_DIRREN ++ bool "Workaround for rename(2)-ing a directory" ++ help ++ By default, aufs returns EXDEV error in renameing a dir who has ++ his child on the lower branch, since it is a bad idea to issue ++ rename(2) internally for every lower branch. But user may not ++ accept this behaviour. So here is a workaround to allow such ++ rename(2) and store some extra infromation on the writable ++ branch. Obviously this costs high (and I don't like it). ++ To use this feature, you need to enable this configuration AND ++ to specify the mount option `dirren.' ++ See details in aufs.5 and the design documents. ++ ++config AUFS_SHWH ++ bool "Show whiteouts" ++ help ++ If you want to make the whiteouts in aufs visible, then enable ++ this option and specify 'shwh' mount option. Although it may ++ sounds like philosophy or something, but in technically it ++ simply shows the name of whiteout with keeping its behaviour. ++ ++config AUFS_BR_RAMFS ++ bool "Ramfs (initramfs/rootfs) as an aufs branch" ++ help ++ If you want to use ramfs as an aufs branch fs, then enable this ++ option. Generally tmpfs is recommended. ++ Aufs prohibited them to be a branch fs by default, because ++ initramfs becomes unusable after switch_root or something ++ generally. If you sets initramfs as an aufs branch and boot your ++ system by switch_root, you will meet a problem easily since the ++ files in initramfs may be inaccessible. ++ Unless you are going to use ramfs as an aufs branch fs without ++ switch_root or something, leave it N. ++ ++config AUFS_BR_FUSE ++ bool "Fuse fs as an aufs branch" ++ depends on FUSE_FS ++ select AUFS_POLL ++ help ++ If you want to use fuse-based userspace filesystem as an aufs ++ branch fs, then enable this option. ++ It implements the internal poll(2) operation which is ++ implemented by fuse only (curretnly). ++ ++config AUFS_POLL ++ bool ++ help ++ Automatic configuration for internal use. ++ ++config AUFS_BR_HFSPLUS ++ bool "Hfsplus as an aufs branch" ++ depends on HFSPLUS_FS ++ default y ++ help ++ If you want to use hfsplus fs as an aufs branch fs, then enable ++ this option. This option introduces a small overhead at ++ copying-up a file on hfsplus. ++ ++config AUFS_BDEV_LOOP ++ bool ++ depends on BLK_DEV_LOOP ++ default y ++ help ++ Automatic configuration for internal use. ++ Convert =[ym] into =y. ++ ++config AUFS_DEBUG ++ bool "Debug aufs" ++ help ++ Enable this to compile aufs internal debug code. ++ It will have a negative impact to the performance. ++ ++config AUFS_MAGIC_SYSRQ ++ bool ++ depends on AUFS_DEBUG && MAGIC_SYSRQ ++ default y ++ help ++ Automatic configuration for internal use. ++ When aufs supports Magic SysRq, enabled automatically. ++endif +diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile +new file mode 100644 +index 000000000000..4af8ecde3e3f +--- /dev/null ++++ b/fs/aufs/Makefile +@@ -0,0 +1,46 @@ ++# SPDX-License-Identifier: GPL-2.0 ++ ++include ${src}/magic.mk ++ifeq (${CONFIG_AUFS_FS},m) ++include ${src}/conf.mk ++endif ++-include ${src}/priv_def.mk ++ ++# cf. include/linux/kernel.h ++# enable pr_debug ++ccflags-y += -DDEBUG ++# sparse requires the full pathname ++ifdef M ++ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h ++else ++ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h ++endif ++ ++obj-$(CONFIG_AUFS_FS) += aufs.o ++aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o fsctx.o \ ++ wkq.o vfsub.o dcsub.o \ ++ cpup.o whout.o wbr_policy.o \ ++ dinfo.o dentry.o \ ++ dynop.o \ ++ finfo.o file.o f_op.o \ ++ dir.o vdir.o \ ++ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ ++ mvdown.o ioctl.o ++ ++# all are boolean ++aufs-$(CONFIG_PROC_FS) += procfs.o plink.o ++aufs-$(CONFIG_SYSFS) += sysfs.o ++aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o ++aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o ++aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o ++aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o ++aufs-$(CONFIG_AUFS_EXPORT) += export.o ++aufs-$(CONFIG_AUFS_XATTR) += xattr.o ++aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o ++aufs-$(CONFIG_AUFS_DIRREN) += dirren.o ++aufs-$(CONFIG_AUFS_FHSM) += fhsm.o ++aufs-$(CONFIG_AUFS_POLL) += poll.o ++aufs-$(CONFIG_AUFS_RDU) += rdu.o ++aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o ++aufs-$(CONFIG_AUFS_DEBUG) += debug.o ++aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o +diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h +new file mode 100644 +index 000000000000..1ec7a347cba1 +--- /dev/null ++++ b/fs/aufs/aufs.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -2127,9 +2467,11 @@ diff -ruN a/fs/aufs/aufs.h b/fs/aufs/aufs.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_H__ */ -diff -ruN a/fs/aufs/branch.c b/fs/aufs/branch.c ---- a/fs/aufs/branch.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/branch.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c +new file mode 100644 +index 000000000000..6935b591f9cc +--- /dev/null ++++ b/fs/aufs/branch.c @@ -0,0 +1,1427 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -3558,9 +3900,11 @@ diff -ruN a/fs/aufs/branch.c b/fs/aufs/branch.c + + return err; +} -diff -ruN a/fs/aufs/branch.h b/fs/aufs/branch.h ---- a/fs/aufs/branch.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/branch.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h +new file mode 100644 +index 000000000000..ef0afb45d616 +--- /dev/null ++++ b/fs/aufs/branch.h @@ -0,0 +1,375 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -3937,9 +4281,11 @@ diff -ruN a/fs/aufs/branch.h b/fs/aufs/branch.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_BRANCH_H__ */ -diff -ruN a/fs/aufs/conf.mk b/fs/aufs/conf.mk ---- a/fs/aufs/conf.mk 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/conf.mk 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/conf.mk b/fs/aufs/conf.mk +new file mode 100644 +index 000000000000..12782f8e0f38 +--- /dev/null ++++ b/fs/aufs/conf.mk @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: GPL-2.0 + @@ -3981,9 +4327,11 @@ diff -ruN a/fs/aufs/conf.mk b/fs/aufs/conf.mk +${obj}/sysfs.o: ${AuConfName} + +-include ${srctree}/${src}/conf_priv.mk -diff -ruN a/fs/aufs/cpup.c b/fs/aufs/cpup.c ---- a/fs/aufs/cpup.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/cpup.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c +new file mode 100644 +index 000000000000..5b8b6b25200e +--- /dev/null ++++ b/fs/aufs/cpup.c @@ -0,0 +1,1459 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -5444,9 +5792,11 @@ diff -ruN a/fs/aufs/cpup.c b/fs/aufs/cpup.c + dput(parent); + return err; +} -diff -ruN a/fs/aufs/cpup.h b/fs/aufs/cpup.h ---- a/fs/aufs/cpup.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/cpup.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h +new file mode 100644 +index 000000000000..a2cff31c0427 +--- /dev/null ++++ b/fs/aufs/cpup.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -5548,9 +5898,11 @@ diff -ruN a/fs/aufs/cpup.h b/fs/aufs/cpup.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_CPUP_H__ */ -diff -ruN a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c ---- a/fs/aufs/dbgaufs.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dbgaufs.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c +new file mode 100644 +index 000000000000..7df6d89242f6 +--- /dev/null ++++ b/fs/aufs/dbgaufs.c @@ -0,0 +1,526 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -6078,9 +6430,11 @@ diff -ruN a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c + err = 0; + return err; +} -diff -ruN a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h ---- a/fs/aufs/dbgaufs.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dbgaufs.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h +new file mode 100644 +index 000000000000..8e0567c72ec8 +--- /dev/null ++++ b/fs/aufs/dbgaufs.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -6135,9 +6489,11 @@ diff -ruN a/fs/aufs/dbgaufs.h b/fs/aufs/dbgaufs.h + +#endif /* __KERNEL__ */ +#endif /* __DBGAUFS_H__ */ -diff -ruN a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c ---- a/fs/aufs/dcsub.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dcsub.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c +new file mode 100644 +index 000000000000..0da952034aea +--- /dev/null ++++ b/fs/aufs/dcsub.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -6364,9 +6720,11 @@ diff -ruN a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c + + return path_is_under(path + 0, path + 1); +} -diff -ruN a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h ---- a/fs/aufs/dcsub.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dcsub.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h +new file mode 100644 +index 000000000000..1fde5f6aec5c +--- /dev/null ++++ b/fs/aufs/dcsub.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -6505,9 +6863,11 @@ diff -ruN a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DCSUB_H__ */ -diff -ruN a/fs/aufs/debug.c b/fs/aufs/debug.c ---- a/fs/aufs/debug.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/debug.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c +new file mode 100644 +index 000000000000..3394083eadd7 +--- /dev/null ++++ b/fs/aufs/debug.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -6953,9 +7313,11 @@ diff -ruN a/fs/aufs/debug.c b/fs/aufs/debug.c + + return 0; +} -diff -ruN a/fs/aufs/debug.h b/fs/aufs/debug.h ---- a/fs/aufs/debug.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/debug.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h +new file mode 100644 +index 000000000000..4ff77fb633ec +--- /dev/null ++++ b/fs/aufs/debug.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -7183,9 +7545,11 @@ diff -ruN a/fs/aufs/debug.h b/fs/aufs/debug.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DEBUG_H__ */ -diff -ruN a/fs/aufs/dentry.c b/fs/aufs/dentry.c ---- a/fs/aufs/dentry.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dentry.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c +new file mode 100644 +index 000000000000..8f5976bc4419 +--- /dev/null ++++ b/fs/aufs/dentry.c @@ -0,0 +1,1168 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -8355,9 +8719,11 @@ diff -ruN a/fs/aufs/dentry.c b/fs/aufs/dentry.c +const struct dentry_operations aufs_dop_noreval = { + .d_release = aufs_d_release +}; -diff -ruN a/fs/aufs/dentry.h b/fs/aufs/dentry.h ---- a/fs/aufs/dentry.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dentry.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h +new file mode 100644 +index 000000000000..26d1332d269b +--- /dev/null ++++ b/fs/aufs/dentry.h @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -8628,9 +8994,11 @@ diff -ruN a/fs/aufs/dentry.h b/fs/aufs/dentry.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DENTRY_H__ */ -diff -ruN a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c ---- a/fs/aufs/dinfo.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dinfo.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c +new file mode 100644 +index 000000000000..a350f11814c2 +--- /dev/null ++++ b/fs/aufs/dinfo.c @@ -0,0 +1,554 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -9186,9 +9554,11 @@ diff -ruN a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c + return bindex; + return -1; +} -diff -ruN a/fs/aufs/dir.c b/fs/aufs/dir.c ---- a/fs/aufs/dir.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dir.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c +new file mode 100644 +index 000000000000..f1d5f2f5fd29 +--- /dev/null ++++ b/fs/aufs/dir.c @@ -0,0 +1,765 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -9955,9 +10325,11 @@ diff -ruN a/fs/aufs/dir.c b/fs/aufs/dir.c + .flush = aufs_flush_dir, + .fsync = aufs_fsync_dir +}; -diff -ruN a/fs/aufs/dir.h b/fs/aufs/dir.h ---- a/fs/aufs/dir.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dir.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h +new file mode 100644 +index 000000000000..7a6e004307e9 +--- /dev/null ++++ b/fs/aufs/dir.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -10093,9 +10465,11 @@ diff -ruN a/fs/aufs/dir.h b/fs/aufs/dir.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DIR_H__ */ -diff -ruN a/fs/aufs/dirren.c b/fs/aufs/dirren.c ---- a/fs/aufs/dirren.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dirren.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dirren.c b/fs/aufs/dirren.c +new file mode 100644 +index 000000000000..341b706f5e13 +--- /dev/null ++++ b/fs/aufs/dirren.c @@ -0,0 +1,1315 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -11412,9 +11786,11 @@ diff -ruN a/fs/aufs/dirren.c b/fs/aufs/dirren.c +out: + return err; +} -diff -ruN a/fs/aufs/dirren.h b/fs/aufs/dirren.h ---- a/fs/aufs/dirren.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dirren.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dirren.h b/fs/aufs/dirren.h +new file mode 100644 +index 000000000000..2c96cf8b039e +--- /dev/null ++++ b/fs/aufs/dirren.h @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -11556,9 +11932,11 @@ diff -ruN a/fs/aufs/dirren.h b/fs/aufs/dirren.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DIRREN_H__ */ -diff -ruN a/fs/aufs/dynop.c b/fs/aufs/dynop.c ---- a/fs/aufs/dynop.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dynop.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dynop.c b/fs/aufs/dynop.c +new file mode 100644 +index 000000000000..a84d986307e5 +--- /dev/null ++++ b/fs/aufs/dynop.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -11928,9 +12306,11 @@ diff -ruN a/fs/aufs/dynop.c b/fs/aufs/dynop.c + for (i = 0; i < AuDyLast; i++) + WARN_ON(!hlist_bl_empty(dynop + i)); +} -diff -ruN a/fs/aufs/dynop.h b/fs/aufs/dynop.h ---- a/fs/aufs/dynop.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/dynop.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/dynop.h b/fs/aufs/dynop.h +new file mode 100644 +index 000000000000..a86b106b2a13 +--- /dev/null ++++ b/fs/aufs/dynop.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -12009,9 +12389,11 @@ diff -ruN a/fs/aufs/dynop.h b/fs/aufs/dynop.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_DYNOP_H__ */ -diff -ruN a/fs/aufs/export.c b/fs/aufs/export.c ---- a/fs/aufs/export.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/export.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/export.c b/fs/aufs/export.c +new file mode 100644 +index 000000000000..2e409acd84f0 +--- /dev/null ++++ b/fs/aufs/export.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -12843,9 +13225,788 @@ diff -ruN a/fs/aufs/export.c b/fs/aufs/export.c + BUILD_BUG_ON(sizeof(u) != sizeof(int)); + atomic_set(&sbinfo->si_xigen_next, u); +} -diff -ruN a/fs/aufs/fhsm.c b/fs/aufs/fhsm.c ---- a/fs/aufs/fhsm.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/fhsm.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c +new file mode 100644 +index 000000000000..0e240b85eb75 +--- /dev/null ++++ b/fs/aufs/f_op.c +@@ -0,0 +1,771 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2005-2021 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* ++ * file and vm operations ++ */ ++ ++#include ++#include ++#include ++#include ++#include "aufs.h" ++ ++int au_do_open_nondir(struct file *file, int flags, struct file *h_file) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct dentry *dentry, *h_dentry; ++ struct au_finfo *finfo; ++ struct inode *h_inode; ++ ++ FiMustWriteLock(file); ++ ++ err = 0; ++ dentry = file->f_path.dentry; ++ AuDebugOn(IS_ERR_OR_NULL(dentry)); ++ finfo = au_fi(file); ++ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop)); ++ atomic_set(&finfo->fi_mmapped, 0); ++ bindex = au_dbtop(dentry); ++ if (!h_file) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb); ++ if (unlikely(err)) ++ goto out; ++ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); ++ if (IS_ERR(h_file)) { ++ err = PTR_ERR(h_file); ++ goto out; ++ } ++ } else { ++ h_dentry = h_file->f_path.dentry; ++ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb); ++ if (unlikely(err)) ++ goto out; ++ /* br ref is already inc-ed */ ++ } ++ ++ if ((flags & __O_TMPFILE) ++ && !(flags & O_EXCL)) { ++ h_inode = file_inode(h_file); ++ spin_lock(&h_inode->i_lock); ++ h_inode->i_state |= I_LINKABLE; ++ spin_unlock(&h_inode->i_lock); ++ } ++ au_set_fbtop(file, bindex); ++ au_set_h_fptr(file, bindex, h_file); ++ au_update_figen(file); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ ++out: ++ return err; ++} ++ ++static int aufs_open_nondir(struct inode *inode __maybe_unused, ++ struct file *file) ++{ ++ int err; ++ struct super_block *sb; ++ struct au_do_open_args args = { ++ .open = au_do_open_nondir ++ }; ++ ++ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n", ++ file, vfsub_file_flags(file), file->f_mode); ++ ++ sb = file->f_path.dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH); ++ err = au_do_open(file, &args); ++ si_read_unlock(sb); ++ return err; ++} ++ ++int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file) ++{ ++ struct au_finfo *finfo; ++ aufs_bindex_t bindex; ++ ++ finfo = au_fi(file); ++ au_hbl_del(&finfo->fi_hlist, ++ &au_sbi(file->f_path.dentry->d_sb)->si_files); ++ bindex = finfo->fi_btop; ++ if (bindex >= 0) ++ au_set_h_fptr(file, bindex, NULL); ++ ++ au_finfo_fin(file); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int au_do_flush_nondir(struct file *file, fl_owner_t id) ++{ ++ int err; ++ struct file *h_file; ++ ++ err = 0; ++ h_file = au_hf_top(file); ++ if (h_file) ++ err = vfsub_flush(h_file, id); ++ return err; ++} ++ ++static int aufs_flush_nondir(struct file *file, fl_owner_t id) ++{ ++ return au_do_flush(file, id, au_do_flush_nondir); ++} ++ ++/* ---------------------------------------------------------------------- */ ++/* ++ * read and write functions acquire [fdi]_rwsem once, but release before ++ * mmap_sem. This is because to stop a race condition between mmap(2). ++ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping ++ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in ++ * read functions after [fdi]_rwsem are released, but it should be harmless. ++ */ ++ ++/* Callers should call au_read_post() or fput() in the end */ ++struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc) ++{ ++ struct file *h_file; ++ int err; ++ ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc); ++ if (!err) { ++ di_read_unlock(file->f_path.dentry, AuLock_IR); ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ if (!keep_fi) ++ fi_read_unlock(file); ++ } else ++ h_file = ERR_PTR(err); ++ ++ return h_file; ++} ++ ++static void au_read_post(struct inode *inode, struct file *h_file) ++{ ++ /* update without lock, I don't think it a problem */ ++ fsstack_copy_attr_atime(inode, file_inode(h_file)); ++ fput(h_file); ++} ++ ++struct au_write_pre { ++ /* input */ ++ unsigned int lsc; ++ ++ /* output */ ++ blkcnt_t blks; ++ aufs_bindex_t btop; ++}; ++ ++/* ++ * return with iinfo is write-locked ++ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the ++ * end ++ */ ++static struct file *au_write_pre(struct file *file, int do_ready, ++ struct au_write_pre *wpre) ++{ ++ struct file *h_file; ++ struct dentry *dentry; ++ int err; ++ unsigned int lsc; ++ struct au_pin pin; ++ ++ lsc = 0; ++ if (wpre) ++ lsc = wpre->lsc; ++ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc); ++ h_file = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ dentry = file->f_path.dentry; ++ if (do_ready) { ++ err = au_ready_to_write(file, -1, &pin); ++ if (unlikely(err)) { ++ h_file = ERR_PTR(err); ++ di_write_unlock(dentry); ++ goto out_fi; ++ } ++ } ++ ++ di_downgrade_lock(dentry, /*flags*/0); ++ if (wpre) ++ wpre->btop = au_fbtop(file); ++ h_file = au_hf_top(file); ++ get_file(h_file); ++ if (wpre) ++ wpre->blks = file_inode(h_file)->i_blocks; ++ if (do_ready) ++ au_unpin(&pin); ++ di_read_unlock(dentry, /*flags*/0); ++ ++out_fi: ++ fi_write_unlock(file); ++out: ++ return h_file; ++} ++ ++static void au_write_post(struct inode *inode, struct file *h_file, ++ struct au_write_pre *wpre, ssize_t written) ++{ ++ struct inode *h_inode; ++ ++ au_cpup_attr_timesizes(inode); ++ AuDebugOn(au_ibtop(inode) != wpre->btop); ++ h_inode = file_inode(h_file); ++ inode->i_mode = h_inode->i_mode; ++ ii_write_unlock(inode); ++ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */ ++ if (written > 0) ++ au_fhsm_wrote(inode->i_sb, wpre->btop, ++ /*force*/h_inode->i_blocks > wpre->blks); ++ fput(h_file); ++} ++ ++/* ++ * todo: very ugly ++ * it locks both of i_mutex and si_rwsem for read in safe. ++ * if the plink maintenance mode continues forever (that is the problem), ++ * may loop forever. ++ */ ++static void au_mtx_and_read_lock(struct inode *inode) ++{ ++ int err; ++ struct super_block *sb = inode->i_sb; ++ ++ while (1) { ++ inode_lock(inode); ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (!err) ++ break; ++ inode_unlock(inode); ++ si_read_lock(sb, AuLock_NOPLMW); ++ si_read_unlock(sb); ++ } ++} ++ ++static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio, ++ struct iov_iter *iov_iter) ++{ ++ ssize_t err; ++ struct file *file; ++ ssize_t (*iter)(struct kiocb *, struct iov_iter *); ++ ++ err = security_file_permission(h_file, rw); ++ if (unlikely(err)) ++ goto out; ++ ++ err = -ENOSYS; /* the branch doesn't have its ->(read|write)_iter() */ ++ iter = NULL; ++ if (rw == MAY_READ) ++ iter = h_file->f_op->read_iter; ++ else if (rw == MAY_WRITE) ++ iter = h_file->f_op->write_iter; ++ ++ file = kio->ki_filp; ++ kio->ki_filp = h_file; ++ if (iter) { ++ lockdep_off(); ++ err = iter(kio, iov_iter); ++ lockdep_on(); ++ } else ++ /* currently there is no such fs */ ++ WARN_ON_ONCE(1); ++ kio->ki_filp = file; ++ ++out: ++ return err; ++} ++ ++static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter) ++{ ++ ssize_t err; ++ struct file *file, *h_file; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ file = kio->ki_filp; ++ inode = file_inode(file); ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ++ h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ if (0 && au_test_loopback_kthread()) { ++ au_warn_loopback(h_file->f_path.dentry->d_sb); ++ if (file->f_mapping != h_file->f_mapping) { ++ file->f_mapping = h_file->f_mapping; ++ smp_mb(); /* unnecessary? */ ++ } ++ } ++ fi_read_unlock(file); ++ ++ err = au_do_iter(h_file, MAY_READ, kio, iov_iter); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ au_read_post(inode, h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter) ++{ ++ ssize_t err; ++ struct au_write_pre wpre; ++ struct inode *inode; ++ struct file *file, *h_file; ++ ++ file = kio->ki_filp; ++ inode = file_inode(file); ++ au_mtx_and_read_lock(inode); ++ ++ wpre.lsc = 0; ++ h_file = au_write_pre(file, /*do_ready*/1, &wpre); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter); ++ au_write_post(inode, h_file, &wpre, err); ++ ++out: ++ si_read_unlock(inode->i_sb); ++ inode_unlock(inode); ++ return err; ++} ++ ++/* ++ * We may be able to remove aufs_splice_{read,write}() since almost all FSes ++ * don't have their own .splice_{read,write} implimentations, and they use ++ * generic_file_splice_read() and iter_file_splice_write() who can act like the ++ * simple converters to f_op->iter_read() and ->iter_write(). ++ * But we keep our own implementations because some non-mainlined FSes may have ++ * their own .splice_{read,write} implimentations and aufs doesn't want to take ++ * away an opportunity to co-work with aufs from them. ++ */ ++static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ ssize_t err; ++ struct file *h_file; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ inode = file_inode(file); ++ sb = inode->i_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ++ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); ++ /* todo: necessary? */ ++ /* file->f_ra = h_file->f_ra; */ ++ au_read_post(inode, h_file); ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static ssize_t ++aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, ++ size_t len, unsigned int flags) ++{ ++ ssize_t err; ++ struct au_write_pre wpre; ++ struct inode *inode; ++ struct file *h_file; ++ ++ inode = file_inode(file); ++ au_mtx_and_read_lock(inode); ++ ++ wpre.lsc = 0; ++ h_file = au_write_pre(file, /*do_ready*/1, &wpre); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); ++ au_write_post(inode, h_file, &wpre, err); ++ ++out: ++ si_read_unlock(inode->i_sb); ++ inode_unlock(inode); ++ return err; ++} ++ ++static long aufs_fallocate(struct file *file, int mode, loff_t offset, ++ loff_t len) ++{ ++ long err; ++ struct au_write_pre wpre; ++ struct inode *inode; ++ struct file *h_file; ++ ++ inode = file_inode(file); ++ au_mtx_and_read_lock(inode); ++ ++ wpre.lsc = 0; ++ h_file = au_write_pre(file, /*do_ready*/1, &wpre); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ lockdep_off(); ++ err = vfs_fallocate(h_file, mode, offset, len); ++ lockdep_on(); ++ au_write_post(inode, h_file, &wpre, /*written*/1); ++ ++out: ++ si_read_unlock(inode->i_sb); ++ inode_unlock(inode); ++ return err; ++} ++ ++static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos, ++ struct file *dst, loff_t dst_pos, ++ size_t len, unsigned int flags) ++{ ++ ssize_t err; ++ struct au_write_pre wpre; ++ enum { SRC, DST }; ++ struct { ++ struct inode *inode; ++ struct file *h_file; ++ struct super_block *h_sb; ++ } a[2]; ++#define a_src a[SRC] ++#define a_dst a[DST] ++ ++ err = -EINVAL; ++ a_src.inode = file_inode(src); ++ if (unlikely(!S_ISREG(a_src.inode->i_mode))) ++ goto out; ++ a_dst.inode = file_inode(dst); ++ if (unlikely(!S_ISREG(a_dst.inode->i_mode))) ++ goto out; ++ ++ au_mtx_and_read_lock(a_dst.inode); ++ /* ++ * in order to match the order in di_write_lock2_{child,parent}(), ++ * use f_path.dentry for this comparison. ++ */ ++ if (src->f_path.dentry < dst->f_path.dentry) { ++ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1); ++ err = PTR_ERR(a_src.h_file); ++ if (IS_ERR(a_src.h_file)) ++ goto out_si; ++ ++ wpre.lsc = AuLsc_FI_2; ++ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre); ++ err = PTR_ERR(a_dst.h_file); ++ if (IS_ERR(a_dst.h_file)) { ++ au_read_post(a_src.inode, a_src.h_file); ++ goto out_si; ++ } ++ } else { ++ wpre.lsc = AuLsc_FI_1; ++ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre); ++ err = PTR_ERR(a_dst.h_file); ++ if (IS_ERR(a_dst.h_file)) ++ goto out_si; ++ ++ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2); ++ err = PTR_ERR(a_src.h_file); ++ if (IS_ERR(a_src.h_file)) { ++ au_write_post(a_dst.inode, a_dst.h_file, &wpre, ++ /*written*/0); ++ goto out_si; ++ } ++ } ++ ++ err = -EXDEV; ++ a_src.h_sb = file_inode(a_src.h_file)->i_sb; ++ a_dst.h_sb = file_inode(a_dst.h_file)->i_sb; ++ if (unlikely(a_src.h_sb != a_dst.h_sb)) { ++ AuDbgFile(src); ++ AuDbgFile(dst); ++ goto out_file; ++ } ++ ++ err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file, ++ dst_pos, len, flags); ++ ++out_file: ++ au_write_post(a_dst.inode, a_dst.h_file, &wpre, err); ++ fi_read_unlock(src); ++ au_read_post(a_src.inode, a_src.h_file); ++out_si: ++ si_read_unlock(a_dst.inode->i_sb); ++ inode_unlock(a_dst.inode); ++out: ++ return err; ++#undef a_src ++#undef a_dst ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * The locking order around current->mmap_sem. ++ * - in most and regular cases ++ * file I/O syscall -- aufs_read() or something ++ * -- si_rwsem for read -- mmap_sem ++ * (Note that [fdi]i_rwsem are released before mmap_sem). ++ * - in mmap case ++ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem ++ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for ++ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held ++ * in file I/O. Aufs needs to stop lockdep in aufs_mmap() though. ++ * It means that when aufs acquires si_rwsem for write, the process should never ++ * acquire mmap_sem. ++ * ++ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a ++ * problem either since any directory is not able to be mmap-ed. ++ * The similar scenario is applied to aufs_readlink() too. ++ */ ++ ++#if 0 /* stop calling security_file_mmap() */ ++/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */ ++#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b) ++ ++static unsigned long au_arch_prot_conv(unsigned long flags) ++{ ++ /* currently ppc64 only */ ++#ifdef CONFIG_PPC64 ++ /* cf. linux/arch/powerpc/include/asm/mman.h */ ++ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO); ++ return AuConv_VM_PROT(flags, SAO); ++#else ++ AuDebugOn(arch_calc_vm_prot_bits(-1)); ++ return 0; ++#endif ++} ++ ++static unsigned long au_prot_conv(unsigned long flags) ++{ ++ return AuConv_VM_PROT(flags, READ) ++ | AuConv_VM_PROT(flags, WRITE) ++ | AuConv_VM_PROT(flags, EXEC) ++ | au_arch_prot_conv(flags); ++} ++ ++/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */ ++#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b) ++ ++static unsigned long au_flag_conv(unsigned long flags) ++{ ++ return AuConv_VM_MAP(flags, GROWSDOWN) ++ | AuConv_VM_MAP(flags, DENYWRITE) ++ | AuConv_VM_MAP(flags, LOCKED); ++} ++#endif ++ ++static int aufs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err; ++ const unsigned char wlock ++ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); ++ struct super_block *sb; ++ struct file *h_file; ++ struct inode *inode; ++ ++ AuDbgVmRegion(file, vma); ++ ++ inode = file_inode(file); ++ sb = inode->i_sb; ++ lockdep_off(); ++ si_read_lock(sb, AuLock_NOPLMW); ++ ++ h_file = au_write_pre(file, wlock, /*wpre*/NULL); ++ lockdep_on(); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ err = 0; ++ au_set_mmapped(file); ++ au_vm_file_reset(vma, h_file); ++ /* ++ * we cannot call security_mmap_file() here since it may acquire ++ * mmap_sem or i_mutex. ++ * ++ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags), ++ * au_flag_conv(vma->vm_flags)); ++ */ ++ if (!err) ++ err = call_mmap(h_file, vma); ++ if (!err) { ++ au_vm_prfile_set(vma, file); ++ fsstack_copy_attr_atime(inode, file_inode(h_file)); ++ goto out_fput; /* success */ ++ } ++ au_unset_mmapped(file); ++ au_vm_file_reset(vma, file); ++ ++out_fput: ++ lockdep_off(); ++ ii_write_unlock(inode); ++ lockdep_on(); ++ fput(h_file); ++out: ++ lockdep_off(); ++ si_read_unlock(sb); ++ lockdep_on(); ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ int err; ++ struct au_write_pre wpre; ++ struct inode *inode; ++ struct file *h_file; ++ ++ err = 0; /* -EBADF; */ /* posix? */ ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out; ++ ++ inode = file_inode(file); ++ au_mtx_and_read_lock(inode); ++ ++ wpre.lsc = 0; ++ h_file = au_write_pre(file, /*do_ready*/1, &wpre); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out_unlock; ++ ++ err = vfsub_fsync(h_file, &h_file->f_path, datasync); ++ au_write_post(inode, h_file, &wpre, /*written*/0); ++ ++out_unlock: ++ si_read_unlock(inode->i_sb); ++ inode_unlock(inode); ++out: ++ return err; ++} ++ ++static int aufs_fasync(int fd, struct file *file, int flag) ++{ ++ int err; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ sb = file->f_path.dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ++ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ if (h_file->f_op->fasync) ++ err = h_file->f_op->fasync(fd, h_file, flag); ++ fput(h_file); /* instead of au_read_post() */ ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++static int aufs_setfl(struct file *file, unsigned long arg) ++{ ++ int err; ++ struct file *h_file; ++ struct super_block *sb; ++ ++ sb = file->f_path.dentry->d_sb; ++ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); ++ ++ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out; ++ ++ /* stop calling h_file->fasync */ ++ arg |= vfsub_file_flags(file) & FASYNC; ++ err = setfl(/*unused fd*/-1, h_file, arg); ++ fput(h_file); /* instead of au_read_post() */ ++ ++out: ++ si_read_unlock(sb); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* no one supports this operation, currently */ ++#if 0 /* reserved for future use */ ++static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset, ++ size_t len, loff_t *pos, int more) ++{ ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++const struct file_operations aufs_file_fop = { ++ .owner = THIS_MODULE, ++ ++ .llseek = default_llseek, ++ ++ .read_iter = aufs_read_iter, ++ .write_iter = aufs_write_iter, ++ ++#ifdef CONFIG_AUFS_POLL ++ .poll = aufs_poll, ++#endif ++ .unlocked_ioctl = aufs_ioctl_nondir, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = aufs_compat_ioctl_nondir, ++#endif ++ .mmap = aufs_mmap, ++ .open = aufs_open_nondir, ++ .flush = aufs_flush_nondir, ++ .release = aufs_release_nondir, ++ .fsync = aufs_fsync_nondir, ++ .fasync = aufs_fasync, ++ /* .sendpage = aufs_sendpage, */ ++ .setfl = aufs_setfl, ++ .splice_write = aufs_splice_write, ++ .splice_read = aufs_splice_read, ++#if 0 /* reserved for future use */ ++ .aio_splice_write = aufs_aio_splice_write, ++ .aio_splice_read = aufs_aio_splice_read, ++#endif ++ .fallocate = aufs_fallocate, ++ .copy_file_range = aufs_copy_file_range ++}; +diff --git a/fs/aufs/fhsm.c b/fs/aufs/fhsm.c +new file mode 100644 +index 000000000000..9f3f8ba3fa76 +--- /dev/null ++++ b/fs/aufs/fhsm.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -13274,9 +14435,11 @@ diff -ruN a/fs/aufs/fhsm.c b/fs/aufs/fhsm.c + if (u != AUFS_FHSM_CACHE_DEF_SEC) + seq_printf(seq, ",fhsm_sec=%u", u); +} -diff -ruN a/fs/aufs/file.c b/fs/aufs/file.c ---- a/fs/aufs/file.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/file.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/file.c b/fs/aufs/file.c +new file mode 100644 +index 000000000000..df012d8b923c +--- /dev/null ++++ b/fs/aufs/file.c @@ -0,0 +1,863 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -14141,9 +15304,11 @@ diff -ruN a/fs/aufs/file.c b/fs/aufs/file.c + .swap_deactivate = aufs_swap_deactivate +#endif /* CONFIG_AUFS_DEBUG */ +}; -diff -ruN a/fs/aufs/file.h b/fs/aufs/file.h ---- a/fs/aufs/file.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/file.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/file.h b/fs/aufs/file.h +new file mode 100644 +index 000000000000..ddaf01ae6d5b +--- /dev/null ++++ b/fs/aufs/file.h @@ -0,0 +1,342 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -14487,9 +15652,11 @@ diff -ruN a/fs/aufs/file.h b/fs/aufs/file.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_FILE_H__ */ -diff -ruN a/fs/aufs/finfo.c b/fs/aufs/finfo.c ---- a/fs/aufs/finfo.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/finfo.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c +new file mode 100644 +index 000000000000..73e1be63e03a +--- /dev/null ++++ b/fs/aufs/finfo.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -14640,784 +15807,11 @@ diff -ruN a/fs/aufs/finfo.c b/fs/aufs/finfo.c +out: + return err; +} -diff -ruN a/fs/aufs/f_op.c b/fs/aufs/f_op.c ---- a/fs/aufs/f_op.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/f_op.c 2022-03-30 02:32:42.072758101 +0300 -@@ -0,0 +1,771 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2005-2021 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . -+ */ -+ -+/* -+ * file and vm operations -+ */ -+ -+#include -+#include -+#include -+#include -+#include "aufs.h" -+ -+int au_do_open_nondir(struct file *file, int flags, struct file *h_file) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct dentry *dentry, *h_dentry; -+ struct au_finfo *finfo; -+ struct inode *h_inode; -+ -+ FiMustWriteLock(file); -+ -+ err = 0; -+ dentry = file->f_path.dentry; -+ AuDebugOn(IS_ERR_OR_NULL(dentry)); -+ finfo = au_fi(file); -+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop)); -+ atomic_set(&finfo->fi_mmapped, 0); -+ bindex = au_dbtop(dentry); -+ if (!h_file) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb); -+ if (unlikely(err)) -+ goto out; -+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ goto out; -+ } -+ } else { -+ h_dentry = h_file->f_path.dentry; -+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb); -+ if (unlikely(err)) -+ goto out; -+ /* br ref is already inc-ed */ -+ } -+ -+ if ((flags & __O_TMPFILE) -+ && !(flags & O_EXCL)) { -+ h_inode = file_inode(h_file); -+ spin_lock(&h_inode->i_lock); -+ h_inode->i_state |= I_LINKABLE; -+ spin_unlock(&h_inode->i_lock); -+ } -+ au_set_fbtop(file, bindex); -+ au_set_h_fptr(file, bindex, h_file); -+ au_update_figen(file); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ -+out: -+ return err; -+} -+ -+static int aufs_open_nondir(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ int err; -+ struct super_block *sb; -+ struct au_do_open_args args = { -+ .open = au_do_open_nondir -+ }; -+ -+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n", -+ file, vfsub_file_flags(file), file->f_mode); -+ -+ sb = file->f_path.dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_do_open(file, &args); -+ si_read_unlock(sb); -+ return err; -+} -+ -+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file) -+{ -+ struct au_finfo *finfo; -+ aufs_bindex_t bindex; -+ -+ finfo = au_fi(file); -+ au_hbl_del(&finfo->fi_hlist, -+ &au_sbi(file->f_path.dentry->d_sb)->si_files); -+ bindex = finfo->fi_btop; -+ if (bindex >= 0) -+ au_set_h_fptr(file, bindex, NULL); -+ -+ au_finfo_fin(file); -+ return 0; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_flush_nondir(struct file *file, fl_owner_t id) -+{ -+ int err; -+ struct file *h_file; -+ -+ err = 0; -+ h_file = au_hf_top(file); -+ if (h_file) -+ err = vfsub_flush(h_file, id); -+ return err; -+} -+ -+static int aufs_flush_nondir(struct file *file, fl_owner_t id) -+{ -+ return au_do_flush(file, id, au_do_flush_nondir); -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * read and write functions acquire [fdi]_rwsem once, but release before -+ * mmap_sem. This is because to stop a race condition between mmap(2). -+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping -+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in -+ * read functions after [fdi]_rwsem are released, but it should be harmless. -+ */ -+ -+/* Callers should call au_read_post() or fput() in the end */ -+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc) -+{ -+ struct file *h_file; -+ int err; -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc); -+ if (!err) { -+ di_read_unlock(file->f_path.dentry, AuLock_IR); -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ if (!keep_fi) -+ fi_read_unlock(file); -+ } else -+ h_file = ERR_PTR(err); -+ -+ return h_file; -+} -+ -+static void au_read_post(struct inode *inode, struct file *h_file) -+{ -+ /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(inode, file_inode(h_file)); -+ fput(h_file); -+} -+ -+struct au_write_pre { -+ /* input */ -+ unsigned int lsc; -+ -+ /* output */ -+ blkcnt_t blks; -+ aufs_bindex_t btop; -+}; -+ -+/* -+ * return with iinfo is write-locked -+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the -+ * end -+ */ -+static struct file *au_write_pre(struct file *file, int do_ready, -+ struct au_write_pre *wpre) -+{ -+ struct file *h_file; -+ struct dentry *dentry; -+ int err; -+ unsigned int lsc; -+ struct au_pin pin; -+ -+ lsc = 0; -+ if (wpre) -+ lsc = wpre->lsc; -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc); -+ h_file = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ -+ dentry = file->f_path.dentry; -+ if (do_ready) { -+ err = au_ready_to_write(file, -1, &pin); -+ if (unlikely(err)) { -+ h_file = ERR_PTR(err); -+ di_write_unlock(dentry); -+ goto out_fi; -+ } -+ } -+ -+ di_downgrade_lock(dentry, /*flags*/0); -+ if (wpre) -+ wpre->btop = au_fbtop(file); -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ if (wpre) -+ wpre->blks = file_inode(h_file)->i_blocks; -+ if (do_ready) -+ au_unpin(&pin); -+ di_read_unlock(dentry, /*flags*/0); -+ -+out_fi: -+ fi_write_unlock(file); -+out: -+ return h_file; -+} -+ -+static void au_write_post(struct inode *inode, struct file *h_file, -+ struct au_write_pre *wpre, ssize_t written) -+{ -+ struct inode *h_inode; -+ -+ au_cpup_attr_timesizes(inode); -+ AuDebugOn(au_ibtop(inode) != wpre->btop); -+ h_inode = file_inode(h_file); -+ inode->i_mode = h_inode->i_mode; -+ ii_write_unlock(inode); -+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */ -+ if (written > 0) -+ au_fhsm_wrote(inode->i_sb, wpre->btop, -+ /*force*/h_inode->i_blocks > wpre->blks); -+ fput(h_file); -+} -+ -+/* -+ * todo: very ugly -+ * it locks both of i_mutex and si_rwsem for read in safe. -+ * if the plink maintenance mode continues forever (that is the problem), -+ * may loop forever. -+ */ -+static void au_mtx_and_read_lock(struct inode *inode) -+{ -+ int err; -+ struct super_block *sb = inode->i_sb; -+ -+ while (1) { -+ inode_lock(inode); -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (!err) -+ break; -+ inode_unlock(inode); -+ si_read_lock(sb, AuLock_NOPLMW); -+ si_read_unlock(sb); -+ } -+} -+ -+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio, -+ struct iov_iter *iov_iter) -+{ -+ ssize_t err; -+ struct file *file; -+ ssize_t (*iter)(struct kiocb *, struct iov_iter *); -+ -+ err = security_file_permission(h_file, rw); -+ if (unlikely(err)) -+ goto out; -+ -+ err = -ENOSYS; /* the branch doesn't have its ->(read|write)_iter() */ -+ iter = NULL; -+ if (rw == MAY_READ) -+ iter = h_file->f_op->read_iter; -+ else if (rw == MAY_WRITE) -+ iter = h_file->f_op->write_iter; -+ -+ file = kio->ki_filp; -+ kio->ki_filp = h_file; -+ if (iter) { -+ lockdep_off(); -+ err = iter(kio, iov_iter); -+ lockdep_on(); -+ } else -+ /* currently there is no such fs */ -+ WARN_ON_ONCE(1); -+ kio->ki_filp = file; -+ -+out: -+ return err; -+} -+ -+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter) -+{ -+ ssize_t err; -+ struct file *file, *h_file; -+ struct inode *inode; -+ struct super_block *sb; -+ -+ file = kio->ki_filp; -+ inode = file_inode(file); -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ -+ h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ if (0 && au_test_loopback_kthread()) { -+ au_warn_loopback(h_file->f_path.dentry->d_sb); -+ if (file->f_mapping != h_file->f_mapping) { -+ file->f_mapping = h_file->f_mapping; -+ smp_mb(); /* unnecessary? */ -+ } -+ } -+ fi_read_unlock(file); -+ -+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ au_read_post(inode, h_file); -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter) -+{ -+ ssize_t err; -+ struct au_write_pre wpre; -+ struct inode *inode; -+ struct file *file, *h_file; -+ -+ file = kio->ki_filp; -+ inode = file_inode(file); -+ au_mtx_and_read_lock(inode); -+ -+ wpre.lsc = 0; -+ h_file = au_write_pre(file, /*do_ready*/1, &wpre); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter); -+ au_write_post(inode, h_file, &wpre, err); -+ -+out: -+ si_read_unlock(inode->i_sb); -+ inode_unlock(inode); -+ return err; -+} -+ -+/* -+ * We may be able to remove aufs_splice_{read,write}() since almost all FSes -+ * don't have their own .splice_{read,write} implimentations, and they use -+ * generic_file_splice_read() and iter_file_splice_write() who can act like the -+ * simple converters to f_op->iter_read() and ->iter_write(). -+ * But we keep our own implementations because some non-mainlined FSes may have -+ * their own .splice_{read,write} implimentations and aufs doesn't want to take -+ * away an opportunity to co-work with aufs from them. -+ */ -+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags) -+{ -+ ssize_t err; -+ struct file *h_file; -+ struct inode *inode; -+ struct super_block *sb; -+ -+ inode = file_inode(file); -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ -+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ au_read_post(inode, h_file); -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+static ssize_t -+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, -+ size_t len, unsigned int flags) -+{ -+ ssize_t err; -+ struct au_write_pre wpre; -+ struct inode *inode; -+ struct file *h_file; -+ -+ inode = file_inode(file); -+ au_mtx_and_read_lock(inode); -+ -+ wpre.lsc = 0; -+ h_file = au_write_pre(file, /*do_ready*/1, &wpre); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); -+ au_write_post(inode, h_file, &wpre, err); -+ -+out: -+ si_read_unlock(inode->i_sb); -+ inode_unlock(inode); -+ return err; -+} -+ -+static long aufs_fallocate(struct file *file, int mode, loff_t offset, -+ loff_t len) -+{ -+ long err; -+ struct au_write_pre wpre; -+ struct inode *inode; -+ struct file *h_file; -+ -+ inode = file_inode(file); -+ au_mtx_and_read_lock(inode); -+ -+ wpre.lsc = 0; -+ h_file = au_write_pre(file, /*do_ready*/1, &wpre); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ lockdep_off(); -+ err = vfs_fallocate(h_file, mode, offset, len); -+ lockdep_on(); -+ au_write_post(inode, h_file, &wpre, /*written*/1); -+ -+out: -+ si_read_unlock(inode->i_sb); -+ inode_unlock(inode); -+ return err; -+} -+ -+static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos, -+ struct file *dst, loff_t dst_pos, -+ size_t len, unsigned int flags) -+{ -+ ssize_t err; -+ struct au_write_pre wpre; -+ enum { SRC, DST }; -+ struct { -+ struct inode *inode; -+ struct file *h_file; -+ struct super_block *h_sb; -+ } a[2]; -+#define a_src a[SRC] -+#define a_dst a[DST] -+ -+ err = -EINVAL; -+ a_src.inode = file_inode(src); -+ if (unlikely(!S_ISREG(a_src.inode->i_mode))) -+ goto out; -+ a_dst.inode = file_inode(dst); -+ if (unlikely(!S_ISREG(a_dst.inode->i_mode))) -+ goto out; -+ -+ au_mtx_and_read_lock(a_dst.inode); -+ /* -+ * in order to match the order in di_write_lock2_{child,parent}(), -+ * use f_path.dentry for this comparison. -+ */ -+ if (src->f_path.dentry < dst->f_path.dentry) { -+ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1); -+ err = PTR_ERR(a_src.h_file); -+ if (IS_ERR(a_src.h_file)) -+ goto out_si; -+ -+ wpre.lsc = AuLsc_FI_2; -+ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre); -+ err = PTR_ERR(a_dst.h_file); -+ if (IS_ERR(a_dst.h_file)) { -+ au_read_post(a_src.inode, a_src.h_file); -+ goto out_si; -+ } -+ } else { -+ wpre.lsc = AuLsc_FI_1; -+ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre); -+ err = PTR_ERR(a_dst.h_file); -+ if (IS_ERR(a_dst.h_file)) -+ goto out_si; -+ -+ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2); -+ err = PTR_ERR(a_src.h_file); -+ if (IS_ERR(a_src.h_file)) { -+ au_write_post(a_dst.inode, a_dst.h_file, &wpre, -+ /*written*/0); -+ goto out_si; -+ } -+ } -+ -+ err = -EXDEV; -+ a_src.h_sb = file_inode(a_src.h_file)->i_sb; -+ a_dst.h_sb = file_inode(a_dst.h_file)->i_sb; -+ if (unlikely(a_src.h_sb != a_dst.h_sb)) { -+ AuDbgFile(src); -+ AuDbgFile(dst); -+ goto out_file; -+ } -+ -+ err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file, -+ dst_pos, len, flags); -+ -+out_file: -+ au_write_post(a_dst.inode, a_dst.h_file, &wpre, err); -+ fi_read_unlock(src); -+ au_read_post(a_src.inode, a_src.h_file); -+out_si: -+ si_read_unlock(a_dst.inode->i_sb); -+ inode_unlock(a_dst.inode); -+out: -+ return err; -+#undef a_src -+#undef a_dst -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * The locking order around current->mmap_sem. -+ * - in most and regular cases -+ * file I/O syscall -- aufs_read() or something -+ * -- si_rwsem for read -- mmap_sem -+ * (Note that [fdi]i_rwsem are released before mmap_sem). -+ * - in mmap case -+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem -+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for -+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held -+ * in file I/O. Aufs needs to stop lockdep in aufs_mmap() though. -+ * It means that when aufs acquires si_rwsem for write, the process should never -+ * acquire mmap_sem. -+ * -+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a -+ * problem either since any directory is not able to be mmap-ed. -+ * The similar scenario is applied to aufs_readlink() too. -+ */ -+ -+#if 0 /* stop calling security_file_mmap() */ -+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */ -+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b) -+ -+static unsigned long au_arch_prot_conv(unsigned long flags) -+{ -+ /* currently ppc64 only */ -+#ifdef CONFIG_PPC64 -+ /* cf. linux/arch/powerpc/include/asm/mman.h */ -+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO); -+ return AuConv_VM_PROT(flags, SAO); -+#else -+ AuDebugOn(arch_calc_vm_prot_bits(-1)); -+ return 0; -+#endif -+} -+ -+static unsigned long au_prot_conv(unsigned long flags) -+{ -+ return AuConv_VM_PROT(flags, READ) -+ | AuConv_VM_PROT(flags, WRITE) -+ | AuConv_VM_PROT(flags, EXEC) -+ | au_arch_prot_conv(flags); -+} -+ -+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */ -+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b) -+ -+static unsigned long au_flag_conv(unsigned long flags) -+{ -+ return AuConv_VM_MAP(flags, GROWSDOWN) -+ | AuConv_VM_MAP(flags, DENYWRITE) -+ | AuConv_VM_MAP(flags, LOCKED); -+} -+#endif -+ -+static int aufs_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ int err; -+ const unsigned char wlock -+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); -+ struct super_block *sb; -+ struct file *h_file; -+ struct inode *inode; -+ -+ AuDbgVmRegion(file, vma); -+ -+ inode = file_inode(file); -+ sb = inode->i_sb; -+ lockdep_off(); -+ si_read_lock(sb, AuLock_NOPLMW); -+ -+ h_file = au_write_pre(file, wlock, /*wpre*/NULL); -+ lockdep_on(); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ err = 0; -+ au_set_mmapped(file); -+ au_vm_file_reset(vma, h_file); -+ /* -+ * we cannot call security_mmap_file() here since it may acquire -+ * mmap_sem or i_mutex. -+ * -+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags), -+ * au_flag_conv(vma->vm_flags)); -+ */ -+ if (!err) -+ err = call_mmap(h_file, vma); -+ if (!err) { -+ au_vm_prfile_set(vma, file); -+ fsstack_copy_attr_atime(inode, file_inode(h_file)); -+ goto out_fput; /* success */ -+ } -+ au_unset_mmapped(file); -+ au_vm_file_reset(vma, file); -+ -+out_fput: -+ lockdep_off(); -+ ii_write_unlock(inode); -+ lockdep_on(); -+ fput(h_file); -+out: -+ lockdep_off(); -+ si_read_unlock(sb); -+ lockdep_on(); -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end, -+ int datasync) -+{ -+ int err; -+ struct au_write_pre wpre; -+ struct inode *inode; -+ struct file *h_file; -+ -+ err = 0; /* -EBADF; */ /* posix? */ -+ if (unlikely(!(file->f_mode & FMODE_WRITE))) -+ goto out; -+ -+ inode = file_inode(file); -+ au_mtx_and_read_lock(inode); -+ -+ wpre.lsc = 0; -+ h_file = au_write_pre(file, /*do_ready*/1, &wpre); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out_unlock; -+ -+ err = vfsub_fsync(h_file, &h_file->f_path, datasync); -+ au_write_post(inode, h_file, &wpre, /*written*/0); -+ -+out_unlock: -+ si_read_unlock(inode->i_sb); -+ inode_unlock(inode); -+out: -+ return err; -+} -+ -+static int aufs_fasync(int fd, struct file *file, int flag) -+{ -+ int err; -+ struct file *h_file; -+ struct super_block *sb; -+ -+ sb = file->f_path.dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ -+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ if (h_file->f_op->fasync) -+ err = h_file->f_op->fasync(fd, h_file, flag); -+ fput(h_file); /* instead of au_read_post() */ -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+static int aufs_setfl(struct file *file, unsigned long arg) -+{ -+ int err; -+ struct file *h_file; -+ struct super_block *sb; -+ -+ sb = file->f_path.dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ -+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ /* stop calling h_file->fasync */ -+ arg |= vfsub_file_flags(file) & FASYNC; -+ err = setfl(/*unused fd*/-1, h_file, arg); -+ fput(h_file); /* instead of au_read_post() */ -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* no one supports this operation, currently */ -+#if 0 /* reserved for future use */ -+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset, -+ size_t len, loff_t *pos, int more) -+{ -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+const struct file_operations aufs_file_fop = { -+ .owner = THIS_MODULE, -+ -+ .llseek = default_llseek, -+ -+ .read_iter = aufs_read_iter, -+ .write_iter = aufs_write_iter, -+ -+#ifdef CONFIG_AUFS_POLL -+ .poll = aufs_poll, -+#endif -+ .unlocked_ioctl = aufs_ioctl_nondir, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = aufs_compat_ioctl_nondir, -+#endif -+ .mmap = aufs_mmap, -+ .open = aufs_open_nondir, -+ .flush = aufs_flush_nondir, -+ .release = aufs_release_nondir, -+ .fsync = aufs_fsync_nondir, -+ .fasync = aufs_fasync, -+ /* .sendpage = aufs_sendpage, */ -+ .setfl = aufs_setfl, -+ .splice_write = aufs_splice_write, -+ .splice_read = aufs_splice_read, -+#if 0 /* reserved for future use */ -+ .aio_splice_write = aufs_aio_splice_write, -+ .aio_splice_read = aufs_aio_splice_read, -+#endif -+ .fallocate = aufs_fallocate, -+ .copy_file_range = aufs_copy_file_range -+}; -diff -ruN a/fs/aufs/fsctx.c b/fs/aufs/fsctx.c ---- a/fs/aufs/fsctx.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/fsctx.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/fsctx.c b/fs/aufs/fsctx.c +new file mode 100644 +index 000000000000..e5622fc17d60 +--- /dev/null ++++ b/fs/aufs/fsctx.c @@ -0,0 +1,1242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -16661,9 +17055,11 @@ diff -ruN a/fs/aufs/fsctx.c b/fs/aufs/fsctx.c + AuTraceErr(err); + return err; +} -diff -ruN a/fs/aufs/fstype.h b/fs/aufs/fstype.h ---- a/fs/aufs/fstype.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/fstype.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/fstype.h b/fs/aufs/fstype.h +new file mode 100644 +index 000000000000..33e7f2dc8a95 +--- /dev/null ++++ b/fs/aufs/fstype.h @@ -0,0 +1,401 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -17066,9 +17462,11 @@ diff -ruN a/fs/aufs/fstype.h b/fs/aufs/fstype.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_FSTYPE_H__ */ -diff -ruN a/fs/aufs/hbl.h b/fs/aufs/hbl.h ---- a/fs/aufs/hbl.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/hbl.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/hbl.h b/fs/aufs/hbl.h +new file mode 100644 +index 000000000000..33b6f7da81eb +--- /dev/null ++++ b/fs/aufs/hbl.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -17135,9 +17533,11 @@ diff -ruN a/fs/aufs/hbl.h b/fs/aufs/hbl.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_HBL_H__ */ -diff -ruN a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c ---- a/fs/aufs/hfsnotify.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/hfsnotify.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c +new file mode 100644 +index 000000000000..b029fa2085a8 +--- /dev/null ++++ b/fs/aufs/hfsnotify.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -17427,9 +17827,11 @@ diff -ruN a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c + .fin_br = au_hfsn_fin_br, + .init_br = au_hfsn_init_br +}; -diff -ruN a/fs/aufs/hfsplus.c b/fs/aufs/hfsplus.c ---- a/fs/aufs/hfsplus.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/hfsplus.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/hfsplus.c b/fs/aufs/hfsplus.c +new file mode 100644 +index 000000000000..e65d87b3fcc1 +--- /dev/null ++++ b/fs/aufs/hfsplus.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -17491,9 +17893,11 @@ diff -ruN a/fs/aufs/hfsplus.c b/fs/aufs/hfsplus.c + au_lcnt_dec(&br->br_nfiles); + } +} -diff -ruN a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c ---- a/fs/aufs/hnotify.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/hnotify.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c +new file mode 100644 +index 000000000000..5c4d95bf33c2 +--- /dev/null ++++ b/fs/aufs/hnotify.c @@ -0,0 +1,715 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -18210,2712 +18614,11 @@ diff -ruN a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c + if (au_cache[AuCache_HNOTIFY]) + au_hn_destroy_cache(); +} -diff -ruN a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c ---- a/fs/aufs/iinfo.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/iinfo.c 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,286 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2005-2021 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . -+ */ -+ -+/* -+ * inode private data -+ */ -+ -+#include "aufs.h" -+ -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) -+{ -+ struct inode *h_inode; -+ struct au_hinode *hinode; -+ -+ IiMustAnyLock(inode); -+ -+ hinode = au_hinode(au_ii(inode), bindex); -+ h_inode = hinode->hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ return h_inode; -+} -+ -+/* todo: hard/soft set? */ -+void au_hiput(struct au_hinode *hinode) -+{ -+ au_hn_free(hinode); -+ dput(hinode->hi_whdentry); -+ iput(hinode->hi_inode); -+} -+ -+unsigned int au_hi_flags(struct inode *inode, int isdir) -+{ -+ unsigned int flags; -+ const unsigned int mnt_flags = au_mntflags(inode->i_sb); -+ -+ flags = 0; -+ if (au_opt_test(mnt_flags, XINO)) -+ au_fset_hi(flags, XINO); -+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) -+ au_fset_hi(flags, HNOTIFY); -+ return flags; -+} -+ -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags) -+{ -+ struct au_hinode *hinode; -+ struct inode *hi; -+ struct au_iinfo *iinfo = au_ii(inode); -+ -+ IiMustWriteLock(inode); -+ -+ hinode = au_hinode(iinfo, bindex); -+ hi = hinode->hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ -+ if (hi) -+ au_hiput(hinode); -+ hinode->hi_inode = h_inode; -+ if (h_inode) { -+ int err; -+ struct super_block *sb = inode->i_sb; -+ struct au_branch *br; -+ -+ AuDebugOn(inode->i_mode -+ && (h_inode->i_mode & S_IFMT) -+ != (inode->i_mode & S_IFMT)); -+ if (bindex == iinfo->ii_btop) -+ au_cpup_igen(inode, h_inode); -+ br = au_sbr(sb, bindex); -+ hinode->hi_id = br->br_id; -+ if (au_ftest_hi(flags, XINO)) { -+ err = au_xino_write(sb, bindex, h_inode->i_ino, -+ inode->i_ino); -+ if (unlikely(err)) -+ AuIOErr1("failed au_xino_write() %d\n", err); -+ } -+ -+ if (au_ftest_hi(flags, HNOTIFY) -+ && au_br_hnotifyable(br->br_perm)) { -+ err = au_hn_alloc(hinode, inode); -+ if (unlikely(err)) -+ AuIOErr1("au_hn_alloc() %d\n", err); -+ } -+ } -+} -+ -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh) -+{ -+ struct au_hinode *hinode; -+ -+ IiMustWriteLock(inode); -+ -+ hinode = au_hinode(au_ii(inode), bindex); -+ AuDebugOn(hinode->hi_whdentry); -+ hinode->hi_whdentry = h_wh; -+} -+ -+void au_update_iigen(struct inode *inode, int half) -+{ -+ struct au_iinfo *iinfo; -+ struct au_iigen *iigen; -+ unsigned int sigen; -+ -+ sigen = au_sigen(inode->i_sb); -+ iinfo = au_ii(inode); -+ iigen = &iinfo->ii_generation; -+ spin_lock(&iigen->ig_spin); -+ iigen->ig_generation = sigen; -+ if (half) -+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED); -+ else -+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED); -+ spin_unlock(&iigen->ig_spin); -+} -+ -+/* it may be called at remount time, too */ -+void au_update_ibrange(struct inode *inode, int do_put_zero) -+{ -+ struct au_iinfo *iinfo; -+ aufs_bindex_t bindex, bbot; -+ -+ AuDebugOn(au_is_bad_inode(inode)); -+ IiMustWriteLock(inode); -+ -+ iinfo = au_ii(inode); -+ if (do_put_zero && iinfo->ii_btop >= 0) { -+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; -+ bindex++) { -+ struct inode *h_i; -+ -+ h_i = au_hinode(iinfo, bindex)->hi_inode; -+ if (h_i -+ && !h_i->i_nlink -+ && !(h_i->i_state & I_LINKABLE)) -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ } -+ } -+ -+ iinfo->ii_btop = -1; -+ iinfo->ii_bbot = -1; -+ bbot = au_sbbot(inode->i_sb); -+ for (bindex = 0; bindex <= bbot; bindex++) -+ if (au_hinode(iinfo, bindex)->hi_inode) { -+ iinfo->ii_btop = bindex; -+ break; -+ } -+ if (iinfo->ii_btop >= 0) -+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--) -+ if (au_hinode(iinfo, bindex)->hi_inode) { -+ iinfo->ii_bbot = bindex; -+ break; -+ } -+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_icntnr_init_once(void *_c) -+{ -+ struct au_icntnr *c = _c; -+ struct au_iinfo *iinfo = &c->iinfo; -+ -+ spin_lock_init(&iinfo->ii_generation.ig_spin); -+ au_rw_init(&iinfo->ii_rwsem); -+ inode_init_once(&c->vfs_inode); -+} -+ -+void au_hinode_init(struct au_hinode *hinode) -+{ -+ hinode->hi_inode = NULL; -+ hinode->hi_id = -1; -+ au_hn_init(hinode); -+ hinode->hi_whdentry = NULL; -+} -+ -+int au_iinfo_init(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ struct super_block *sb; -+ struct au_hinode *hi; -+ int nbr, i; -+ -+ sb = inode->i_sb; -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ nbr = au_sbbot(sb) + 1; -+ if (unlikely(nbr <= 0)) -+ nbr = 1; -+ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); -+ if (hi) { -+ au_lcnt_inc(&au_sbi(sb)->si_ninodes); -+ -+ iinfo->ii_hinode = hi; -+ for (i = 0; i < nbr; i++, hi++) -+ au_hinode_init(hi); -+ -+ iinfo->ii_generation.ig_generation = au_sigen(sb); -+ iinfo->ii_btop = -1; -+ iinfo->ii_bbot = -1; -+ iinfo->ii_vdir = NULL; -+ return 0; -+ } -+ return -ENOMEM; -+} -+ -+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink) -+{ -+ int err, i; -+ struct au_hinode *hip; -+ -+ AuRwMustWriteLock(&iinfo->ii_rwsem); -+ -+ err = -ENOMEM; -+ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS, -+ may_shrink); -+ if (hip) { -+ iinfo->ii_hinode = hip; -+ i = iinfo->ii_bbot + 1; -+ hip += i; -+ for (; i < nbr; i++, hip++) -+ au_hinode_init(hip); -+ err = 0; -+ } -+ -+ return err; -+} -+ -+void au_iinfo_fin(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ struct au_hinode *hi; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bbot; -+ const unsigned char unlinked = !inode->i_nlink; -+ -+ AuDebugOn(au_is_bad_inode(inode)); -+ -+ sb = inode->i_sb; -+ au_lcnt_dec(&au_sbi(sb)->si_ninodes); -+ if (si_pid_test(sb)) -+ au_xino_delete_inode(inode, unlinked); -+ else { -+ /* -+ * it is safe to hide the dependency between sbinfo and -+ * sb->s_umount. -+ */ -+ lockdep_off(); -+ si_noflush_read_lock(sb); -+ au_xino_delete_inode(inode, unlinked); -+ si_read_unlock(sb); -+ lockdep_on(); -+ } -+ -+ iinfo = au_ii(inode); -+ if (iinfo->ii_vdir) -+ au_vdir_free(iinfo->ii_vdir); -+ -+ bindex = iinfo->ii_btop; -+ if (bindex >= 0) { -+ hi = au_hinode(iinfo, bindex); -+ bbot = iinfo->ii_bbot; -+ while (bindex++ <= bbot) { -+ if (hi->hi_inode) -+ au_hiput(hi); -+ hi++; -+ } -+ } -+ au_kfree_rcu(iinfo->ii_hinode); -+ AuRwDestroy(&iinfo->ii_rwsem); -+} -diff -ruN a/fs/aufs/inode.c b/fs/aufs/inode.c ---- a/fs/aufs/inode.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/inode.c 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,531 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2005-2021 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . -+ */ -+ -+/* -+ * inode functions -+ */ -+ -+#include -+#include "aufs.h" -+ -+struct inode *au_igrab(struct inode *inode) -+{ -+ if (inode) { -+ AuDebugOn(!atomic_read(&inode->i_count)); -+ ihold(inode); -+ } -+ return inode; -+} -+ -+static void au_refresh_hinode_attr(struct inode *inode, int do_version) -+{ -+ au_cpup_attr_all(inode, /*force*/0); -+ au_update_iigen(inode, /*half*/1); -+ if (do_version) -+ inode_inc_iversion(inode); -+} -+ -+static int au_ii_refresh(struct inode *inode, int *update) -+{ -+ int err, e, nbr; -+ umode_t type; -+ aufs_bindex_t bindex, new_bindex; -+ struct super_block *sb; -+ struct au_iinfo *iinfo; -+ struct au_hinode *p, *q, tmp; -+ -+ AuDebugOn(au_is_bad_inode(inode)); -+ IiMustWriteLock(inode); -+ -+ *update = 0; -+ sb = inode->i_sb; -+ nbr = au_sbbot(sb) + 1; -+ type = inode->i_mode & S_IFMT; -+ iinfo = au_ii(inode); -+ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ AuDebugOn(iinfo->ii_btop < 0); -+ p = au_hinode(iinfo, iinfo->ii_btop); -+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; -+ bindex++, p++) { -+ if (!p->hi_inode) -+ continue; -+ -+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); -+ new_bindex = au_br_index(sb, p->hi_id); -+ if (new_bindex == bindex) -+ continue; -+ -+ if (new_bindex < 0) { -+ *update = 1; -+ au_hiput(p); -+ p->hi_inode = NULL; -+ continue; -+ } -+ -+ if (new_bindex < iinfo->ii_btop) -+ iinfo->ii_btop = new_bindex; -+ if (iinfo->ii_bbot < new_bindex) -+ iinfo->ii_bbot = new_bindex; -+ /* swap two lower inode, and loop again */ -+ q = au_hinode(iinfo, new_bindex); -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hi_inode) { -+ bindex--; -+ p--; -+ } -+ } -+ au_update_ibrange(inode, /*do_put_zero*/0); -+ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */ -+ e = au_dy_irefresh(inode); -+ if (unlikely(e && !err)) -+ err = e; -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+void au_refresh_iop(struct inode *inode, int force_getattr) -+{ -+ int type; -+ struct au_sbinfo *sbi = au_sbi(inode->i_sb); -+ const struct inode_operations *iop -+ = force_getattr ? aufs_iop : sbi->si_iop_array; -+ -+ if (inode->i_op == iop) -+ return; -+ -+ switch (inode->i_mode & S_IFMT) { -+ case S_IFDIR: -+ type = AuIop_DIR; -+ break; -+ case S_IFLNK: -+ type = AuIop_SYMLINK; -+ break; -+ default: -+ type = AuIop_OTHER; -+ break; -+ } -+ -+ inode->i_op = iop + type; -+ /* unnecessary smp_wmb() */ -+} -+ -+int au_refresh_hinode_self(struct inode *inode) -+{ -+ int err, update; -+ -+ err = au_ii_refresh(inode, &update); -+ if (!err) -+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry) -+{ -+ int err, e, update; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, bbot; -+ unsigned char isdir; -+ struct au_hinode *p; -+ struct au_iinfo *iinfo; -+ -+ err = au_ii_refresh(inode, &update); -+ if (unlikely(err)) -+ goto out; -+ -+ update = 0; -+ iinfo = au_ii(inode); -+ p = au_hinode(iinfo, iinfo->ii_btop); -+ mode = (inode->i_mode & S_IFMT); -+ isdir = S_ISDIR(mode); -+ flags = au_hi_flags(inode, isdir); -+ bbot = au_dbbot(dentry); -+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) { -+ struct inode *h_i, *h_inode; -+ struct dentry *h_d; -+ -+ h_d = au_h_dptr(dentry, bindex); -+ if (!h_d || d_is_negative(h_d)) -+ continue; -+ -+ h_inode = d_inode(h_d); -+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT)); -+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) { -+ h_i = au_h_iptr(inode, bindex); -+ if (h_i) { -+ if (h_i == h_inode) -+ continue; -+ err = -EIO; -+ break; -+ } -+ } -+ if (bindex < iinfo->ii_btop) -+ iinfo->ii_btop = bindex; -+ if (iinfo->ii_bbot < bindex) -+ iinfo->ii_bbot = bindex; -+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags); -+ update = 1; -+ } -+ au_update_ibrange(inode, /*do_put_zero*/0); -+ e = au_dy_irefresh(inode); -+ if (unlikely(e && !err)) -+ err = e; -+ if (!err) -+ au_refresh_hinode_attr(inode, update && isdir); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int set_inode(struct inode *inode, struct dentry *dentry) -+{ -+ int err; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, btop, btail; -+ unsigned char isdir; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct au_iinfo *iinfo; -+ const struct inode_operations *iop; -+ -+ IiMustWriteLock(inode); -+ -+ err = 0; -+ isdir = 0; -+ iop = au_sbi(inode->i_sb)->si_iop_array; -+ btop = au_dbtop(dentry); -+ h_dentry = au_h_dptr(dentry, btop); -+ h_inode = d_inode(h_dentry); -+ mode = h_inode->i_mode; -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ btail = au_dbtail(dentry); -+ inode->i_op = iop + AuIop_OTHER; -+ inode->i_fop = &aufs_file_fop; -+ err = au_dy_iaop(inode, btop, h_inode); -+ if (unlikely(err)) -+ goto out; -+ break; -+ case S_IFDIR: -+ isdir = 1; -+ btail = au_dbtaildir(dentry); -+ inode->i_op = iop + AuIop_DIR; -+ inode->i_fop = &aufs_dir_fop; -+ break; -+ case S_IFLNK: -+ btail = au_dbtail(dentry); -+ inode->i_op = iop + AuIop_SYMLINK; -+ break; -+ case S_IFBLK: -+ case S_IFCHR: -+ case S_IFIFO: -+ case S_IFSOCK: -+ btail = au_dbtail(dentry); -+ inode->i_op = iop + AuIop_OTHER; -+ init_special_inode(inode, mode, h_inode->i_rdev); -+ break; -+ default: -+ AuIOErr("Unknown file type 0%o\n", mode); -+ err = -EIO; -+ goto out; -+ } -+ -+ /* do not set hnotify for whiteouted dirs (SHWH mode) */ -+ flags = au_hi_flags(inode, isdir); -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) -+ && au_ftest_hi(flags, HNOTIFY) -+ && dentry->d_name.len > AUFS_WH_PFX_LEN -+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) -+ au_fclr_hi(flags, HNOTIFY); -+ iinfo = au_ii(inode); -+ iinfo->ii_btop = btop; -+ iinfo->ii_bbot = btail; -+ for (bindex = btop; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry) -+ au_set_h_iptr(inode, bindex, -+ au_igrab(d_inode(h_dentry)), flags); -+ } -+ au_cpup_attr_all(inode, /*force*/1); -+ /* -+ * to force calling aufs_get_acl() every time, -+ * do not call cache_no_acl() for aufs inode. -+ */ -+ -+out: -+ return err; -+} -+ -+/* -+ * successful returns with iinfo write_locked -+ * minus: errno -+ * zero: success, matched -+ * plus: no error, but unmatched -+ */ -+static int reval_inode(struct inode *inode, struct dentry *dentry) -+{ -+ int err; -+ unsigned int gen, igflags; -+ aufs_bindex_t bindex, bbot; -+ struct inode *h_inode, *h_dinode; -+ struct dentry *h_dentry; -+ -+ /* -+ * before this function, if aufs got any iinfo lock, it must be only -+ * one, the parent dir. -+ * it can happen by UDBA and the obsoleted inode number. -+ */ -+ err = -EIO; -+ if (unlikely(inode->i_ino == parent_ino(dentry))) -+ goto out; -+ -+ err = 1; -+ ii_write_lock_new_child(inode); -+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry)); -+ h_dinode = d_inode(h_dentry); -+ bbot = au_ibbot(inode); -+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (!h_inode || h_inode != h_dinode) -+ continue; -+ -+ err = 0; -+ gen = au_iigen(inode, &igflags); -+ if (gen == au_digen(dentry) -+ && !au_ig_ftest(igflags, HALF_REFRESHED)) -+ break; -+ -+ /* fully refresh inode using dentry */ -+ err = au_refresh_hinode(inode, dentry); -+ if (!err) -+ au_update_iigen(inode, /*half*/0); -+ break; -+ } -+ -+ if (unlikely(err)) -+ ii_write_unlock(inode); -+out: -+ return err; -+} -+ -+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ unsigned int d_type, ino_t *ino) -+{ -+ int err, idx; -+ const int isnondir = d_type != DT_DIR; -+ -+ /* prevent hardlinked inode number from race condition */ -+ if (isnondir) { -+ err = au_xinondir_enter(sb, bindex, h_ino, &idx); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ err = au_xino_read(sb, bindex, h_ino, ino); -+ if (unlikely(err)) -+ goto out_xinondir; -+ -+ if (!*ino) { -+ err = -EIO; -+ *ino = au_xino_new_ino(sb); -+ if (unlikely(!*ino)) -+ goto out_xinondir; -+ err = au_xino_write(sb, bindex, h_ino, *ino); -+ if (unlikely(err)) -+ goto out_xinondir; -+ } -+ -+out_xinondir: -+ if (isnondir && idx >= 0) -+ au_xinondir_leave(sb, bindex, h_ino, idx); -+out: -+ return err; -+} -+ -+/* successful returns with iinfo write_locked */ -+/* todo: return with unlocked? */ -+struct inode *au_new_inode(struct dentry *dentry, int must_new) -+{ -+ struct inode *inode, *h_inode; -+ struct dentry *h_dentry; -+ struct super_block *sb; -+ ino_t h_ino, ino; -+ int err, idx, hlinked; -+ aufs_bindex_t btop; -+ -+ sb = dentry->d_sb; -+ btop = au_dbtop(dentry); -+ h_dentry = au_h_dptr(dentry, btop); -+ h_inode = d_inode(h_dentry); -+ h_ino = h_inode->i_ino; -+ hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1; -+ -+new_ino: -+ /* -+ * stop 'race'-ing between hardlinks under different -+ * parents. -+ */ -+ if (hlinked) { -+ err = au_xinondir_enter(sb, btop, h_ino, &idx); -+ inode = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ err = au_xino_read(sb, btop, h_ino, &ino); -+ inode = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_xinondir; -+ -+ if (!ino) { -+ ino = au_xino_new_ino(sb); -+ if (unlikely(!ino)) { -+ inode = ERR_PTR(-EIO); -+ goto out_xinondir; -+ } -+ } -+ -+ AuDbg("i%lu\n", (unsigned long)ino); -+ inode = au_iget_locked(sb, ino); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out_xinondir; -+ -+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); -+ if (inode->i_state & I_NEW) { -+ ii_write_lock_new_child(inode); -+ err = set_inode(inode, dentry); -+ if (!err) { -+ unlock_new_inode(inode); -+ goto out_xinondir; /* success */ -+ } -+ -+ /* -+ * iget_failed() calls iput(), but we need to call -+ * ii_write_unlock() after iget_failed(). so dirty hack for -+ * i_count. -+ */ -+ atomic_inc(&inode->i_count); -+ iget_failed(inode); -+ ii_write_unlock(inode); -+ au_xino_write(sb, btop, h_ino, /*ino*/0); -+ /* ignore this error */ -+ goto out_iput; -+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { -+ /* -+ * horrible race condition between lookup, readdir and copyup -+ * (or something). -+ */ -+ if (hlinked && idx >= 0) -+ au_xinondir_leave(sb, btop, h_ino, idx); -+ err = reval_inode(inode, dentry); -+ if (unlikely(err < 0)) { -+ hlinked = 0; -+ goto out_iput; -+ } -+ if (!err) -+ goto out; /* success */ -+ else if (hlinked && idx >= 0) { -+ err = au_xinondir_enter(sb, btop, h_ino, &idx); -+ if (unlikely(err)) { -+ iput(inode); -+ inode = ERR_PTR(err); -+ goto out; -+ } -+ } -+ } -+ -+ if (unlikely(au_test_fs_unique_ino(h_inode))) -+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," -+ " b%d, %s, %pd, hi%lu, i%lu.\n", -+ btop, au_sbtype(h_dentry->d_sb), dentry, -+ (unsigned long)h_ino, (unsigned long)ino); -+ ino = 0; -+ err = au_xino_write(sb, btop, h_ino, /*ino*/0); -+ if (!err) { -+ iput(inode); -+ if (hlinked && idx >= 0) -+ au_xinondir_leave(sb, btop, h_ino, idx); -+ goto new_ino; -+ } -+ -+out_iput: -+ iput(inode); -+ inode = ERR_PTR(err); -+out_xinondir: -+ if (hlinked && idx >= 0) -+ au_xinondir_leave(sb, btop, h_ino, idx); -+out: -+ return inode; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode) -+{ -+ int err; -+ struct inode *hi; -+ -+ err = au_br_rdonly(au_sbr(sb, bindex)); -+ -+ /* pseudo-link after flushed may happen out of bounds */ -+ if (!err -+ && inode -+ && au_ibtop(inode) <= bindex -+ && bindex <= au_ibbot(inode)) { -+ /* -+ * permission check is unnecessary since vfsub routine -+ * will be called later -+ */ -+ hi = au_h_iptr(inode, bindex); -+ if (hi) -+ err = IS_IMMUTABLE(hi) ? -EROFS : 0; -+ } -+ -+ return err; -+} -+ -+int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode, -+ int mask) -+{ -+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) -+ return 0; -+ return inode_permission(h_userns, h_inode, mask); -+} -+ -+int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode, -+ int mask) -+{ -+ if (au_test_nfs(h_inode->i_sb) -+ && (mask & MAY_WRITE) -+ && S_ISDIR(h_inode->i_mode)) -+ mask |= MAY_READ; /* force permission check */ -+ return au_test_h_perm(h_userns, h_inode, mask); -+} -diff -ruN a/fs/aufs/inode.h b/fs/aufs/inode.h ---- a/fs/aufs/inode.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/inode.h 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,705 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+/* -+ * Copyright (C) 2005-2021 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . -+ */ -+ -+/* -+ * inode operations -+ */ -+ -+#ifndef __AUFS_INODE_H__ -+#define __AUFS_INODE_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "rwsem.h" -+ -+struct vfsmount; -+ -+struct au_hnotify { -+#ifdef CONFIG_AUFS_HNOTIFY -+#ifdef CONFIG_AUFS_HFSNOTIFY -+ /* never use fsnotify_add_vfsmount_mark() */ -+ struct fsnotify_mark hn_mark; -+#endif -+ struct inode *hn_aufs_inode; /* no get/put */ -+ struct rcu_head rcu; -+#endif -+} ____cacheline_aligned_in_smp; -+ -+struct au_hinode { -+ struct inode *hi_inode; -+ aufs_bindex_t hi_id; -+#ifdef CONFIG_AUFS_HNOTIFY -+ struct au_hnotify *hi_notify; -+#endif -+ -+ /* reference to the copied-up whiteout with get/put */ -+ struct dentry *hi_whdentry; -+}; -+ -+/* ig_flags */ -+#define AuIG_HALF_REFRESHED 1 -+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name) -+#define au_ig_fset(flags, name) \ -+ do { (flags) |= AuIG_##name; } while (0) -+#define au_ig_fclr(flags, name) \ -+ do { (flags) &= ~AuIG_##name; } while (0) -+ -+struct au_iigen { -+ spinlock_t ig_spin; -+ __u32 ig_generation, ig_flags; -+}; -+ -+struct au_vdir; -+struct au_iinfo { -+ struct au_iigen ii_generation; -+ struct super_block *ii_hsb1; /* no get/put */ -+ -+ struct au_rwsem ii_rwsem; -+ aufs_bindex_t ii_btop, ii_bbot; -+ __u32 ii_higen; -+ struct au_hinode *ii_hinode; -+ struct au_vdir *ii_vdir; -+}; -+ -+struct au_icntnr { -+ struct au_iinfo iinfo; -+ struct inode vfs_inode; -+ struct hlist_bl_node plink; -+ struct rcu_head rcu; -+} ____cacheline_aligned_in_smp; -+ -+/* au_pin flags */ -+#define AuPin_DI_LOCKED 1 -+#define AuPin_MNT_WRITE (1 << 1) -+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) -+#define au_fset_pin(flags, name) \ -+ do { (flags) |= AuPin_##name; } while (0) -+#define au_fclr_pin(flags, name) \ -+ do { (flags) &= ~AuPin_##name; } while (0) -+ -+struct au_pin { -+ /* input */ -+ struct dentry *dentry; -+ unsigned int udba; -+ unsigned char lsc_di, lsc_hi, flags; -+ aufs_bindex_t bindex; -+ -+ /* output */ -+ struct dentry *parent; -+ struct au_hinode *hdir; -+ struct vfsmount *h_mnt; -+ -+ /* temporary unlock/relock for copyup */ -+ struct dentry *h_dentry, *h_parent; -+ struct au_branch *br; -+ struct task_struct *task; -+}; -+ -+void au_pin_hdir_unlock(struct au_pin *p); -+int au_pin_hdir_lock(struct au_pin *p); -+int au_pin_hdir_relock(struct au_pin *p); -+void au_pin_hdir_acquire_nest(struct au_pin *p); -+void au_pin_hdir_release(struct au_pin *p); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct au_iinfo *au_ii(struct inode *inode) -+{ -+ BUG_ON(is_bad_inode(inode)); -+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* inode.c */ -+struct inode *au_igrab(struct inode *inode); -+void au_refresh_iop(struct inode *inode, int force_getattr); -+int au_refresh_hinode_self(struct inode *inode); -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry); -+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ unsigned int d_type, ino_t *ino); -+struct inode *au_new_inode(struct dentry *dentry, int must_new); -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode); -+int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode, -+ int mask); -+int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode, -+ int mask); -+ -+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, -+ ino_t h_ino, unsigned int d_type, ino_t *ino) -+{ -+#ifdef CONFIG_AUFS_SHWH -+ return au_ino(sb, bindex, h_ino, d_type, ino); -+#else -+ return 0; -+#endif -+} -+ -+/* i_op.c */ -+enum { -+ AuIop_SYMLINK, -+ AuIop_DIR, -+ AuIop_OTHER, -+ AuIop_Last -+}; -+extern struct inode_operations aufs_iop[AuIop_Last], /* not const */ -+ aufs_iop_nogetattr[AuIop_Last]; -+ -+/* au_wr_dir flags */ -+#define AuWrDir_ADD_ENTRY 1 -+#define AuWrDir_ISDIR (1 << 1) -+#define AuWrDir_TMPFILE (1 << 2) -+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) -+#define au_fset_wrdir(flags, name) \ -+ do { (flags) |= AuWrDir_##name; } while (0) -+#define au_fclr_wrdir(flags, name) \ -+ do { (flags) &= ~AuWrDir_##name; } while (0) -+ -+struct au_wr_dir_args { -+ aufs_bindex_t force_btgt; -+ unsigned char flags; -+}; -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args); -+ -+struct dentry *au_pinned_h_parent(struct au_pin *pin); -+void au_pin_init(struct au_pin *pin, struct dentry *dentry, -+ aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags); -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) __must_check; -+int au_do_pin(struct au_pin *pin) __must_check; -+void au_unpin(struct au_pin *pin); -+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen); -+ -+#define AuIcpup_DID_CPUP 1 -+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) -+#define au_fset_icpup(flags, name) \ -+ do { (flags) |= AuIcpup_##name; } while (0) -+#define au_fclr_icpup(flags, name) \ -+ do { (flags) &= ~AuIcpup_##name; } while (0) -+ -+struct au_icpup_args { -+ unsigned char flags; -+ unsigned char pin_flags; -+ aufs_bindex_t btgt; -+ unsigned int udba; -+ struct au_pin pin; -+ struct path h_path; -+ struct inode *h_inode; -+}; -+ -+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, -+ struct au_icpup_args *a); -+ -+int au_h_path_getattr(struct dentry *dentry, struct inode *inode, int force, -+ struct path *h_path, int locked); -+ -+/* i_op_add.c */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_mknod(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode, dev_t dev); -+int aufs_symlink(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, const char *symname); -+int aufs_create(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode, bool want_excl); -+struct vfsub_aopen_args; -+int au_aopen_or_create(struct inode *dir, struct dentry *dentry, -+ struct vfsub_aopen_args *args); -+int aufs_tmpfile(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode); -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry); -+int aufs_mkdir(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode); -+ -+/* i_op_del.c */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_unlink(struct inode *dir, struct dentry *dentry); -+int aufs_rmdir(struct inode *dir, struct dentry *dentry); -+ -+/* i_op_ren.c */ -+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); -+int aufs_rename(struct user_namespace *userns, -+ struct inode *_src_dir, struct dentry *_src_dentry, -+ struct inode *_dst_dir, struct dentry *_dst_dentry, -+ unsigned int _flags); -+ -+/* iinfo.c */ -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); -+void au_hiput(struct au_hinode *hinode); -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh); -+unsigned int au_hi_flags(struct inode *inode, int isdir); -+ -+/* hinode flags */ -+#define AuHi_XINO 1 -+#define AuHi_HNOTIFY (1 << 1) -+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) -+#define au_fset_hi(flags, name) \ -+ do { (flags) |= AuHi_##name; } while (0) -+#define au_fclr_hi(flags, name) \ -+ do { (flags) &= ~AuHi_##name; } while (0) -+ -+#ifndef CONFIG_AUFS_HNOTIFY -+#undef AuHi_HNOTIFY -+#define AuHi_HNOTIFY 0 -+#endif -+ -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags); -+ -+void au_update_iigen(struct inode *inode, int half); -+void au_update_ibrange(struct inode *inode, int do_put_zero); -+ -+void au_icntnr_init_once(void *_c); -+void au_hinode_init(struct au_hinode *hinode); -+int au_iinfo_init(struct inode *inode); -+void au_iinfo_fin(struct inode *inode); -+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink); -+ -+#ifdef CONFIG_PROC_FS -+/* plink.c */ -+int au_plink_maint(struct super_block *sb, int flags); -+struct au_sbinfo; -+void au_plink_maint_leave(struct au_sbinfo *sbinfo); -+int au_plink_maint_enter(struct super_block *sb); -+#ifdef CONFIG_AUFS_DEBUG -+void au_plink_list(struct super_block *sb); -+#else -+AuStubVoid(au_plink_list, struct super_block *sb) -+#endif -+int au_plink_test(struct inode *inode); -+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); -+void au_plink_append(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+void au_plink_put(struct super_block *sb, int verbose); -+void au_plink_clean(struct super_block *sb, int verbose); -+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); -+#else -+AuStubInt0(au_plink_maint, struct super_block *sb, int flags); -+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); -+AuStubInt0(au_plink_maint_enter, struct super_block *sb); -+AuStubVoid(au_plink_list, struct super_block *sb); -+AuStubInt0(au_plink_test, struct inode *inode); -+AuStub(struct dentry *, au_plink_lkup, return NULL, -+ struct inode *inode, aufs_bindex_t bindex); -+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+AuStubVoid(au_plink_put, struct super_block *sb, int verbose); -+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); -+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); -+#endif /* CONFIG_PROC_FS */ -+ -+#ifdef CONFIG_AUFS_XATTR -+/* xattr.c */ -+int au_cpup_xattr(struct path *h_dst, struct path *h_src, int ignore_flags, -+ unsigned int verbose); -+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size); -+void au_xattr_init(struct super_block *sb); -+#else -+AuStubInt0(au_cpup_xattr, struct path *h_dst, struct path *h_src, -+ int ignore_flags, unsigned int verbose); -+AuStubVoid(au_xattr_init, struct super_block *sb); -+#endif -+ -+#ifdef CONFIG_FS_POSIX_ACL -+struct posix_acl *aufs_get_acl(struct inode *inode, int type, bool rcu); -+int aufs_set_acl(struct user_namespace *userns, struct inode *inode, -+ struct posix_acl *acl, int type); -+#endif -+ -+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL) -+enum { -+ AU_XATTR_SET, -+ AU_ACL_SET -+}; -+ -+struct au_sxattr { -+ int type; -+ union { -+ struct { -+ const char *name; -+ const void *value; -+ size_t size; -+ int flags; -+ } set; -+ struct { -+ struct posix_acl *acl; -+ int type; -+ } acl_set; -+ } u; -+}; -+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode, -+ struct au_sxattr *arg); -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* lock subclass for iinfo */ -+enum { -+ AuLsc_II_CHILD, /* child first */ -+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ -+ AuLsc_II_CHILD3, /* copyup dirs */ -+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ -+ AuLsc_II_PARENT2, -+ AuLsc_II_PARENT3, /* copyup dirs */ -+ AuLsc_II_NEW_CHILD -+}; -+ -+/* -+ * ii_read_lock_child, ii_write_lock_child, -+ * ii_read_lock_child2, ii_write_lock_child2, -+ * ii_read_lock_child3, ii_write_lock_child3, -+ * ii_read_lock_parent, ii_write_lock_parent, -+ * ii_read_lock_parent2, ii_write_lock_parent2, -+ * ii_read_lock_parent3, ii_write_lock_parent3, -+ * ii_read_lock_new_child, ii_write_lock_new_child, -+ */ -+#define AuReadLockFunc(name, lsc) \ -+static inline void ii_read_lock_##name(struct inode *i) \ -+{ \ -+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} -+ -+#define AuWriteLockFunc(name, lsc) \ -+static inline void ii_write_lock_##name(struct inode *i) \ -+{ \ -+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} -+ -+#define AuRWLockFuncs(name, lsc) \ -+ AuReadLockFunc(name, lsc) \ -+ AuWriteLockFunc(name, lsc) -+ -+AuRWLockFuncs(child, CHILD); -+AuRWLockFuncs(child2, CHILD2); -+AuRWLockFuncs(child3, CHILD3); -+AuRWLockFuncs(parent, PARENT); -+AuRWLockFuncs(parent2, PARENT2); -+AuRWLockFuncs(parent3, PARENT3); -+AuRWLockFuncs(new_child, NEW_CHILD); -+ -+#undef AuReadLockFunc -+#undef AuWriteLockFunc -+#undef AuRWLockFuncs -+ -+#define ii_read_unlock(i) au_rw_read_unlock(&au_ii(i)->ii_rwsem) -+#define ii_write_unlock(i) au_rw_write_unlock(&au_ii(i)->ii_rwsem) -+#define ii_downgrade_lock(i) au_rw_dgrade_lock(&au_ii(i)->ii_rwsem) -+ -+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) -+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) -+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline void au_icntnr_init(struct au_icntnr *c) -+{ -+#ifdef CONFIG_AUFS_DEBUG -+ c->vfs_inode.i_mode = 0; -+#endif -+} -+ -+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags) -+{ -+ unsigned int gen; -+ struct au_iinfo *iinfo; -+ struct au_iigen *iigen; -+ -+ iinfo = au_ii(inode); -+ iigen = &iinfo->ii_generation; -+ spin_lock(&iigen->ig_spin); -+ if (igflags) -+ *igflags = iigen->ig_flags; -+ gen = iigen->ig_generation; -+ spin_unlock(&iigen->ig_spin); -+ -+ return gen; -+} -+ -+/* tiny test for inode number */ -+/* tmpfs generation is too rough */ -+static inline int au_test_higen(struct inode *inode, struct inode *h_inode) -+{ -+ struct au_iinfo *iinfo; -+ -+ iinfo = au_ii(inode); -+ AuRwMustAnyLock(&iinfo->ii_rwsem); -+ return !(iinfo->ii_hsb1 == h_inode->i_sb -+ && iinfo->ii_higen == h_inode->i_generation); -+} -+ -+static inline void au_iigen_dec(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ struct au_iigen *iigen; -+ -+ iinfo = au_ii(inode); -+ iigen = &iinfo->ii_generation; -+ spin_lock(&iigen->ig_spin); -+ iigen->ig_generation--; -+ spin_unlock(&iigen->ig_spin); -+} -+ -+static inline int au_iigen_test(struct inode *inode, unsigned int sigen) -+{ -+ int err; -+ -+ err = 0; -+ if (unlikely(inode && au_iigen(inode, NULL) != sigen)) -+ err = -EIO; -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo, -+ aufs_bindex_t bindex) -+{ -+ return iinfo->ii_hinode + bindex; -+} -+ -+static inline int au_is_bad_inode(struct inode *inode) -+{ -+ return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0)); -+} -+ -+static inline aufs_bindex_t au_ii_br_id(struct inode *inode, -+ aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_hinode(au_ii(inode), bindex)->hi_id; -+} -+ -+static inline aufs_bindex_t au_ibtop(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_btop; -+} -+ -+static inline aufs_bindex_t au_ibbot(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_bbot; -+} -+ -+static inline struct au_vdir *au_ivdir(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_vdir; -+} -+ -+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_hinode(au_ii(inode), bindex)->hi_whdentry; -+} -+ -+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_btop = bindex; -+} -+ -+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_bbot = bindex; -+} -+ -+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_vdir = vdir; -+} -+ -+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_hinode(au_ii(inode), bindex); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct dentry *au_pinned_parent(struct au_pin *pin) -+{ -+ if (pin) -+ return pin->parent; -+ return NULL; -+} -+ -+static inline struct inode *au_pinned_h_dir(struct au_pin *pin) -+{ -+ if (pin && pin->hdir) -+ return pin->hdir->hi_inode; -+ return NULL; -+} -+ -+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) -+{ -+ if (pin) -+ return pin->hdir; -+ return NULL; -+} -+ -+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) -+{ -+ if (pin) -+ pin->dentry = dentry; -+} -+ -+static inline void au_pin_set_parent_lflag(struct au_pin *pin, -+ unsigned char lflag) -+{ -+ if (pin) { -+ if (lflag) -+ au_fset_pin(pin->flags, DI_LOCKED); -+ else -+ au_fclr_pin(pin->flags, DI_LOCKED); -+ } -+} -+ -+#if 0 /* reserved */ -+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) -+{ -+ if (pin) { -+ dput(pin->parent); -+ pin->parent = dget(parent); -+ } -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_branch; -+#ifdef CONFIG_AUFS_HNOTIFY -+struct au_hnotify_op { -+ void (*ctl)(struct au_hinode *hinode, int do_set); -+ int (*alloc)(struct au_hinode *hinode); -+ -+ /* -+ * if it returns true, the caller should free hinode->hi_notify, -+ * otherwise ->free() frees it. -+ */ -+ int (*free)(struct au_hinode *hinode, -+ struct au_hnotify *hn) __must_check; -+ -+ void (*fin)(void); -+ int (*init)(void); -+ -+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); -+ void (*fin_br)(struct au_branch *br); -+ int (*init_br)(struct au_branch *br, int perm); -+}; -+ -+/* hnotify.c */ -+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); -+void au_hn_free(struct au_hinode *hinode); -+void au_hn_ctl(struct au_hinode *hinode, int do_set); -+void au_hn_reset(struct inode *inode, unsigned int flags); -+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, -+ const struct qstr *h_child_qstr, struct inode *h_child_inode); -+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); -+int au_hnotify_init_br(struct au_branch *br, int perm); -+void au_hnotify_fin_br(struct au_branch *br); -+int __init au_hnotify_init(void); -+void au_hnotify_fin(void); -+ -+/* hfsnotify.c */ -+extern const struct au_hnotify_op au_hnotify_op; -+ -+static inline -+void au_hn_init(struct au_hinode *hinode) -+{ -+ hinode->hi_notify = NULL; -+} -+ -+static inline struct au_hnotify *au_hn(struct au_hinode *hinode) -+{ -+ return hinode->hi_notify; -+} -+ -+#else -+AuStub(int, au_hn_alloc, return -EOPNOTSUPP, -+ struct au_hinode *hinode __maybe_unused, -+ struct inode *inode __maybe_unused) -+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode) -+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) -+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, -+ int do_set __maybe_unused) -+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, -+ unsigned int flags __maybe_unused) -+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, -+ struct au_branch *br __maybe_unused, -+ int perm __maybe_unused) -+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, -+ int perm __maybe_unused) -+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused) -+AuStubInt0(__init au_hnotify_init, void) -+AuStubVoid(au_hnotify_fin, void) -+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) -+#endif /* CONFIG_AUFS_HNOTIFY */ -+ -+static inline void au_hn_suspend(struct au_hinode *hdir) -+{ -+ au_hn_ctl(hdir, /*do_set*/0); -+} -+ -+static inline void au_hn_resume(struct au_hinode *hdir) -+{ -+ au_hn_ctl(hdir, /*do_set*/1); -+} -+ -+static inline void au_hn_inode_lock(struct au_hinode *hdir) -+{ -+ inode_lock(hdir->hi_inode); -+ au_hn_suspend(hdir); -+} -+ -+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir, -+ unsigned int sc __maybe_unused) -+{ -+ inode_lock_nested(hdir->hi_inode, sc); -+ au_hn_suspend(hdir); -+} -+ -+#if 0 /* unused */ -+#include "vfsub.h" -+static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir, -+ unsigned int sc) -+{ -+ inode_lock_shared_nested(hdir->hi_inode, sc); -+ au_hn_suspend(hdir); -+} -+#endif -+ -+static inline void au_hn_inode_unlock(struct au_hinode *hdir) -+{ -+ au_hn_resume(hdir); -+ inode_unlock(hdir->hi_inode); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_INODE_H__ */ -diff -ruN a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c ---- a/fs/aufs/ioctl.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/ioctl.c 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,220 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2005-2021 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . -+ */ -+ -+/* -+ * ioctl -+ * plink-management and readdir in userspace. -+ * assist the pathconf(3) wrapper library. -+ * move-down -+ * File-based Hierarchical Storage Management. -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) -+{ -+ int err, fd; -+ aufs_bindex_t wbi, bindex, bbot; -+ struct file *h_file; -+ struct super_block *sb; -+ struct dentry *root; -+ struct au_branch *br; -+ struct aufs_wbr_fd wbrfd = { -+ .oflags = au_dir_roflags, -+ .brid = -1 -+ }; -+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY -+ | O_NOATIME | O_CLOEXEC; -+ -+ AuDebugOn(wbrfd.oflags & ~valid); -+ -+ if (arg) { -+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ goto out; -+ } -+ -+ err = -EINVAL; -+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); -+ wbrfd.oflags |= au_dir_roflags; -+ AuDbg("0%o\n", wbrfd.oflags); -+ if (unlikely(wbrfd.oflags & ~valid)) -+ goto out; -+ } -+ -+ fd = get_unused_fd_flags(0); -+ err = fd; -+ if (unlikely(fd < 0)) -+ goto out; -+ -+ h_file = ERR_PTR(-EINVAL); -+ wbi = 0; -+ br = NULL; -+ sb = path->dentry->d_sb; -+ root = sb->s_root; -+ aufs_read_lock(root, AuLock_IR); -+ bbot = au_sbbot(sb); -+ if (wbrfd.brid >= 0) { -+ wbi = au_br_index(sb, wbrfd.brid); -+ if (unlikely(wbi < 0 || wbi > bbot)) -+ goto out_unlock; -+ } -+ -+ h_file = ERR_PTR(-ENOENT); -+ br = au_sbr(sb, wbi); -+ if (!au_br_writable(br->br_perm)) { -+ if (arg) -+ goto out_unlock; -+ -+ bindex = wbi + 1; -+ wbi = -1; -+ for (; bindex <= bbot; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (au_br_writable(br->br_perm)) { -+ wbi = bindex; -+ br = au_sbr(sb, wbi); -+ break; -+ } -+ } -+ } -+ AuDbg("wbi %d\n", wbi); -+ if (wbi >= 0) -+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL, -+ /*force_wr*/0); -+ -+out_unlock: -+ aufs_read_unlock(root, AuLock_IR); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out_fd; -+ -+ au_lcnt_dec(&br->br_nfiles); /* cf. au_h_open() */ -+ fd_install(fd, h_file); -+ err = fd; -+ goto out; /* success */ -+ -+out_fd: -+ put_unused_fd(fd); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err; -+ struct dentry *dentry; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_ioctl(file, cmd, arg); -+ break; -+ -+ case AUFS_CTL_WBR_FD: -+ err = au_wbr_fd(&file->f_path, (void __user *)arg); -+ break; -+ -+ case AUFS_CTL_IBUSY: -+ err = au_ibusy_ioctl(file, arg); -+ break; -+ -+ case AUFS_CTL_BRINFO: -+ err = au_brinfo_ioctl(file, arg); -+ break; -+ -+ case AUFS_CTL_FHSM_FD: -+ dentry = file->f_path.dentry; -+ if (IS_ROOT(dentry)) -+ err = au_fhsm_fd(dentry->d_sb, arg); -+ else -+ err = -ENOTTY; -+ break; -+ -+ default: -+ /* do not call the lower */ -+ AuDbg("0x%x\n", cmd); -+ err = -ENOTTY; -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_MVDOWN: -+ err = au_mvdown(file->f_path.dentry, (void __user *)arg); -+ break; -+ -+ case AUFS_CTL_WBR_FD: -+ err = au_wbr_fd(&file->f_path, (void __user *)arg); -+ break; -+ -+ default: -+ /* do not call the lower */ -+ AuDbg("0x%x\n", cmd); -+ err = -ENOTTY; -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+#ifdef CONFIG_COMPAT -+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_compat_ioctl(file, cmd, arg); -+ break; -+ -+ case AUFS_CTL_IBUSY: -+ err = au_ibusy_compat_ioctl(file, arg); -+ break; -+ -+ case AUFS_CTL_BRINFO: -+ err = au_brinfo_compat_ioctl(file, arg); -+ break; -+ -+ default: -+ err = aufs_ioctl_dir(file, cmd, arg); -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); -+} -+#endif -diff -ruN a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c ---- a/fs/aufs/i_op_add.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/i_op_add.c 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,941 @@ -+// SPDX-License-Identifier: GPL-2.0 -+/* -+ * Copyright (C) 2005-2021 Junjiro R. Okajima -+ * -+ * This program, aufs is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program. If not, see . -+ */ -+ -+/* -+ * inode operations (add entry) -+ */ -+ -+#include -+#include "aufs.h" -+ -+/* -+ * final procedure of adding a new entry, except link(2). -+ * remove whiteout, instantiate, copyup the parent dir's times and size -+ * and update version. -+ * if it failed, re-create the removed whiteout. -+ */ -+static int epilog(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct dentry *dentry) -+{ -+ int err, rerr; -+ aufs_bindex_t bwh; -+ struct path h_path; -+ struct super_block *sb; -+ struct inode *inode, *h_dir; -+ struct dentry *wh; -+ -+ bwh = -1; -+ sb = dir->i_sb; -+ if (wh_dentry) { -+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */ -+ IMustLock(h_dir); -+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); -+ bwh = au_dbwh(dentry); -+ h_path.dentry = wh_dentry; -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ inode = au_new_inode(dentry, /*must_new*/1); -+ if (!IS_ERR(inode)) { -+ d_instantiate(dentry, inode); -+ dir = d_inode(dentry->d_parent); /* dir inode is locked */ -+ IMustLock(dir); -+ au_dir_ts(dir, bindex); -+ inode_inc_iversion(dir); -+ au_fhsm_wrote(sb, bindex, /*force*/0); -+ return 0; /* success */ -+ } -+ -+ err = PTR_ERR(inode); -+ if (!wh_dentry) -+ goto out; -+ -+ /* revert */ -+ /* dir inode is locked */ -+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); -+ rerr = PTR_ERR(wh); -+ if (IS_ERR(wh)) { -+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } else -+ dput(wh); -+ -+out: -+ return err; -+} -+ -+static int au_d_may_add(struct dentry *dentry) -+{ -+ int err; -+ -+ err = 0; -+ if (unlikely(d_unhashed(dentry))) -+ err = -ENOENT; -+ if (unlikely(d_really_is_positive(dentry))) -+ err = -EEXIST; -+ return err; -+} -+ -+/* -+ * simple tests for the adding inode operations. -+ * following the checks in vfs, plus the parent-child relationship. -+ */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir) -+{ -+ int err; -+ umode_t h_mode; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ -+ err = -ENAMETOOLONG; -+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ goto out; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (d_really_is_negative(dentry)) { -+ err = -EEXIST; -+ if (unlikely(d_is_positive(h_dentry))) -+ goto out; -+ } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(d_is_negative(h_dentry))) -+ goto out; -+ h_inode = d_inode(h_dentry); -+ if (unlikely(!h_inode->i_nlink)) -+ goto out; -+ -+ h_mode = h_inode->i_mode; -+ if (!isdir) { -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(h_mode))) -+ goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; -+ } -+ } -+ -+ err = 0; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ err = -EIO; -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * initial procedure of adding a new entry. -+ * prepare writable branch and the parent dir, lock it, -+ * and lookup whiteout for the new entry. -+ */ -+static struct dentry* -+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, -+ struct dentry *src_dentry, struct au_pin *pin, -+ struct au_wr_dir_args *wr_dir_args) -+{ -+ struct dentry *wh_dentry, *h_parent; -+ struct super_block *sb; -+ struct au_branch *br; -+ int err; -+ unsigned int udba; -+ aufs_bindex_t bcpup; -+ -+ AuDbg("%pd\n", dentry); -+ -+ err = au_wr_dir(dentry, src_dentry, wr_dir_args); -+ bcpup = err; -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err < 0)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ udba = au_opt_udba(sb); -+ err = au_pin(pin, dentry, bcpup, udba, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ -+ h_parent = au_pinned_h_parent(pin); -+ if (udba != AuOpt_UDBA_NONE -+ && au_dbtop(dentry) == bcpup) -+ err = au_may_add(dentry, bcpup, h_parent, -+ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); -+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ err = -ENAMETOOLONG; -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ br = au_sbr(sb, bcpup); -+ if (dt) { -+ struct path tmp = { -+ .dentry = h_parent, -+ .mnt = au_br_mnt(br) -+ }; -+ au_dtime_store(dt, au_pinned_parent(pin), &tmp); -+ } -+ -+ wh_dentry = NULL; -+ if (bcpup != au_dbwh(dentry)) -+ goto out; /* success */ -+ -+ /* -+ * ENAMETOOLONG here means that if we allowed create such name, then it -+ * would not be able to removed in the future. So we don't allow such -+ * name here and we don't handle ENAMETOOLONG differently here. -+ */ -+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); -+ -+out_unpin: -+ if (IS_ERR(wh_dentry)) -+ au_unpin(pin); -+out: -+ return wh_dentry; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+enum { Mknod, Symlink, Creat }; -+struct simple_arg { -+ int type; -+ union { -+ struct { -+ umode_t mode; -+ bool want_excl; -+ bool try_aopen; -+ struct vfsub_aopen_args *aopen; -+ } c; -+ struct { -+ const char *symname; -+ } s; -+ struct { -+ umode_t mode; -+ dev_t dev; -+ } m; -+ } u; -+}; -+ -+static int add_simple(struct inode *dir, struct dentry *dentry, -+ struct simple_arg *arg) -+{ -+ int err, rerr; -+ aufs_bindex_t btop; -+ unsigned char created; -+ const unsigned char try_aopen -+ = (arg->type == Creat && arg->u.c.try_aopen); -+ struct vfsub_aopen_args *aopen = arg->u.c.aopen; -+ struct dentry *wh_dentry, *parent; -+ struct inode *h_dir; -+ struct super_block *sb; -+ struct au_branch *br; -+ /* to reduce stack size */ -+ struct { -+ struct au_dtime dt; -+ struct au_pin pin; -+ struct path h_path; -+ struct au_wr_dir_args wr_dir_args; -+ } *a; -+ -+ AuDbg("%pd\n", dentry); -+ IMustLock(dir); -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ a->wr_dir_args.force_btgt = -1; -+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY; -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ if (!try_aopen) { -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_free; -+ } -+ err = au_d_may_add(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ if (!try_aopen) -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, -+ &a->pin, &a->wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ btop = au_dbtop(dentry); -+ sb = dentry->d_sb; -+ br = au_sbr(sb, btop); -+ a->h_path.dentry = au_h_dptr(dentry, btop); -+ a->h_path.mnt = au_br_mnt(br); -+ h_dir = au_pinned_h_dir(&a->pin); -+ switch (arg->type) { -+ case Creat: -+ if (!try_aopen || !h_dir->i_op->atomic_open) { -+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode, -+ arg->u.c.want_excl); -+ created = !err; -+ if (!err && try_aopen) -+ aopen->file->f_mode |= FMODE_CREATED; -+ } else { -+ aopen->br = br; -+ err = vfsub_atomic_open(h_dir, a->h_path.dentry, aopen); -+ AuDbg("err %d\n", err); -+ AuDbgFile(aopen->file); -+ created = err >= 0 -+ && !!(aopen->file->f_mode & FMODE_CREATED); -+ } -+ break; -+ case Symlink: -+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname); -+ created = !err; -+ break; -+ case Mknod: -+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode, -+ arg->u.m.dev); -+ created = !err; -+ break; -+ default: -+ BUG(); -+ } -+ if (unlikely(err < 0)) -+ goto out_unpin; -+ -+ err = epilog(dir, btop, wh_dentry, dentry); -+ if (!err) -+ goto out_unpin; /* success */ -+ -+ /* revert */ -+ if (created /* && d_is_positive(a->h_path.dentry) */) { -+ /* no delegation since it is just created */ -+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL, -+ /*force*/0); -+ if (rerr) { -+ AuIOErr("%pd revert failure(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&a->dt); -+ } -+ if (try_aopen && h_dir->i_op->atomic_open -+ && (aopen->file->f_mode & FMODE_OPENED)) -+ /* aopen->file is still opened */ -+ au_lcnt_dec(&aopen->br->br_nfiles); -+ -+out_unpin: -+ au_unpin(&a->pin); -+ dput(wh_dentry); -+out_parent: -+ if (!try_aopen) -+ di_write_unlock(parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbtop(dentry); -+ d_drop(dentry); -+ } -+ if (!try_aopen) -+ aufs_read_unlock(dentry, AuLock_DW); -+out_free: -+ au_kfree_rcu(a); -+out: -+ return err; -+} -+ -+int aufs_mknod(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode, dev_t dev) -+{ -+ struct simple_arg arg = { -+ .type = Mknod, -+ .u.m = { -+ .mode = mode, -+ .dev = dev -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int aufs_symlink(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, const char *symname) -+{ -+ struct simple_arg arg = { -+ .type = Symlink, -+ .u.s.symname = symname -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int aufs_create(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode, bool want_excl) -+{ -+ struct simple_arg arg = { -+ .type = Creat, -+ .u.c = { -+ .mode = mode, -+ .want_excl = want_excl -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int au_aopen_or_create(struct inode *dir, struct dentry *dentry, -+ struct vfsub_aopen_args *aopen_args) -+{ -+ struct simple_arg arg = { -+ .type = Creat, -+ .u.c = { -+ .mode = aopen_args->create_mode, -+ .want_excl = aopen_args->open_flag & O_EXCL, -+ .try_aopen = true, -+ .aopen = aopen_args -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int aufs_tmpfile(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct super_block *sb; -+ struct dentry *parent, *h_parent, *h_dentry; -+ struct inode *h_dir, *inode; -+ struct vfsmount *h_mnt; -+ struct user_namespace *h_userns; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_TMPFILE -+ }; -+ -+ /* copy-up may happen */ -+ inode_lock(dir); -+ -+ sb = dir->i_sb; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_di_init(dentry); -+ if (unlikely(err)) -+ goto out_si; -+ -+ err = -EBUSY; -+ parent = d_find_any_alias(dir); -+ AuDebugOn(!parent); -+ di_write_lock_parent(parent); -+ if (unlikely(d_inode(parent) != dir)) -+ goto out_parent; -+ -+ err = au_digen_test(parent, au_sigen(sb)); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ bindex = au_dbtop(parent); -+ au_set_dbtop(dentry, bindex); -+ au_set_dbbot(dentry, bindex); -+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); -+ bindex = err; -+ if (unlikely(err < 0)) -+ goto out_parent; -+ -+ err = -EOPNOTSUPP; -+ h_dir = au_h_iptr(dir, bindex); -+ if (unlikely(!h_dir->i_op->tmpfile)) -+ goto out_parent; -+ -+ h_mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_mnt_want_write(h_mnt); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ h_userns = mnt_user_ns(h_mnt); -+ h_parent = au_h_dptr(parent, bindex); -+ h_dentry = vfs_tmpfile(h_userns, h_parent, mode, /*open_flag*/0); -+ if (IS_ERR(h_dentry)) { -+ err = PTR_ERR(h_dentry); -+ goto out_mnt; -+ } -+ -+ au_set_dbtop(dentry, bindex); -+ au_set_dbbot(dentry, bindex); -+ au_set_h_dptr(dentry, bindex, dget(h_dentry)); -+ inode = au_new_inode(dentry, /*must_new*/1); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ au_set_h_dptr(dentry, bindex, NULL); -+ au_set_dbtop(dentry, -1); -+ au_set_dbbot(dentry, -1); -+ } else { -+ if (!inode->i_nlink) -+ set_nlink(inode, 1); -+ d_tmpfile(dentry, inode); -+ au_di(dentry)->di_tmpfile = 1; -+ -+ /* update without i_mutex */ -+ if (au_ibtop(dir) == au_dbtop(dentry)) -+ au_cpup_attr_timesizes(dir); -+ } -+ dput(h_dentry); -+ -+out_mnt: -+ vfsub_mnt_drop_write(h_mnt); -+out_parent: -+ di_write_unlock(parent); -+ dput(parent); -+ di_write_unlock(dentry); -+ if (unlikely(err)) { -+ au_di_fin(dentry); -+ dentry->d_fsdata = NULL; -+ } -+out_si: -+ si_read_unlock(sb); -+out: -+ inode_unlock(dir); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_link_args { -+ aufs_bindex_t bdst, bsrc; -+ struct au_pin pin; -+ struct path h_path; -+ struct dentry *src_parent, *parent; -+}; -+ -+static int au_cpup_before_link(struct dentry *src_dentry, -+ struct au_link_args *a) -+{ -+ int err; -+ struct dentry *h_src_dentry; -+ struct au_cp_generic cpg = { -+ .dentry = src_dentry, -+ .bdst = a->bdst, -+ .bsrc = a->bsrc, -+ .len = -1, -+ .pin = &a->pin, -+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */ -+ }; -+ -+ di_read_lock_parent(a->src_parent, AuLock_IR); -+ err = au_test_and_cpup_dirs(src_dentry, a->bdst); -+ if (unlikely(err)) -+ goto out; -+ -+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); -+ err = au_pin(&a->pin, src_dentry, a->bdst, -+ au_opt_udba(src_dentry->d_sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_sio_cpup_simple(&cpg); -+ au_unpin(&a->pin); -+ -+out: -+ di_read_unlock(a->src_parent, AuLock_IR); -+ return err; -+} -+ -+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry, -+ struct au_link_args *a) -+{ -+ int err; -+ unsigned char plink; -+ aufs_bindex_t bbot; -+ struct dentry *h_src_dentry; -+ struct inode *h_inode, *inode, *delegated; -+ struct super_block *sb; -+ struct file *h_file; -+ -+ plink = 0; -+ h_inode = NULL; -+ sb = src_dentry->d_sb; -+ inode = d_inode(src_dentry); -+ if (au_ibtop(inode) <= a->bdst) -+ h_inode = au_h_iptr(inode, a->bdst); -+ if (!h_inode || !h_inode->i_nlink) { -+ /* copyup src_dentry as the name of dentry. */ -+ bbot = au_dbbot(dentry); -+ if (bbot < a->bsrc) -+ au_set_dbbot(dentry, a->bsrc); -+ au_set_h_dptr(dentry, a->bsrc, -+ dget(au_h_dptr(src_dentry, a->bsrc))); -+ dget(a->h_path.dentry); -+ au_set_h_dptr(dentry, a->bdst, NULL); -+ AuDbg("temporary d_inode...\n"); -+ spin_lock(&dentry->d_lock); -+ dentry->d_inode = d_inode(src_dentry); /* tmp */ -+ spin_unlock(&dentry->d_lock); -+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0); -+ if (IS_ERR(h_file)) -+ err = PTR_ERR(h_file); -+ else { -+ struct au_cp_generic cpg = { -+ .dentry = dentry, -+ .bdst = a->bdst, -+ .bsrc = -1, -+ .len = -1, -+ .pin = &a->pin, -+ .flags = AuCpup_KEEPLINO -+ }; -+ err = au_sio_cpup_simple(&cpg); -+ au_h_open_post(dentry, a->bsrc, h_file); -+ if (!err) { -+ dput(a->h_path.dentry); -+ a->h_path.dentry = au_h_dptr(dentry, a->bdst); -+ } else -+ au_set_h_dptr(dentry, a->bdst, -+ a->h_path.dentry); -+ } -+ spin_lock(&dentry->d_lock); -+ dentry->d_inode = NULL; /* restore */ -+ spin_unlock(&dentry->d_lock); -+ AuDbg("temporary d_inode...done\n"); -+ au_set_h_dptr(dentry, a->bsrc, NULL); -+ au_set_dbbot(dentry, bbot); -+ } else { -+ /* the inode of src_dentry already exists on a.bdst branch */ -+ h_src_dentry = d_find_alias(h_inode); -+ if (!h_src_dentry && au_plink_test(inode)) { -+ plink = 1; -+ h_src_dentry = au_plink_lkup(inode, a->bdst); -+ err = PTR_ERR(h_src_dentry); -+ if (IS_ERR(h_src_dentry)) -+ goto out; -+ -+ if (unlikely(d_is_negative(h_src_dentry))) { -+ dput(h_src_dentry); -+ h_src_dentry = NULL; -+ } -+ -+ } -+ if (h_src_dentry) { -+ delegated = NULL; -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ dput(h_src_dentry); -+ } else { -+ AuIOErr("no dentry found for hi%lu on b%d\n", -+ h_inode->i_ino, a->bdst); -+ err = -EIO; -+ } -+ } -+ -+ if (!err && !plink) -+ au_plink_append(inode, a->bdst, a->h_path.dentry); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry) -+{ -+ int err, rerr; -+ struct au_dtime dt; -+ struct au_link_args *a; -+ struct dentry *wh_dentry, *h_src_dentry; -+ struct inode *inode, *delegated; -+ struct super_block *sb; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; -+ -+ IMustLock(dir); -+ inode = d_inode(src_dentry); -+ IMustLock(inode); -+ -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ a->parent = dentry->d_parent; /* dir inode is locked */ -+ err = aufs_read_and_write_lock2(dentry, src_dentry, -+ AuLock_NOPLM | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_kfree; -+ err = au_d_linkable(src_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ err = au_d_may_add(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ a->src_parent = dget_parent(src_dentry); -+ wr_dir_args.force_btgt = au_ibtop(inode); -+ -+ di_write_lock_parent(a->parent); -+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, -+ &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ err = 0; -+ sb = dentry->d_sb; -+ a->bdst = au_dbtop(dentry); -+ a->h_path.dentry = au_h_dptr(dentry, a->bdst); -+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); -+ a->bsrc = au_ibtop(inode); -+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); -+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile) -+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc)); -+ if (!h_src_dentry) { -+ a->bsrc = au_dbtop(src_dentry); -+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); -+ AuDebugOn(!h_src_dentry); -+ } else if (IS_ERR(h_src_dentry)) { -+ err = PTR_ERR(h_src_dentry); -+ goto out_parent; -+ } -+ -+ /* -+ * aufs doesn't touch the credential so -+ * security_dentry_create_files_as() is unnecessary. -+ */ -+ if (au_opt_test(au_mntflags(sb), PLINK)) { -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) -+ err = au_cpup_or_link(src_dentry, dentry, a); -+ else { -+ delegated = NULL; -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ } -+ dput(h_src_dentry); -+ } else { -+ /* -+ * copyup src_dentry to the branch we process, -+ * and then link(2) to it. -+ */ -+ dput(h_src_dentry); -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { -+ au_unpin(&a->pin); -+ di_write_unlock(a->parent); -+ err = au_cpup_before_link(src_dentry, a); -+ di_write_lock_parent(a->parent); -+ if (!err) -+ err = au_pin(&a->pin, dentry, a->bdst, -+ au_opt_udba(sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_wh; -+ } -+ if (!err) { -+ h_src_dentry = au_h_dptr(src_dentry, a->bdst); -+ err = -ENOENT; -+ if (h_src_dentry && d_is_positive(h_src_dentry)) { -+ delegated = NULL; -+ err = vfsub_link(h_src_dentry, -+ au_pinned_h_dir(&a->pin), -+ &a->h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry" -+ " for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ } -+ } -+ } -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ if (wh_dentry) { -+ a->h_path.dentry = wh_dentry; -+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out_revert; -+ } -+ -+ au_dir_ts(dir, a->bdst); -+ inode_inc_iversion(dir); -+ inc_nlink(inode); -+ inode->i_ctime = dir->i_ctime; -+ d_instantiate(dentry, au_igrab(inode)); -+ if (d_unhashed(a->h_path.dentry)) -+ /* some filesystem calls d_drop() */ -+ d_drop(dentry); -+ /* some filesystems consume an inode even hardlink */ -+ au_fhsm_wrote(sb, a->bdst, /*force*/0); -+ goto out_unpin; /* success */ -+ -+out_revert: -+ /* no delegation since it is just created */ -+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, -+ /*delegated*/NULL, /*force*/0); -+ if (unlikely(rerr)) { -+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&dt); -+out_unpin: -+ au_unpin(&a->pin); -+out_wh: -+ dput(wh_dentry); -+out_parent: -+ di_write_unlock(a->parent); -+ dput(a->src_parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbtop(dentry); -+ d_drop(dentry); -+ } -+ aufs_read_and_write_unlock2(dentry, src_dentry); -+out_kfree: -+ au_kfree_rcu(a); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int aufs_mkdir(struct user_namespace *userns, struct inode *dir, -+ struct dentry *dentry, umode_t mode) -+{ -+ int err, rerr; -+ aufs_bindex_t bindex; -+ unsigned char diropq; -+ struct path h_path; -+ struct dentry *wh_dentry, *parent, *opq_dentry; -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct { -+ struct au_pin pin; -+ struct au_dtime dt; -+ } *a; /* reduce the stack usage */ -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR -+ }; -+ -+ IMustLock(dir); -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_free; -+ err = au_d_may_add(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, -+ &a->pin, &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ sb = dentry->d_sb; -+ bindex = au_dbtop(dentry); -+ h_path.dentry = au_h_dptr(dentry, bindex); -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ /* make the dir opaque */ -+ diropq = 0; -+ h_inode = d_inode(h_path.dentry); -+ if (wh_dentry -+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { -+ inode_lock_nested(h_inode, AuLsc_I_CHILD); -+ opq_dentry = au_diropq_create(dentry, bindex); -+ inode_unlock(h_inode); -+ err = PTR_ERR(opq_dentry); -+ if (IS_ERR(opq_dentry)) -+ goto out_dir; -+ dput(opq_dentry); -+ diropq = 1; -+ } -+ -+ err = epilog(dir, bindex, wh_dentry, dentry); -+ if (!err) { -+ inc_nlink(dir); -+ goto out_unpin; /* success */ -+ } -+ -+ /* revert */ -+ if (diropq) { -+ AuLabel(revert opq); -+ inode_lock_nested(h_inode, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(dentry, bindex); -+ inode_unlock(h_inode); -+ if (rerr) { -+ AuIOErr("%pd reverting diropq failed(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } -+ } -+ -+out_dir: -+ AuLabel(revert dir); -+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); -+ if (rerr) { -+ AuIOErr("%pd reverting dir failed(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&a->dt); -+out_unpin: -+ au_unpin(&a->pin); -+ dput(wh_dentry); -+out_parent: -+ di_write_unlock(parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbtop(dentry); -+ d_drop(dentry); -+ } -+ aufs_read_unlock(dentry, AuLock_DW); -+out_free: -+ au_kfree_rcu(a); -+out: -+ return err; -+} -diff -ruN a/fs/aufs/i_op.c b/fs/aufs/i_op.c ---- a/fs/aufs/i_op.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/i_op.c 2022-03-30 02:33:23.782731022 +0300 +diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c +new file mode 100644 +index 000000000000..9fe18fc0f239 +--- /dev/null ++++ b/fs/aufs/i_op.c @@ -0,0 +1,1516 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -22433,9 +20136,958 @@ diff -ruN a/fs/aufs/i_op.c b/fs/aufs/i_op.c + .update_time = aufs_update_time + } +}; -diff -ruN a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c ---- a/fs/aufs/i_op_del.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/i_op_del.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c +new file mode 100644 +index 000000000000..5c512f3403e2 +--- /dev/null ++++ b/fs/aufs/i_op_add.c +@@ -0,0 +1,941 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2005-2021 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* ++ * inode operations (add entry) ++ */ ++ ++#include ++#include "aufs.h" ++ ++/* ++ * final procedure of adding a new entry, except link(2). ++ * remove whiteout, instantiate, copyup the parent dir's times and size ++ * and update version. ++ * if it failed, re-create the removed whiteout. ++ */ ++static int epilog(struct inode *dir, aufs_bindex_t bindex, ++ struct dentry *wh_dentry, struct dentry *dentry) ++{ ++ int err, rerr; ++ aufs_bindex_t bwh; ++ struct path h_path; ++ struct super_block *sb; ++ struct inode *inode, *h_dir; ++ struct dentry *wh; ++ ++ bwh = -1; ++ sb = dir->i_sb; ++ if (wh_dentry) { ++ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */ ++ IMustLock(h_dir); ++ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); ++ bwh = au_dbwh(dentry); ++ h_path.dentry = wh_dentry; ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ inode = au_new_inode(dentry, /*must_new*/1); ++ if (!IS_ERR(inode)) { ++ d_instantiate(dentry, inode); ++ dir = d_inode(dentry->d_parent); /* dir inode is locked */ ++ IMustLock(dir); ++ au_dir_ts(dir, bindex); ++ inode_inc_iversion(dir); ++ au_fhsm_wrote(sb, bindex, /*force*/0); ++ return 0; /* success */ ++ } ++ ++ err = PTR_ERR(inode); ++ if (!wh_dentry) ++ goto out; ++ ++ /* revert */ ++ /* dir inode is locked */ ++ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); ++ rerr = PTR_ERR(wh); ++ if (IS_ERR(wh)) { ++ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", ++ dentry, err, rerr); ++ err = -EIO; ++ } else ++ dput(wh); ++ ++out: ++ return err; ++} ++ ++static int au_d_may_add(struct dentry *dentry) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(d_unhashed(dentry))) ++ err = -ENOENT; ++ if (unlikely(d_really_is_positive(dentry))) ++ err = -EEXIST; ++ return err; ++} ++ ++/* ++ * simple tests for the adding inode operations. ++ * following the checks in vfs, plus the parent-child relationship. ++ */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir) ++{ ++ int err; ++ umode_t h_mode; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ ++ err = -ENAMETOOLONG; ++ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ goto out; ++ ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (d_really_is_negative(dentry)) { ++ err = -EEXIST; ++ if (unlikely(d_is_positive(h_dentry))) ++ goto out; ++ } else { ++ /* rename(2) case */ ++ err = -EIO; ++ if (unlikely(d_is_negative(h_dentry))) ++ goto out; ++ h_inode = d_inode(h_dentry); ++ if (unlikely(!h_inode->i_nlink)) ++ goto out; ++ ++ h_mode = h_inode->i_mode; ++ if (!isdir) { ++ err = -EISDIR; ++ if (unlikely(S_ISDIR(h_mode))) ++ goto out; ++ } else if (unlikely(!S_ISDIR(h_mode))) { ++ err = -ENOTDIR; ++ goto out; ++ } ++ } ++ ++ err = 0; ++ /* expected parent dir is locked */ ++ if (unlikely(h_parent != h_dentry->d_parent)) ++ err = -EIO; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ++ * initial procedure of adding a new entry. ++ * prepare writable branch and the parent dir, lock it, ++ * and lookup whiteout for the new entry. ++ */ ++static struct dentry* ++lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, ++ struct dentry *src_dentry, struct au_pin *pin, ++ struct au_wr_dir_args *wr_dir_args) ++{ ++ struct dentry *wh_dentry, *h_parent; ++ struct super_block *sb; ++ struct au_branch *br; ++ int err; ++ unsigned int udba; ++ aufs_bindex_t bcpup; ++ ++ AuDbg("%pd\n", dentry); ++ ++ err = au_wr_dir(dentry, src_dentry, wr_dir_args); ++ bcpup = err; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ udba = au_opt_udba(sb); ++ err = au_pin(pin, dentry, bcpup, udba, ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ h_parent = au_pinned_h_parent(pin); ++ if (udba != AuOpt_UDBA_NONE ++ && au_dbtop(dentry) == bcpup) ++ err = au_may_add(dentry, bcpup, h_parent, ++ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); ++ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) ++ err = -ENAMETOOLONG; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ br = au_sbr(sb, bcpup); ++ if (dt) { ++ struct path tmp = { ++ .dentry = h_parent, ++ .mnt = au_br_mnt(br) ++ }; ++ au_dtime_store(dt, au_pinned_parent(pin), &tmp); ++ } ++ ++ wh_dentry = NULL; ++ if (bcpup != au_dbwh(dentry)) ++ goto out; /* success */ ++ ++ /* ++ * ENAMETOOLONG here means that if we allowed create such name, then it ++ * would not be able to removed in the future. So we don't allow such ++ * name here and we don't handle ENAMETOOLONG differently here. ++ */ ++ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); ++ ++out_unpin: ++ if (IS_ERR(wh_dentry)) ++ au_unpin(pin); ++out: ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++enum { Mknod, Symlink, Creat }; ++struct simple_arg { ++ int type; ++ union { ++ struct { ++ umode_t mode; ++ bool want_excl; ++ bool try_aopen; ++ struct vfsub_aopen_args *aopen; ++ } c; ++ struct { ++ const char *symname; ++ } s; ++ struct { ++ umode_t mode; ++ dev_t dev; ++ } m; ++ } u; ++}; ++ ++static int add_simple(struct inode *dir, struct dentry *dentry, ++ struct simple_arg *arg) ++{ ++ int err, rerr; ++ aufs_bindex_t btop; ++ unsigned char created; ++ const unsigned char try_aopen ++ = (arg->type == Creat && arg->u.c.try_aopen); ++ struct vfsub_aopen_args *aopen = arg->u.c.aopen; ++ struct dentry *wh_dentry, *parent; ++ struct inode *h_dir; ++ struct super_block *sb; ++ struct au_branch *br; ++ /* to reduce stack size */ ++ struct { ++ struct au_dtime dt; ++ struct au_pin pin; ++ struct path h_path; ++ struct au_wr_dir_args wr_dir_args; ++ } *a; ++ ++ AuDbg("%pd\n", dentry); ++ IMustLock(dir); ++ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ a->wr_dir_args.force_btgt = -1; ++ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ if (!try_aopen) { ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_free; ++ } ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ if (!try_aopen) ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, ++ &a->pin, &a->wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ btop = au_dbtop(dentry); ++ sb = dentry->d_sb; ++ br = au_sbr(sb, btop); ++ a->h_path.dentry = au_h_dptr(dentry, btop); ++ a->h_path.mnt = au_br_mnt(br); ++ h_dir = au_pinned_h_dir(&a->pin); ++ switch (arg->type) { ++ case Creat: ++ if (!try_aopen || !h_dir->i_op->atomic_open) { ++ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode, ++ arg->u.c.want_excl); ++ created = !err; ++ if (!err && try_aopen) ++ aopen->file->f_mode |= FMODE_CREATED; ++ } else { ++ aopen->br = br; ++ err = vfsub_atomic_open(h_dir, a->h_path.dentry, aopen); ++ AuDbg("err %d\n", err); ++ AuDbgFile(aopen->file); ++ created = err >= 0 ++ && !!(aopen->file->f_mode & FMODE_CREATED); ++ } ++ break; ++ case Symlink: ++ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname); ++ created = !err; ++ break; ++ case Mknod: ++ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode, ++ arg->u.m.dev); ++ created = !err; ++ break; ++ default: ++ BUG(); ++ } ++ if (unlikely(err < 0)) ++ goto out_unpin; ++ ++ err = epilog(dir, btop, wh_dentry, dentry); ++ if (!err) ++ goto out_unpin; /* success */ ++ ++ /* revert */ ++ if (created /* && d_is_positive(a->h_path.dentry) */) { ++ /* no delegation since it is just created */ ++ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL, ++ /*force*/0); ++ if (rerr) { ++ AuIOErr("%pd revert failure(%d, %d)\n", ++ dentry, err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&a->dt); ++ } ++ if (try_aopen && h_dir->i_op->atomic_open ++ && (aopen->file->f_mode & FMODE_OPENED)) ++ /* aopen->file is still opened */ ++ au_lcnt_dec(&aopen->br->br_nfiles); ++ ++out_unpin: ++ au_unpin(&a->pin); ++ dput(wh_dentry); ++out_parent: ++ if (!try_aopen) ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbtop(dentry); ++ d_drop(dentry); ++ } ++ if (!try_aopen) ++ aufs_read_unlock(dentry, AuLock_DW); ++out_free: ++ au_kfree_rcu(a); ++out: ++ return err; ++} ++ ++int aufs_mknod(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, dev_t dev) ++{ ++ struct simple_arg arg = { ++ .type = Mknod, ++ .u.m = { ++ .mode = mode, ++ .dev = dev ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_symlink(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, const char *symname) ++{ ++ struct simple_arg arg = { ++ .type = Symlink, ++ .u.s.symname = symname ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_create(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool want_excl) ++{ ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = { ++ .mode = mode, ++ .want_excl = want_excl ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int au_aopen_or_create(struct inode *dir, struct dentry *dentry, ++ struct vfsub_aopen_args *aopen_args) ++{ ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = { ++ .mode = aopen_args->create_mode, ++ .want_excl = aopen_args->open_flag & O_EXCL, ++ .try_aopen = true, ++ .aopen = aopen_args ++ } ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_tmpfile(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct super_block *sb; ++ struct dentry *parent, *h_parent, *h_dentry; ++ struct inode *h_dir, *inode; ++ struct vfsmount *h_mnt; ++ struct user_namespace *h_userns; ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_TMPFILE ++ }; ++ ++ /* copy-up may happen */ ++ inode_lock(dir); ++ ++ sb = dir->i_sb; ++ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_di_init(dentry); ++ if (unlikely(err)) ++ goto out_si; ++ ++ err = -EBUSY; ++ parent = d_find_any_alias(dir); ++ AuDebugOn(!parent); ++ di_write_lock_parent(parent); ++ if (unlikely(d_inode(parent) != dir)) ++ goto out_parent; ++ ++ err = au_digen_test(parent, au_sigen(sb)); ++ if (unlikely(err)) ++ goto out_parent; ++ ++ bindex = au_dbtop(parent); ++ au_set_dbtop(dentry, bindex); ++ au_set_dbbot(dentry, bindex); ++ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); ++ bindex = err; ++ if (unlikely(err < 0)) ++ goto out_parent; ++ ++ err = -EOPNOTSUPP; ++ h_dir = au_h_iptr(dir, bindex); ++ if (unlikely(!h_dir->i_op->tmpfile)) ++ goto out_parent; ++ ++ h_mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mnt_want_write(h_mnt); ++ if (unlikely(err)) ++ goto out_parent; ++ ++ h_userns = mnt_user_ns(h_mnt); ++ h_parent = au_h_dptr(parent, bindex); ++ h_dentry = vfs_tmpfile(h_userns, h_parent, mode, /*open_flag*/0); ++ if (IS_ERR(h_dentry)) { ++ err = PTR_ERR(h_dentry); ++ goto out_mnt; ++ } ++ ++ au_set_dbtop(dentry, bindex); ++ au_set_dbbot(dentry, bindex); ++ au_set_h_dptr(dentry, bindex, dget(h_dentry)); ++ inode = au_new_inode(dentry, /*must_new*/1); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ au_set_h_dptr(dentry, bindex, NULL); ++ au_set_dbtop(dentry, -1); ++ au_set_dbbot(dentry, -1); ++ } else { ++ if (!inode->i_nlink) ++ set_nlink(inode, 1); ++ d_tmpfile(dentry, inode); ++ au_di(dentry)->di_tmpfile = 1; ++ ++ /* update without i_mutex */ ++ if (au_ibtop(dir) == au_dbtop(dentry)) ++ au_cpup_attr_timesizes(dir); ++ } ++ dput(h_dentry); ++ ++out_mnt: ++ vfsub_mnt_drop_write(h_mnt); ++out_parent: ++ di_write_unlock(parent); ++ dput(parent); ++ di_write_unlock(dentry); ++ if (unlikely(err)) { ++ au_di_fin(dentry); ++ dentry->d_fsdata = NULL; ++ } ++out_si: ++ si_read_unlock(sb); ++out: ++ inode_unlock(dir); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_link_args { ++ aufs_bindex_t bdst, bsrc; ++ struct au_pin pin; ++ struct path h_path; ++ struct dentry *src_parent, *parent; ++}; ++ ++static int au_cpup_before_link(struct dentry *src_dentry, ++ struct au_link_args *a) ++{ ++ int err; ++ struct dentry *h_src_dentry; ++ struct au_cp_generic cpg = { ++ .dentry = src_dentry, ++ .bdst = a->bdst, ++ .bsrc = a->bsrc, ++ .len = -1, ++ .pin = &a->pin, ++ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */ ++ }; ++ ++ di_read_lock_parent(a->src_parent, AuLock_IR); ++ err = au_test_and_cpup_dirs(src_dentry, a->bdst); ++ if (unlikely(err)) ++ goto out; ++ ++ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); ++ err = au_pin(&a->pin, src_dentry, a->bdst, ++ au_opt_udba(src_dentry->d_sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out; ++ ++ err = au_sio_cpup_simple(&cpg); ++ au_unpin(&a->pin); ++ ++out: ++ di_read_unlock(a->src_parent, AuLock_IR); ++ return err; ++} ++ ++static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry, ++ struct au_link_args *a) ++{ ++ int err; ++ unsigned char plink; ++ aufs_bindex_t bbot; ++ struct dentry *h_src_dentry; ++ struct inode *h_inode, *inode, *delegated; ++ struct super_block *sb; ++ struct file *h_file; ++ ++ plink = 0; ++ h_inode = NULL; ++ sb = src_dentry->d_sb; ++ inode = d_inode(src_dentry); ++ if (au_ibtop(inode) <= a->bdst) ++ h_inode = au_h_iptr(inode, a->bdst); ++ if (!h_inode || !h_inode->i_nlink) { ++ /* copyup src_dentry as the name of dentry. */ ++ bbot = au_dbbot(dentry); ++ if (bbot < a->bsrc) ++ au_set_dbbot(dentry, a->bsrc); ++ au_set_h_dptr(dentry, a->bsrc, ++ dget(au_h_dptr(src_dentry, a->bsrc))); ++ dget(a->h_path.dentry); ++ au_set_h_dptr(dentry, a->bdst, NULL); ++ AuDbg("temporary d_inode...\n"); ++ spin_lock(&dentry->d_lock); ++ dentry->d_inode = d_inode(src_dentry); /* tmp */ ++ spin_unlock(&dentry->d_lock); ++ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0); ++ if (IS_ERR(h_file)) ++ err = PTR_ERR(h_file); ++ else { ++ struct au_cp_generic cpg = { ++ .dentry = dentry, ++ .bdst = a->bdst, ++ .bsrc = -1, ++ .len = -1, ++ .pin = &a->pin, ++ .flags = AuCpup_KEEPLINO ++ }; ++ err = au_sio_cpup_simple(&cpg); ++ au_h_open_post(dentry, a->bsrc, h_file); ++ if (!err) { ++ dput(a->h_path.dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ } else ++ au_set_h_dptr(dentry, a->bdst, ++ a->h_path.dentry); ++ } ++ spin_lock(&dentry->d_lock); ++ dentry->d_inode = NULL; /* restore */ ++ spin_unlock(&dentry->d_lock); ++ AuDbg("temporary d_inode...done\n"); ++ au_set_h_dptr(dentry, a->bsrc, NULL); ++ au_set_dbbot(dentry, bbot); ++ } else { ++ /* the inode of src_dentry already exists on a.bdst branch */ ++ h_src_dentry = d_find_alias(h_inode); ++ if (!h_src_dentry && au_plink_test(inode)) { ++ plink = 1; ++ h_src_dentry = au_plink_lkup(inode, a->bdst); ++ err = PTR_ERR(h_src_dentry); ++ if (IS_ERR(h_src_dentry)) ++ goto out; ++ ++ if (unlikely(d_is_negative(h_src_dentry))) { ++ dput(h_src_dentry); ++ h_src_dentry = NULL; ++ } ++ ++ } ++ if (h_src_dentry) { ++ delegated = NULL; ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path, &delegated); ++ if (unlikely(err == -EWOULDBLOCK)) { ++ pr_warn("cannot retry for NFSv4 delegation" ++ " for an internal link\n"); ++ iput(delegated); ++ } ++ dput(h_src_dentry); ++ } else { ++ AuIOErr("no dentry found for hi%lu on b%d\n", ++ h_inode->i_ino, a->bdst); ++ err = -EIO; ++ } ++ } ++ ++ if (!err && !plink) ++ au_plink_append(inode, a->bdst, a->h_path.dentry); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ int err, rerr; ++ struct au_dtime dt; ++ struct au_link_args *a; ++ struct dentry *wh_dentry, *h_src_dentry; ++ struct inode *inode, *delegated; ++ struct super_block *sb; ++ struct au_wr_dir_args wr_dir_args = { ++ /* .force_btgt = -1, */ ++ .flags = AuWrDir_ADD_ENTRY ++ }; ++ ++ IMustLock(dir); ++ inode = d_inode(src_dentry); ++ IMustLock(inode); ++ ++ err = -ENOMEM; ++ a = kzalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ a->parent = dentry->d_parent; /* dir inode is locked */ ++ err = aufs_read_and_write_lock2(dentry, src_dentry, ++ AuLock_NOPLM | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_kfree; ++ err = au_d_linkable(src_dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ a->src_parent = dget_parent(src_dentry); ++ wr_dir_args.force_btgt = au_ibtop(inode); ++ ++ di_write_lock_parent(a->parent); ++ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, ++ &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ err = 0; ++ sb = dentry->d_sb; ++ a->bdst = au_dbtop(dentry); ++ a->h_path.dentry = au_h_dptr(dentry, a->bdst); ++ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); ++ a->bsrc = au_ibtop(inode); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile) ++ h_src_dentry = dget(au_hi_wh(inode, a->bsrc)); ++ if (!h_src_dentry) { ++ a->bsrc = au_dbtop(src_dentry); ++ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); ++ AuDebugOn(!h_src_dentry); ++ } else if (IS_ERR(h_src_dentry)) { ++ err = PTR_ERR(h_src_dentry); ++ goto out_parent; ++ } ++ ++ /* ++ * aufs doesn't touch the credential so ++ * security_dentry_create_files_as() is unnecessary. ++ */ ++ if (au_opt_test(au_mntflags(sb), PLINK)) { ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) ++ err = au_cpup_or_link(src_dentry, dentry, a); ++ else { ++ delegated = NULL; ++ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), ++ &a->h_path, &delegated); ++ if (unlikely(err == -EWOULDBLOCK)) { ++ pr_warn("cannot retry for NFSv4 delegation" ++ " for an internal link\n"); ++ iput(delegated); ++ } ++ } ++ dput(h_src_dentry); ++ } else { ++ /* ++ * copyup src_dentry to the branch we process, ++ * and then link(2) to it. ++ */ ++ dput(h_src_dentry); ++ if (a->bdst < a->bsrc ++ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { ++ au_unpin(&a->pin); ++ di_write_unlock(a->parent); ++ err = au_cpup_before_link(src_dentry, a); ++ di_write_lock_parent(a->parent); ++ if (!err) ++ err = au_pin(&a->pin, dentry, a->bdst, ++ au_opt_udba(sb), ++ AuPin_DI_LOCKED | AuPin_MNT_WRITE); ++ if (unlikely(err)) ++ goto out_wh; ++ } ++ if (!err) { ++ h_src_dentry = au_h_dptr(src_dentry, a->bdst); ++ err = -ENOENT; ++ if (h_src_dentry && d_is_positive(h_src_dentry)) { ++ delegated = NULL; ++ err = vfsub_link(h_src_dentry, ++ au_pinned_h_dir(&a->pin), ++ &a->h_path, &delegated); ++ if (unlikely(err == -EWOULDBLOCK)) { ++ pr_warn("cannot retry" ++ " for NFSv4 delegation" ++ " for an internal link\n"); ++ iput(delegated); ++ } ++ } ++ } ++ } ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ if (wh_dentry) { ++ a->h_path.dentry = wh_dentry; ++ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, ++ dentry); ++ if (unlikely(err)) ++ goto out_revert; ++ } ++ ++ au_dir_ts(dir, a->bdst); ++ inode_inc_iversion(dir); ++ inc_nlink(inode); ++ inode->i_ctime = dir->i_ctime; ++ d_instantiate(dentry, au_igrab(inode)); ++ if (d_unhashed(a->h_path.dentry)) ++ /* some filesystem calls d_drop() */ ++ d_drop(dentry); ++ /* some filesystems consume an inode even hardlink */ ++ au_fhsm_wrote(sb, a->bdst, /*force*/0); ++ goto out_unpin; /* success */ ++ ++out_revert: ++ /* no delegation since it is just created */ ++ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, ++ /*delegated*/NULL, /*force*/0); ++ if (unlikely(rerr)) { ++ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&dt); ++out_unpin: ++ au_unpin(&a->pin); ++out_wh: ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(a->parent); ++ dput(a->src_parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbtop(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++out_kfree: ++ au_kfree_rcu(a); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++int aufs_mkdir(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode) ++{ ++ int err, rerr; ++ aufs_bindex_t bindex; ++ unsigned char diropq; ++ struct path h_path; ++ struct dentry *wh_dentry, *parent, *opq_dentry; ++ struct inode *h_inode; ++ struct super_block *sb; ++ struct { ++ struct au_pin pin; ++ struct au_dtime dt; ++ } *a; /* reduce the stack usage */ ++ struct au_wr_dir_args wr_dir_args = { ++ .force_btgt = -1, ++ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR ++ }; ++ ++ IMustLock(dir); ++ ++ err = -ENOMEM; ++ a = kmalloc(sizeof(*a), GFP_NOFS); ++ if (unlikely(!a)) ++ goto out; ++ ++ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); ++ if (unlikely(err)) ++ goto out_free; ++ err = au_d_may_add(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ parent = dentry->d_parent; /* dir inode is locked */ ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, ++ &a->pin, &wr_dir_args); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_parent; ++ ++ sb = dentry->d_sb; ++ bindex = au_dbtop(dentry); ++ h_path.dentry = au_h_dptr(dentry, bindex); ++ h_path.mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); ++ if (unlikely(err)) ++ goto out_unpin; ++ ++ /* make the dir opaque */ ++ diropq = 0; ++ h_inode = d_inode(h_path.dentry); ++ if (wh_dentry ++ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { ++ inode_lock_nested(h_inode, AuLsc_I_CHILD); ++ opq_dentry = au_diropq_create(dentry, bindex); ++ inode_unlock(h_inode); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out_dir; ++ dput(opq_dentry); ++ diropq = 1; ++ } ++ ++ err = epilog(dir, bindex, wh_dentry, dentry); ++ if (!err) { ++ inc_nlink(dir); ++ goto out_unpin; /* success */ ++ } ++ ++ /* revert */ ++ if (diropq) { ++ AuLabel(revert opq); ++ inode_lock_nested(h_inode, AuLsc_I_CHILD); ++ rerr = au_diropq_remove(dentry, bindex); ++ inode_unlock(h_inode); ++ if (rerr) { ++ AuIOErr("%pd reverting diropq failed(%d, %d)\n", ++ dentry, err, rerr); ++ err = -EIO; ++ } ++ } ++ ++out_dir: ++ AuLabel(revert dir); ++ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); ++ if (rerr) { ++ AuIOErr("%pd reverting dir failed(%d, %d)\n", ++ dentry, err, rerr); ++ err = -EIO; ++ } ++ au_dtime_revert(&a->dt); ++out_unpin: ++ au_unpin(&a->pin); ++ dput(wh_dentry); ++out_parent: ++ di_write_unlock(parent); ++out_unlock: ++ if (unlikely(err)) { ++ au_update_dbtop(dentry); ++ d_drop(dentry); ++ } ++ aufs_read_unlock(dentry, AuLock_DW); ++out_free: ++ au_kfree_rcu(a); ++out: ++ return err; ++} +diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c +new file mode 100644 +index 000000000000..b7b5a4f69e98 +--- /dev/null ++++ b/fs/aufs/i_op_del.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -22959,9 +21611,11 @@ diff -ruN a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c + AuTraceErr(err); + return err; +} -diff -ruN a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c ---- a/fs/aufs/i_op_ren.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/i_op_ren.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c +new file mode 100644 +index 000000000000..b8cf8c5dd020 +--- /dev/null ++++ b/fs/aufs/i_op_ren.c @@ -0,0 +1,1257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -24220,212 +22874,1777 @@ diff -ruN a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c + AuTraceErr(err); + return err; +} -diff -ruN a/fs/aufs/Kconfig b/fs/aufs/Kconfig ---- a/fs/aufs/Kconfig 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/Kconfig 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,199 @@ -+# SPDX-License-Identifier: GPL-2.0 -+config AUFS_FS -+ tristate "Aufs (Advanced multi layered unification filesystem) support" -+ help -+ Aufs is a stackable unification filesystem such as Unionfs, -+ which unifies several directories and provides a merged single -+ directory. -+ In the early days, aufs was entirely re-designed and -+ re-implemented Unionfs Version 1.x series. Introducing many -+ original ideas, approaches and improvements, it becomes totally -+ different from Unionfs while keeping the basic features. +diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c +new file mode 100644 +index 000000000000..f3ee066219ae +--- /dev/null ++++ b/fs/aufs/iinfo.c +@@ -0,0 +1,286 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2005-2021 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ + -+if AUFS_FS -+choice -+ prompt "Maximum number of branches" -+ default AUFS_BRANCH_MAX_127 -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_127 -+ bool "127" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_511 -+ bool "511" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_1023 -+ bool "1023" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_32767 -+ bool "32767" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+endchoice ++/* ++ * inode private data ++ */ + -+config AUFS_SBILIST -+ bool -+ depends on AUFS_MAGIC_SYSRQ || PROC_FS -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq or /proc, enabled automatically. ++#include "aufs.h" + -+config AUFS_HNOTIFY -+ bool "Detect direct branch access (bypassing aufs)" -+ help -+ If you want to modify files on branches directly, eg. bypassing aufs, -+ and want aufs to detect the changes of them fully, then enable this -+ option and use 'udba=notify' mount option. -+ Currently there is only one available configuration, "fsnotify". -+ It will have a negative impact to the performance. -+ See detail in aufs.5. ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct inode *h_inode; ++ struct au_hinode *hinode; + -+choice -+ prompt "method" if AUFS_HNOTIFY -+ default AUFS_HFSNOTIFY -+config AUFS_HFSNOTIFY -+ bool "fsnotify" -+ select FSNOTIFY -+endchoice ++ IiMustAnyLock(inode); + -+config AUFS_EXPORT -+ bool "NFS-exportable aufs" -+ depends on EXPORTFS -+ help -+ If you want to export your mounted aufs via NFS, then enable this -+ option. There are several requirements for this configuration. -+ See detail in aufs.5. ++ hinode = au_hinode(au_ii(inode), bindex); ++ h_inode = hinode->hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); ++ return h_inode; ++} + -+config AUFS_INO_T_64 -+ bool -+ depends on AUFS_EXPORT -+ depends on 64BIT && !(ALPHA || S390) -+ default y -+ help -+ Automatic configuration for internal use. -+ /* typedef unsigned long/int __kernel_ino_t */ -+ /* alpha and s390x are int */ ++/* todo: hard/soft set? */ ++void au_hiput(struct au_hinode *hinode) ++{ ++ au_hn_free(hinode); ++ dput(hinode->hi_whdentry); ++ iput(hinode->hi_inode); ++} + -+config AUFS_XATTR -+ bool "support for XATTR/EA (including Security Labels)" -+ help -+ If your branch fs supports XATTR/EA and you want to make them -+ available in aufs too, then enable this opsion and specify the -+ branch attributes for EA. -+ See detail in aufs.5. ++unsigned int au_hi_flags(struct inode *inode, int isdir) ++{ ++ unsigned int flags; ++ const unsigned int mnt_flags = au_mntflags(inode->i_sb); + -+config AUFS_FHSM -+ bool "File-based Hierarchical Storage Management" -+ help -+ Hierarchical Storage Management (or HSM) is a well-known feature -+ in the storage world. Aufs provides this feature as file-based. -+ with multiple branches. -+ These multiple branches are prioritized, ie. the topmost one -+ should be the fastest drive and be used heavily. ++ flags = 0; ++ if (au_opt_test(mnt_flags, XINO)) ++ au_fset_hi(flags, XINO); ++ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) ++ au_fset_hi(flags, HNOTIFY); ++ return flags; ++} + -+config AUFS_RDU -+ bool "Readdir in userspace" -+ help -+ Aufs has two methods to provide a merged view for a directory, -+ by a user-space library and by kernel-space natively. The latter -+ is always enabled but sometimes large and slow. -+ If you enable this option, install the library in aufs2-util -+ package, and set some environment variables for your readdir(3), -+ then the work will be handled in user-space which generally -+ shows better performance in most cases. -+ See detail in aufs.5. ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags) ++{ ++ struct au_hinode *hinode; ++ struct inode *hi; ++ struct au_iinfo *iinfo = au_ii(inode); + -+config AUFS_DIRREN -+ bool "Workaround for rename(2)-ing a directory" -+ help -+ By default, aufs returns EXDEV error in renameing a dir who has -+ his child on the lower branch, since it is a bad idea to issue -+ rename(2) internally for every lower branch. But user may not -+ accept this behaviour. So here is a workaround to allow such -+ rename(2) and store some extra infromation on the writable -+ branch. Obviously this costs high (and I don't like it). -+ To use this feature, you need to enable this configuration AND -+ to specify the mount option `dirren.' -+ See details in aufs.5 and the design documents. ++ IiMustWriteLock(inode); + -+config AUFS_SHWH -+ bool "Show whiteouts" -+ help -+ If you want to make the whiteouts in aufs visible, then enable -+ this option and specify 'shwh' mount option. Although it may -+ sounds like philosophy or something, but in technically it -+ simply shows the name of whiteout with keeping its behaviour. ++ hinode = au_hinode(iinfo, bindex); ++ hi = hinode->hi_inode; ++ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); + -+config AUFS_BR_RAMFS -+ bool "Ramfs (initramfs/rootfs) as an aufs branch" -+ help -+ If you want to use ramfs as an aufs branch fs, then enable this -+ option. Generally tmpfs is recommended. -+ Aufs prohibited them to be a branch fs by default, because -+ initramfs becomes unusable after switch_root or something -+ generally. If you sets initramfs as an aufs branch and boot your -+ system by switch_root, you will meet a problem easily since the -+ files in initramfs may be inaccessible. -+ Unless you are going to use ramfs as an aufs branch fs without -+ switch_root or something, leave it N. ++ if (hi) ++ au_hiput(hinode); ++ hinode->hi_inode = h_inode; ++ if (h_inode) { ++ int err; ++ struct super_block *sb = inode->i_sb; ++ struct au_branch *br; + -+config AUFS_BR_FUSE -+ bool "Fuse fs as an aufs branch" -+ depends on FUSE_FS -+ select AUFS_POLL -+ help -+ If you want to use fuse-based userspace filesystem as an aufs -+ branch fs, then enable this option. -+ It implements the internal poll(2) operation which is -+ implemented by fuse only (curretnly). ++ AuDebugOn(inode->i_mode ++ && (h_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT)); ++ if (bindex == iinfo->ii_btop) ++ au_cpup_igen(inode, h_inode); ++ br = au_sbr(sb, bindex); ++ hinode->hi_id = br->br_id; ++ if (au_ftest_hi(flags, XINO)) { ++ err = au_xino_write(sb, bindex, h_inode->i_ino, ++ inode->i_ino); ++ if (unlikely(err)) ++ AuIOErr1("failed au_xino_write() %d\n", err); ++ } + -+config AUFS_POLL -+ bool -+ help -+ Automatic configuration for internal use. ++ if (au_ftest_hi(flags, HNOTIFY) ++ && au_br_hnotifyable(br->br_perm)) { ++ err = au_hn_alloc(hinode, inode); ++ if (unlikely(err)) ++ AuIOErr1("au_hn_alloc() %d\n", err); ++ } ++ } ++} + -+config AUFS_BR_HFSPLUS -+ bool "Hfsplus as an aufs branch" -+ depends on HFSPLUS_FS -+ default y -+ help -+ If you want to use hfsplus fs as an aufs branch fs, then enable -+ this option. This option introduces a small overhead at -+ copying-up a file on hfsplus. ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh) ++{ ++ struct au_hinode *hinode; + -+config AUFS_BDEV_LOOP -+ bool -+ depends on BLK_DEV_LOOP -+ default y -+ help -+ Automatic configuration for internal use. -+ Convert =[ym] into =y. ++ IiMustWriteLock(inode); + -+config AUFS_DEBUG -+ bool "Debug aufs" -+ help -+ Enable this to compile aufs internal debug code. -+ It will have a negative impact to the performance. ++ hinode = au_hinode(au_ii(inode), bindex); ++ AuDebugOn(hinode->hi_whdentry); ++ hinode->hi_whdentry = h_wh; ++} + -+config AUFS_MAGIC_SYSRQ -+ bool -+ depends on AUFS_DEBUG && MAGIC_SYSRQ -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq, enabled automatically. -+endif -diff -ruN a/fs/aufs/lcnt.h b/fs/aufs/lcnt.h ---- a/fs/aufs/lcnt.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/lcnt.h 2022-03-09 15:19:00.000000000 +0300 ++void au_update_iigen(struct inode *inode, int half) ++{ ++ struct au_iinfo *iinfo; ++ struct au_iigen *iigen; ++ unsigned int sigen; ++ ++ sigen = au_sigen(inode->i_sb); ++ iinfo = au_ii(inode); ++ iigen = &iinfo->ii_generation; ++ spin_lock(&iigen->ig_spin); ++ iigen->ig_generation = sigen; ++ if (half) ++ au_ig_fset(iigen->ig_flags, HALF_REFRESHED); ++ else ++ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED); ++ spin_unlock(&iigen->ig_spin); ++} ++ ++/* it may be called at remount time, too */ ++void au_update_ibrange(struct inode *inode, int do_put_zero) ++{ ++ struct au_iinfo *iinfo; ++ aufs_bindex_t bindex, bbot; ++ ++ AuDebugOn(au_is_bad_inode(inode)); ++ IiMustWriteLock(inode); ++ ++ iinfo = au_ii(inode); ++ if (do_put_zero && iinfo->ii_btop >= 0) { ++ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; ++ bindex++) { ++ struct inode *h_i; ++ ++ h_i = au_hinode(iinfo, bindex)->hi_inode; ++ if (h_i ++ && !h_i->i_nlink ++ && !(h_i->i_state & I_LINKABLE)) ++ au_set_h_iptr(inode, bindex, NULL, 0); ++ } ++ } ++ ++ iinfo->ii_btop = -1; ++ iinfo->ii_bbot = -1; ++ bbot = au_sbbot(inode->i_sb); ++ for (bindex = 0; bindex <= bbot; bindex++) ++ if (au_hinode(iinfo, bindex)->hi_inode) { ++ iinfo->ii_btop = bindex; ++ break; ++ } ++ if (iinfo->ii_btop >= 0) ++ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--) ++ if (au_hinode(iinfo, bindex)->hi_inode) { ++ iinfo->ii_bbot = bindex; ++ break; ++ } ++ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_icntnr_init_once(void *_c) ++{ ++ struct au_icntnr *c = _c; ++ struct au_iinfo *iinfo = &c->iinfo; ++ ++ spin_lock_init(&iinfo->ii_generation.ig_spin); ++ au_rw_init(&iinfo->ii_rwsem); ++ inode_init_once(&c->vfs_inode); ++} ++ ++void au_hinode_init(struct au_hinode *hinode) ++{ ++ hinode->hi_inode = NULL; ++ hinode->hi_id = -1; ++ au_hn_init(hinode); ++ hinode->hi_whdentry = NULL; ++} ++ ++int au_iinfo_init(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct super_block *sb; ++ struct au_hinode *hi; ++ int nbr, i; ++ ++ sb = inode->i_sb; ++ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++ nbr = au_sbbot(sb) + 1; ++ if (unlikely(nbr <= 0)) ++ nbr = 1; ++ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); ++ if (hi) { ++ au_lcnt_inc(&au_sbi(sb)->si_ninodes); ++ ++ iinfo->ii_hinode = hi; ++ for (i = 0; i < nbr; i++, hi++) ++ au_hinode_init(hi); ++ ++ iinfo->ii_generation.ig_generation = au_sigen(sb); ++ iinfo->ii_btop = -1; ++ iinfo->ii_bbot = -1; ++ iinfo->ii_vdir = NULL; ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink) ++{ ++ int err, i; ++ struct au_hinode *hip; ++ ++ AuRwMustWriteLock(&iinfo->ii_rwsem); ++ ++ err = -ENOMEM; ++ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS, ++ may_shrink); ++ if (hip) { ++ iinfo->ii_hinode = hip; ++ i = iinfo->ii_bbot + 1; ++ hip += i; ++ for (; i < nbr; i++, hip++) ++ au_hinode_init(hip); ++ err = 0; ++ } ++ ++ return err; ++} ++ ++void au_iinfo_fin(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct au_hinode *hi; ++ struct super_block *sb; ++ aufs_bindex_t bindex, bbot; ++ const unsigned char unlinked = !inode->i_nlink; ++ ++ AuDebugOn(au_is_bad_inode(inode)); ++ ++ sb = inode->i_sb; ++ au_lcnt_dec(&au_sbi(sb)->si_ninodes); ++ if (si_pid_test(sb)) ++ au_xino_delete_inode(inode, unlinked); ++ else { ++ /* ++ * it is safe to hide the dependency between sbinfo and ++ * sb->s_umount. ++ */ ++ lockdep_off(); ++ si_noflush_read_lock(sb); ++ au_xino_delete_inode(inode, unlinked); ++ si_read_unlock(sb); ++ lockdep_on(); ++ } ++ ++ iinfo = au_ii(inode); ++ if (iinfo->ii_vdir) ++ au_vdir_free(iinfo->ii_vdir); ++ ++ bindex = iinfo->ii_btop; ++ if (bindex >= 0) { ++ hi = au_hinode(iinfo, bindex); ++ bbot = iinfo->ii_bbot; ++ while (bindex++ <= bbot) { ++ if (hi->hi_inode) ++ au_hiput(hi); ++ hi++; ++ } ++ } ++ au_kfree_rcu(iinfo->ii_hinode); ++ AuRwDestroy(&iinfo->ii_rwsem); ++} +diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c +new file mode 100644 +index 000000000000..5b6bacc5dc2b +--- /dev/null ++++ b/fs/aufs/inode.c +@@ -0,0 +1,531 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2005-2021 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* ++ * inode functions ++ */ ++ ++#include ++#include "aufs.h" ++ ++struct inode *au_igrab(struct inode *inode) ++{ ++ if (inode) { ++ AuDebugOn(!atomic_read(&inode->i_count)); ++ ihold(inode); ++ } ++ return inode; ++} ++ ++static void au_refresh_hinode_attr(struct inode *inode, int do_version) ++{ ++ au_cpup_attr_all(inode, /*force*/0); ++ au_update_iigen(inode, /*half*/1); ++ if (do_version) ++ inode_inc_iversion(inode); ++} ++ ++static int au_ii_refresh(struct inode *inode, int *update) ++{ ++ int err, e, nbr; ++ umode_t type; ++ aufs_bindex_t bindex, new_bindex; ++ struct super_block *sb; ++ struct au_iinfo *iinfo; ++ struct au_hinode *p, *q, tmp; ++ ++ AuDebugOn(au_is_bad_inode(inode)); ++ IiMustWriteLock(inode); ++ ++ *update = 0; ++ sb = inode->i_sb; ++ nbr = au_sbbot(sb) + 1; ++ type = inode->i_mode & S_IFMT; ++ iinfo = au_ii(inode); ++ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ AuDebugOn(iinfo->ii_btop < 0); ++ p = au_hinode(iinfo, iinfo->ii_btop); ++ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; ++ bindex++, p++) { ++ if (!p->hi_inode) ++ continue; ++ ++ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); ++ new_bindex = au_br_index(sb, p->hi_id); ++ if (new_bindex == bindex) ++ continue; ++ ++ if (new_bindex < 0) { ++ *update = 1; ++ au_hiput(p); ++ p->hi_inode = NULL; ++ continue; ++ } ++ ++ if (new_bindex < iinfo->ii_btop) ++ iinfo->ii_btop = new_bindex; ++ if (iinfo->ii_bbot < new_bindex) ++ iinfo->ii_bbot = new_bindex; ++ /* swap two lower inode, and loop again */ ++ q = au_hinode(iinfo, new_bindex); ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hi_inode) { ++ bindex--; ++ p--; ++ } ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */ ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++void au_refresh_iop(struct inode *inode, int force_getattr) ++{ ++ int type; ++ struct au_sbinfo *sbi = au_sbi(inode->i_sb); ++ const struct inode_operations *iop ++ = force_getattr ? aufs_iop : sbi->si_iop_array; ++ ++ if (inode->i_op == iop) ++ return; ++ ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFDIR: ++ type = AuIop_DIR; ++ break; ++ case S_IFLNK: ++ type = AuIop_SYMLINK; ++ break; ++ default: ++ type = AuIop_OTHER; ++ break; ++ } ++ ++ inode->i_op = iop + type; ++ /* unnecessary smp_wmb() */ ++} ++ ++int au_refresh_hinode_self(struct inode *inode) ++{ ++ int err, update; ++ ++ err = au_ii_refresh(inode, &update); ++ if (!err) ++ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry) ++{ ++ int err, e, update; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, bbot; ++ unsigned char isdir; ++ struct au_hinode *p; ++ struct au_iinfo *iinfo; ++ ++ err = au_ii_refresh(inode, &update); ++ if (unlikely(err)) ++ goto out; ++ ++ update = 0; ++ iinfo = au_ii(inode); ++ p = au_hinode(iinfo, iinfo->ii_btop); ++ mode = (inode->i_mode & S_IFMT); ++ isdir = S_ISDIR(mode); ++ flags = au_hi_flags(inode, isdir); ++ bbot = au_dbbot(dentry); ++ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) { ++ struct inode *h_i, *h_inode; ++ struct dentry *h_d; ++ ++ h_d = au_h_dptr(dentry, bindex); ++ if (!h_d || d_is_negative(h_d)) ++ continue; ++ ++ h_inode = d_inode(h_d); ++ AuDebugOn(mode != (h_inode->i_mode & S_IFMT)); ++ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) { ++ h_i = au_h_iptr(inode, bindex); ++ if (h_i) { ++ if (h_i == h_inode) ++ continue; ++ err = -EIO; ++ break; ++ } ++ } ++ if (bindex < iinfo->ii_btop) ++ iinfo->ii_btop = bindex; ++ if (iinfo->ii_bbot < bindex) ++ iinfo->ii_bbot = bindex; ++ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags); ++ update = 1; ++ } ++ au_update_ibrange(inode, /*do_put_zero*/0); ++ e = au_dy_irefresh(inode); ++ if (unlikely(e && !err)) ++ err = e; ++ if (!err) ++ au_refresh_hinode_attr(inode, update && isdir); ++ ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++static int set_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ unsigned int flags; ++ umode_t mode; ++ aufs_bindex_t bindex, btop, btail; ++ unsigned char isdir; ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct au_iinfo *iinfo; ++ const struct inode_operations *iop; ++ ++ IiMustWriteLock(inode); ++ ++ err = 0; ++ isdir = 0; ++ iop = au_sbi(inode->i_sb)->si_iop_array; ++ btop = au_dbtop(dentry); ++ h_dentry = au_h_dptr(dentry, btop); ++ h_inode = d_inode(h_dentry); ++ mode = h_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ btail = au_dbtail(dentry); ++ inode->i_op = iop + AuIop_OTHER; ++ inode->i_fop = &aufs_file_fop; ++ err = au_dy_iaop(inode, btop, h_inode); ++ if (unlikely(err)) ++ goto out; ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ btail = au_dbtaildir(dentry); ++ inode->i_op = iop + AuIop_DIR; ++ inode->i_fop = &aufs_dir_fop; ++ break; ++ case S_IFLNK: ++ btail = au_dbtail(dentry); ++ inode->i_op = iop + AuIop_SYMLINK; ++ break; ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ case S_IFSOCK: ++ btail = au_dbtail(dentry); ++ inode->i_op = iop + AuIop_OTHER; ++ init_special_inode(inode, mode, h_inode->i_rdev); ++ break; ++ default: ++ AuIOErr("Unknown file type 0%o\n", mode); ++ err = -EIO; ++ goto out; ++ } ++ ++ /* do not set hnotify for whiteouted dirs (SHWH mode) */ ++ flags = au_hi_flags(inode, isdir); ++ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) ++ && au_ftest_hi(flags, HNOTIFY) ++ && dentry->d_name.len > AUFS_WH_PFX_LEN ++ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ++ au_fclr_hi(flags, HNOTIFY); ++ iinfo = au_ii(inode); ++ iinfo->ii_btop = btop; ++ iinfo->ii_bbot = btail; ++ for (bindex = btop; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr(dentry, bindex); ++ if (h_dentry) ++ au_set_h_iptr(inode, bindex, ++ au_igrab(d_inode(h_dentry)), flags); ++ } ++ au_cpup_attr_all(inode, /*force*/1); ++ /* ++ * to force calling aufs_get_acl() every time, ++ * do not call cache_no_acl() for aufs inode. ++ */ ++ ++out: ++ return err; ++} ++ ++/* ++ * successful returns with iinfo write_locked ++ * minus: errno ++ * zero: success, matched ++ * plus: no error, but unmatched ++ */ ++static int reval_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err; ++ unsigned int gen, igflags; ++ aufs_bindex_t bindex, bbot; ++ struct inode *h_inode, *h_dinode; ++ struct dentry *h_dentry; ++ ++ /* ++ * before this function, if aufs got any iinfo lock, it must be only ++ * one, the parent dir. ++ * it can happen by UDBA and the obsoleted inode number. ++ */ ++ err = -EIO; ++ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ goto out; ++ ++ err = 1; ++ ii_write_lock_new_child(inode); ++ h_dentry = au_h_dptr(dentry, au_dbtop(dentry)); ++ h_dinode = d_inode(h_dentry); ++ bbot = au_ibbot(inode); ++ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) { ++ h_inode = au_h_iptr(inode, bindex); ++ if (!h_inode || h_inode != h_dinode) ++ continue; ++ ++ err = 0; ++ gen = au_iigen(inode, &igflags); ++ if (gen == au_digen(dentry) ++ && !au_ig_ftest(igflags, HALF_REFRESHED)) ++ break; ++ ++ /* fully refresh inode using dentry */ ++ err = au_refresh_hinode(inode, dentry); ++ if (!err) ++ au_update_iigen(inode, /*half*/0); ++ break; ++ } ++ ++ if (unlikely(err)) ++ ii_write_unlock(inode); ++out: ++ return err; ++} ++ ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino) ++{ ++ int err, idx; ++ const int isnondir = d_type != DT_DIR; ++ ++ /* prevent hardlinked inode number from race condition */ ++ if (isnondir) { ++ err = au_xinondir_enter(sb, bindex, h_ino, &idx); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ err = au_xino_read(sb, bindex, h_ino, ino); ++ if (unlikely(err)) ++ goto out_xinondir; ++ ++ if (!*ino) { ++ err = -EIO; ++ *ino = au_xino_new_ino(sb); ++ if (unlikely(!*ino)) ++ goto out_xinondir; ++ err = au_xino_write(sb, bindex, h_ino, *ino); ++ if (unlikely(err)) ++ goto out_xinondir; ++ } ++ ++out_xinondir: ++ if (isnondir && idx >= 0) ++ au_xinondir_leave(sb, bindex, h_ino, idx); ++out: ++ return err; ++} ++ ++/* successful returns with iinfo write_locked */ ++/* todo: return with unlocked? */ ++struct inode *au_new_inode(struct dentry *dentry, int must_new) ++{ ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ struct super_block *sb; ++ ino_t h_ino, ino; ++ int err, idx, hlinked; ++ aufs_bindex_t btop; ++ ++ sb = dentry->d_sb; ++ btop = au_dbtop(dentry); ++ h_dentry = au_h_dptr(dentry, btop); ++ h_inode = d_inode(h_dentry); ++ h_ino = h_inode->i_ino; ++ hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1; ++ ++new_ino: ++ /* ++ * stop 'race'-ing between hardlinks under different ++ * parents. ++ */ ++ if (hlinked) { ++ err = au_xinondir_enter(sb, btop, h_ino, &idx); ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ err = au_xino_read(sb, btop, h_ino, &ino); ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out_xinondir; ++ ++ if (!ino) { ++ ino = au_xino_new_ino(sb); ++ if (unlikely(!ino)) { ++ inode = ERR_PTR(-EIO); ++ goto out_xinondir; ++ } ++ } ++ ++ AuDbg("i%lu\n", (unsigned long)ino); ++ inode = au_iget_locked(sb, ino); ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out_xinondir; ++ ++ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); ++ if (inode->i_state & I_NEW) { ++ ii_write_lock_new_child(inode); ++ err = set_inode(inode, dentry); ++ if (!err) { ++ unlock_new_inode(inode); ++ goto out_xinondir; /* success */ ++ } ++ ++ /* ++ * iget_failed() calls iput(), but we need to call ++ * ii_write_unlock() after iget_failed(). so dirty hack for ++ * i_count. ++ */ ++ atomic_inc(&inode->i_count); ++ iget_failed(inode); ++ ii_write_unlock(inode); ++ au_xino_write(sb, btop, h_ino, /*ino*/0); ++ /* ignore this error */ ++ goto out_iput; ++ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { ++ /* ++ * horrible race condition between lookup, readdir and copyup ++ * (or something). ++ */ ++ if (hlinked && idx >= 0) ++ au_xinondir_leave(sb, btop, h_ino, idx); ++ err = reval_inode(inode, dentry); ++ if (unlikely(err < 0)) { ++ hlinked = 0; ++ goto out_iput; ++ } ++ if (!err) ++ goto out; /* success */ ++ else if (hlinked && idx >= 0) { ++ err = au_xinondir_enter(sb, btop, h_ino, &idx); ++ if (unlikely(err)) { ++ iput(inode); ++ inode = ERR_PTR(err); ++ goto out; ++ } ++ } ++ } ++ ++ if (unlikely(au_test_fs_unique_ino(h_inode))) ++ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," ++ " b%d, %s, %pd, hi%lu, i%lu.\n", ++ btop, au_sbtype(h_dentry->d_sb), dentry, ++ (unsigned long)h_ino, (unsigned long)ino); ++ ino = 0; ++ err = au_xino_write(sb, btop, h_ino, /*ino*/0); ++ if (!err) { ++ iput(inode); ++ if (hlinked && idx >= 0) ++ au_xinondir_leave(sb, btop, h_ino, idx); ++ goto new_ino; ++ } ++ ++out_iput: ++ iput(inode); ++ inode = ERR_PTR(err); ++out_xinondir: ++ if (hlinked && idx >= 0) ++ au_xinondir_leave(sb, btop, h_ino, idx); ++out: ++ return inode; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode) ++{ ++ int err; ++ struct inode *hi; ++ ++ err = au_br_rdonly(au_sbr(sb, bindex)); ++ ++ /* pseudo-link after flushed may happen out of bounds */ ++ if (!err ++ && inode ++ && au_ibtop(inode) <= bindex ++ && bindex <= au_ibbot(inode)) { ++ /* ++ * permission check is unnecessary since vfsub routine ++ * will be called later ++ */ ++ hi = au_h_iptr(inode, bindex); ++ if (hi) ++ err = IS_IMMUTABLE(hi) ? -EROFS : 0; ++ } ++ ++ return err; ++} ++ ++int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode, ++ int mask) ++{ ++ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) ++ return 0; ++ return inode_permission(h_userns, h_inode, mask); ++} ++ ++int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode, ++ int mask) ++{ ++ if (au_test_nfs(h_inode->i_sb) ++ && (mask & MAY_WRITE) ++ && S_ISDIR(h_inode->i_mode)) ++ mask |= MAY_READ; /* force permission check */ ++ return au_test_h_perm(h_userns, h_inode, mask); ++} +diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h +new file mode 100644 +index 000000000000..9d923fd3d77c +--- /dev/null ++++ b/fs/aufs/inode.h +@@ -0,0 +1,705 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Copyright (C) 2005-2021 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* ++ * inode operations ++ */ ++ ++#ifndef __AUFS_INODE_H__ ++#define __AUFS_INODE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include ++#include "rwsem.h" ++ ++struct vfsmount; ++ ++struct au_hnotify { ++#ifdef CONFIG_AUFS_HNOTIFY ++#ifdef CONFIG_AUFS_HFSNOTIFY ++ /* never use fsnotify_add_vfsmount_mark() */ ++ struct fsnotify_mark hn_mark; ++#endif ++ struct inode *hn_aufs_inode; /* no get/put */ ++ struct rcu_head rcu; ++#endif ++} ____cacheline_aligned_in_smp; ++ ++struct au_hinode { ++ struct inode *hi_inode; ++ aufs_bindex_t hi_id; ++#ifdef CONFIG_AUFS_HNOTIFY ++ struct au_hnotify *hi_notify; ++#endif ++ ++ /* reference to the copied-up whiteout with get/put */ ++ struct dentry *hi_whdentry; ++}; ++ ++/* ig_flags */ ++#define AuIG_HALF_REFRESHED 1 ++#define au_ig_ftest(flags, name) ((flags) & AuIG_##name) ++#define au_ig_fset(flags, name) \ ++ do { (flags) |= AuIG_##name; } while (0) ++#define au_ig_fclr(flags, name) \ ++ do { (flags) &= ~AuIG_##name; } while (0) ++ ++struct au_iigen { ++ spinlock_t ig_spin; ++ __u32 ig_generation, ig_flags; ++}; ++ ++struct au_vdir; ++struct au_iinfo { ++ struct au_iigen ii_generation; ++ struct super_block *ii_hsb1; /* no get/put */ ++ ++ struct au_rwsem ii_rwsem; ++ aufs_bindex_t ii_btop, ii_bbot; ++ __u32 ii_higen; ++ struct au_hinode *ii_hinode; ++ struct au_vdir *ii_vdir; ++}; ++ ++struct au_icntnr { ++ struct au_iinfo iinfo; ++ struct inode vfs_inode; ++ struct hlist_bl_node plink; ++ struct rcu_head rcu; ++} ____cacheline_aligned_in_smp; ++ ++/* au_pin flags */ ++#define AuPin_DI_LOCKED 1 ++#define AuPin_MNT_WRITE (1 << 1) ++#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) ++#define au_fset_pin(flags, name) \ ++ do { (flags) |= AuPin_##name; } while (0) ++#define au_fclr_pin(flags, name) \ ++ do { (flags) &= ~AuPin_##name; } while (0) ++ ++struct au_pin { ++ /* input */ ++ struct dentry *dentry; ++ unsigned int udba; ++ unsigned char lsc_di, lsc_hi, flags; ++ aufs_bindex_t bindex; ++ ++ /* output */ ++ struct dentry *parent; ++ struct au_hinode *hdir; ++ struct vfsmount *h_mnt; ++ ++ /* temporary unlock/relock for copyup */ ++ struct dentry *h_dentry, *h_parent; ++ struct au_branch *br; ++ struct task_struct *task; ++}; ++ ++void au_pin_hdir_unlock(struct au_pin *p); ++int au_pin_hdir_lock(struct au_pin *p); ++int au_pin_hdir_relock(struct au_pin *p); ++void au_pin_hdir_acquire_nest(struct au_pin *p); ++void au_pin_hdir_release(struct au_pin *p); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_iinfo *au_ii(struct inode *inode) ++{ ++ BUG_ON(is_bad_inode(inode)); ++ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* inode.c */ ++struct inode *au_igrab(struct inode *inode); ++void au_refresh_iop(struct inode *inode, int force_getattr); ++int au_refresh_hinode_self(struct inode *inode); ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry); ++int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ unsigned int d_type, ino_t *ino); ++struct inode *au_new_inode(struct dentry *dentry, int must_new); ++int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode); ++int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode, ++ int mask); ++int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode, ++ int mask); ++ ++static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, ++ ino_t h_ino, unsigned int d_type, ino_t *ino) ++{ ++#ifdef CONFIG_AUFS_SHWH ++ return au_ino(sb, bindex, h_ino, d_type, ino); ++#else ++ return 0; ++#endif ++} ++ ++/* i_op.c */ ++enum { ++ AuIop_SYMLINK, ++ AuIop_DIR, ++ AuIop_OTHER, ++ AuIop_Last ++}; ++extern struct inode_operations aufs_iop[AuIop_Last], /* not const */ ++ aufs_iop_nogetattr[AuIop_Last]; ++ ++/* au_wr_dir flags */ ++#define AuWrDir_ADD_ENTRY 1 ++#define AuWrDir_ISDIR (1 << 1) ++#define AuWrDir_TMPFILE (1 << 2) ++#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) ++#define au_fset_wrdir(flags, name) \ ++ do { (flags) |= AuWrDir_##name; } while (0) ++#define au_fclr_wrdir(flags, name) \ ++ do { (flags) &= ~AuWrDir_##name; } while (0) ++ ++struct au_wr_dir_args { ++ aufs_bindex_t force_btgt; ++ unsigned char flags; ++}; ++int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, ++ struct au_wr_dir_args *args); ++ ++struct dentry *au_pinned_h_parent(struct au_pin *pin); ++void au_pin_init(struct au_pin *pin, struct dentry *dentry, ++ aufs_bindex_t bindex, int lsc_di, int lsc_hi, ++ unsigned int udba, unsigned char flags); ++int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, ++ unsigned int udba, unsigned char flags) __must_check; ++int au_do_pin(struct au_pin *pin) __must_check; ++void au_unpin(struct au_pin *pin); ++int au_reval_for_attr(struct dentry *dentry, unsigned int sigen); ++ ++#define AuIcpup_DID_CPUP 1 ++#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) ++#define au_fset_icpup(flags, name) \ ++ do { (flags) |= AuIcpup_##name; } while (0) ++#define au_fclr_icpup(flags, name) \ ++ do { (flags) &= ~AuIcpup_##name; } while (0) ++ ++struct au_icpup_args { ++ unsigned char flags; ++ unsigned char pin_flags; ++ aufs_bindex_t btgt; ++ unsigned int udba; ++ struct au_pin pin; ++ struct path h_path; ++ struct inode *h_inode; ++}; ++ ++int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, ++ struct au_icpup_args *a); ++ ++int au_h_path_getattr(struct dentry *dentry, struct inode *inode, int force, ++ struct path *h_path, int locked); ++ ++/* i_op_add.c */ ++int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_mknod(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, dev_t dev); ++int aufs_symlink(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, const char *symname); ++int aufs_create(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode, bool want_excl); ++struct vfsub_aopen_args; ++int au_aopen_or_create(struct inode *dir, struct dentry *dentry, ++ struct vfsub_aopen_args *args); ++int aufs_tmpfile(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode); ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry); ++int aufs_mkdir(struct user_namespace *userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode); ++ ++/* i_op_del.c */ ++int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); ++int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, int isdir); ++int aufs_unlink(struct inode *dir, struct dentry *dentry); ++int aufs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++/* i_op_ren.c */ ++int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); ++int aufs_rename(struct user_namespace *userns, ++ struct inode *_src_dir, struct dentry *_src_dentry, ++ struct inode *_dst_dir, struct dentry *_dst_dentry, ++ unsigned int _flags); ++ ++/* iinfo.c */ ++struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); ++void au_hiput(struct au_hinode *hinode); ++void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_wh); ++unsigned int au_hi_flags(struct inode *inode, int isdir); ++ ++/* hinode flags */ ++#define AuHi_XINO 1 ++#define AuHi_HNOTIFY (1 << 1) ++#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) ++#define au_fset_hi(flags, name) \ ++ do { (flags) |= AuHi_##name; } while (0) ++#define au_fclr_hi(flags, name) \ ++ do { (flags) &= ~AuHi_##name; } while (0) ++ ++#ifndef CONFIG_AUFS_HNOTIFY ++#undef AuHi_HNOTIFY ++#define AuHi_HNOTIFY 0 ++#endif ++ ++void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags); ++ ++void au_update_iigen(struct inode *inode, int half); ++void au_update_ibrange(struct inode *inode, int do_put_zero); ++ ++void au_icntnr_init_once(void *_c); ++void au_hinode_init(struct au_hinode *hinode); ++int au_iinfo_init(struct inode *inode); ++void au_iinfo_fin(struct inode *inode); ++int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink); ++ ++#ifdef CONFIG_PROC_FS ++/* plink.c */ ++int au_plink_maint(struct super_block *sb, int flags); ++struct au_sbinfo; ++void au_plink_maint_leave(struct au_sbinfo *sbinfo); ++int au_plink_maint_enter(struct super_block *sb); ++#ifdef CONFIG_AUFS_DEBUG ++void au_plink_list(struct super_block *sb); ++#else ++AuStubVoid(au_plink_list, struct super_block *sb) ++#endif ++int au_plink_test(struct inode *inode); ++struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); ++void au_plink_append(struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++void au_plink_put(struct super_block *sb, int verbose); ++void au_plink_clean(struct super_block *sb, int verbose); ++void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); ++#else ++AuStubInt0(au_plink_maint, struct super_block *sb, int flags); ++AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); ++AuStubInt0(au_plink_maint_enter, struct super_block *sb); ++AuStubVoid(au_plink_list, struct super_block *sb); ++AuStubInt0(au_plink_test, struct inode *inode); ++AuStub(struct dentry *, au_plink_lkup, return NULL, ++ struct inode *inode, aufs_bindex_t bindex); ++AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++AuStubVoid(au_plink_put, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); ++AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); ++#endif /* CONFIG_PROC_FS */ ++ ++#ifdef CONFIG_AUFS_XATTR ++/* xattr.c */ ++int au_cpup_xattr(struct path *h_dst, struct path *h_src, int ignore_flags, ++ unsigned int verbose); ++ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size); ++void au_xattr_init(struct super_block *sb); ++#else ++AuStubInt0(au_cpup_xattr, struct path *h_dst, struct path *h_src, ++ int ignore_flags, unsigned int verbose); ++AuStubVoid(au_xattr_init, struct super_block *sb); ++#endif ++ ++#ifdef CONFIG_FS_POSIX_ACL ++struct posix_acl *aufs_get_acl(struct inode *inode, int type, bool rcu); ++int aufs_set_acl(struct user_namespace *userns, struct inode *inode, ++ struct posix_acl *acl, int type); ++#endif ++ ++#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL) ++enum { ++ AU_XATTR_SET, ++ AU_ACL_SET ++}; ++ ++struct au_sxattr { ++ int type; ++ union { ++ struct { ++ const char *name; ++ const void *value; ++ size_t size; ++ int flags; ++ } set; ++ struct { ++ struct posix_acl *acl; ++ int type; ++ } acl_set; ++ } u; ++}; ++ssize_t au_sxattr(struct dentry *dentry, struct inode *inode, ++ struct au_sxattr *arg); ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for iinfo */ ++enum { ++ AuLsc_II_CHILD, /* child first */ ++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ ++ AuLsc_II_CHILD3, /* copyup dirs */ ++ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ ++ AuLsc_II_PARENT2, ++ AuLsc_II_PARENT3, /* copyup dirs */ ++ AuLsc_II_NEW_CHILD ++}; ++ ++/* ++ * ii_read_lock_child, ii_write_lock_child, ++ * ii_read_lock_child2, ii_write_lock_child2, ++ * ii_read_lock_child3, ii_write_lock_child3, ++ * ii_read_lock_parent, ii_write_lock_parent, ++ * ii_read_lock_parent2, ii_write_lock_parent2, ++ * ii_read_lock_parent3, ii_write_lock_parent3, ++ * ii_read_lock_new_child, ii_write_lock_new_child, ++ */ ++#define AuReadLockFunc(name, lsc) \ ++static inline void ii_read_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} ++ ++#define AuWriteLockFunc(name, lsc) \ ++static inline void ii_write_lock_##name(struct inode *i) \ ++{ \ ++ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ ++} ++ ++#define AuRWLockFuncs(name, lsc) \ ++ AuReadLockFunc(name, lsc) \ ++ AuWriteLockFunc(name, lsc) ++ ++AuRWLockFuncs(child, CHILD); ++AuRWLockFuncs(child2, CHILD2); ++AuRWLockFuncs(child3, CHILD3); ++AuRWLockFuncs(parent, PARENT); ++AuRWLockFuncs(parent2, PARENT2); ++AuRWLockFuncs(parent3, PARENT3); ++AuRWLockFuncs(new_child, NEW_CHILD); ++ ++#undef AuReadLockFunc ++#undef AuWriteLockFunc ++#undef AuRWLockFuncs ++ ++#define ii_read_unlock(i) au_rw_read_unlock(&au_ii(i)->ii_rwsem) ++#define ii_write_unlock(i) au_rw_write_unlock(&au_ii(i)->ii_rwsem) ++#define ii_downgrade_lock(i) au_rw_dgrade_lock(&au_ii(i)->ii_rwsem) ++ ++#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) ++#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) ++#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void au_icntnr_init(struct au_icntnr *c) ++{ ++#ifdef CONFIG_AUFS_DEBUG ++ c->vfs_inode.i_mode = 0; ++#endif ++} ++ ++static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags) ++{ ++ unsigned int gen; ++ struct au_iinfo *iinfo; ++ struct au_iigen *iigen; ++ ++ iinfo = au_ii(inode); ++ iigen = &iinfo->ii_generation; ++ spin_lock(&iigen->ig_spin); ++ if (igflags) ++ *igflags = iigen->ig_flags; ++ gen = iigen->ig_generation; ++ spin_unlock(&iigen->ig_spin); ++ ++ return gen; ++} ++ ++/* tiny test for inode number */ ++/* tmpfs generation is too rough */ ++static inline int au_test_higen(struct inode *inode, struct inode *h_inode) ++{ ++ struct au_iinfo *iinfo; ++ ++ iinfo = au_ii(inode); ++ AuRwMustAnyLock(&iinfo->ii_rwsem); ++ return !(iinfo->ii_hsb1 == h_inode->i_sb ++ && iinfo->ii_higen == h_inode->i_generation); ++} ++ ++static inline void au_iigen_dec(struct inode *inode) ++{ ++ struct au_iinfo *iinfo; ++ struct au_iigen *iigen; ++ ++ iinfo = au_ii(inode); ++ iigen = &iinfo->ii_generation; ++ spin_lock(&iigen->ig_spin); ++ iigen->ig_generation--; ++ spin_unlock(&iigen->ig_spin); ++} ++ ++static inline int au_iigen_test(struct inode *inode, unsigned int sigen) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(inode && au_iigen(inode, NULL) != sigen)) ++ err = -EIO; ++ ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo, ++ aufs_bindex_t bindex) ++{ ++ return iinfo->ii_hinode + bindex; ++} ++ ++static inline int au_is_bad_inode(struct inode *inode) ++{ ++ return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0)); ++} ++ ++static inline aufs_bindex_t au_ii_br_id(struct inode *inode, ++ aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_hinode(au_ii(inode), bindex)->hi_id; ++} ++ ++static inline aufs_bindex_t au_ibtop(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_btop; ++} ++ ++static inline aufs_bindex_t au_ibbot(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_bbot; ++} ++ ++static inline struct au_vdir *au_ivdir(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return au_ii(inode)->ii_vdir; ++} ++ ++static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_hinode(au_ii(inode), bindex)->hi_whdentry; ++} ++ ++static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_btop = bindex; ++} ++ ++static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_bbot = bindex; ++} ++ ++static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) ++{ ++ IiMustWriteLock(inode); ++ au_ii(inode)->ii_vdir = vdir; ++} ++ ++static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ return au_hinode(au_ii(inode), bindex); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline struct dentry *au_pinned_parent(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->parent; ++ return NULL; ++} ++ ++static inline struct inode *au_pinned_h_dir(struct au_pin *pin) ++{ ++ if (pin && pin->hdir) ++ return pin->hdir->hi_inode; ++ return NULL; ++} ++ ++static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) ++{ ++ if (pin) ++ return pin->hdir; ++ return NULL; ++} ++ ++static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) ++{ ++ if (pin) ++ pin->dentry = dentry; ++} ++ ++static inline void au_pin_set_parent_lflag(struct au_pin *pin, ++ unsigned char lflag) ++{ ++ if (pin) { ++ if (lflag) ++ au_fset_pin(pin->flags, DI_LOCKED); ++ else ++ au_fclr_pin(pin->flags, DI_LOCKED); ++ } ++} ++ ++#if 0 /* reserved */ ++static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) ++{ ++ if (pin) { ++ dput(pin->parent); ++ pin->parent = dget(parent); ++ } ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct au_branch; ++#ifdef CONFIG_AUFS_HNOTIFY ++struct au_hnotify_op { ++ void (*ctl)(struct au_hinode *hinode, int do_set); ++ int (*alloc)(struct au_hinode *hinode); ++ ++ /* ++ * if it returns true, the caller should free hinode->hi_notify, ++ * otherwise ->free() frees it. ++ */ ++ int (*free)(struct au_hinode *hinode, ++ struct au_hnotify *hn) __must_check; ++ ++ void (*fin)(void); ++ int (*init)(void); ++ ++ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); ++ void (*fin_br)(struct au_branch *br); ++ int (*init_br)(struct au_branch *br, int perm); ++}; ++ ++/* hnotify.c */ ++int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); ++void au_hn_free(struct au_hinode *hinode); ++void au_hn_ctl(struct au_hinode *hinode, int do_set); ++void au_hn_reset(struct inode *inode, unsigned int flags); ++int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, ++ const struct qstr *h_child_qstr, struct inode *h_child_inode); ++int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); ++int au_hnotify_init_br(struct au_branch *br, int perm); ++void au_hnotify_fin_br(struct au_branch *br); ++int __init au_hnotify_init(void); ++void au_hnotify_fin(void); ++ ++/* hfsnotify.c */ ++extern const struct au_hnotify_op au_hnotify_op; ++ ++static inline ++void au_hn_init(struct au_hinode *hinode) ++{ ++ hinode->hi_notify = NULL; ++} ++ ++static inline struct au_hnotify *au_hn(struct au_hinode *hinode) ++{ ++ return hinode->hi_notify; ++} ++ ++#else ++AuStub(int, au_hn_alloc, return -EOPNOTSUPP, ++ struct au_hinode *hinode __maybe_unused, ++ struct inode *inode __maybe_unused) ++AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode) ++AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) ++AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, ++ int do_set __maybe_unused) ++AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, ++ unsigned int flags __maybe_unused) ++AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, ++ struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, ++ int perm __maybe_unused) ++AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused) ++AuStubInt0(__init au_hnotify_init, void) ++AuStubVoid(au_hnotify_fin, void) ++AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) ++#endif /* CONFIG_AUFS_HNOTIFY */ ++ ++static inline void au_hn_suspend(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/0); ++} ++ ++static inline void au_hn_resume(struct au_hinode *hdir) ++{ ++ au_hn_ctl(hdir, /*do_set*/1); ++} ++ ++static inline void au_hn_inode_lock(struct au_hinode *hdir) ++{ ++ inode_lock(hdir->hi_inode); ++ au_hn_suspend(hdir); ++} ++ ++static inline void au_hn_inode_lock_nested(struct au_hinode *hdir, ++ unsigned int sc __maybe_unused) ++{ ++ inode_lock_nested(hdir->hi_inode, sc); ++ au_hn_suspend(hdir); ++} ++ ++#if 0 /* unused */ ++#include "vfsub.h" ++static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir, ++ unsigned int sc) ++{ ++ inode_lock_shared_nested(hdir->hi_inode, sc); ++ au_hn_suspend(hdir); ++} ++#endif ++ ++static inline void au_hn_inode_unlock(struct au_hinode *hdir) ++{ ++ au_hn_resume(hdir); ++ inode_unlock(hdir->hi_inode); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_INODE_H__ */ +diff --git a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c +new file mode 100644 +index 000000000000..67128f425fc4 +--- /dev/null ++++ b/fs/aufs/ioctl.c +@@ -0,0 +1,220 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (C) 2005-2021 Junjiro R. Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++/* ++ * ioctl ++ * plink-management and readdir in userspace. ++ * assist the pathconf(3) wrapper library. ++ * move-down ++ * File-based Hierarchical Storage Management. ++ */ ++ ++#include ++#include ++#include "aufs.h" ++ ++static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) ++{ ++ int err, fd; ++ aufs_bindex_t wbi, bindex, bbot; ++ struct file *h_file; ++ struct super_block *sb; ++ struct dentry *root; ++ struct au_branch *br; ++ struct aufs_wbr_fd wbrfd = { ++ .oflags = au_dir_roflags, ++ .brid = -1 ++ }; ++ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY ++ | O_NOATIME | O_CLOEXEC; ++ ++ AuDebugOn(wbrfd.oflags & ~valid); ++ ++ if (arg) { ++ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); ++ if (unlikely(err)) { ++ err = -EFAULT; ++ goto out; ++ } ++ ++ err = -EINVAL; ++ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); ++ wbrfd.oflags |= au_dir_roflags; ++ AuDbg("0%o\n", wbrfd.oflags); ++ if (unlikely(wbrfd.oflags & ~valid)) ++ goto out; ++ } ++ ++ fd = get_unused_fd_flags(0); ++ err = fd; ++ if (unlikely(fd < 0)) ++ goto out; ++ ++ h_file = ERR_PTR(-EINVAL); ++ wbi = 0; ++ br = NULL; ++ sb = path->dentry->d_sb; ++ root = sb->s_root; ++ aufs_read_lock(root, AuLock_IR); ++ bbot = au_sbbot(sb); ++ if (wbrfd.brid >= 0) { ++ wbi = au_br_index(sb, wbrfd.brid); ++ if (unlikely(wbi < 0 || wbi > bbot)) ++ goto out_unlock; ++ } ++ ++ h_file = ERR_PTR(-ENOENT); ++ br = au_sbr(sb, wbi); ++ if (!au_br_writable(br->br_perm)) { ++ if (arg) ++ goto out_unlock; ++ ++ bindex = wbi + 1; ++ wbi = -1; ++ for (; bindex <= bbot; bindex++) { ++ br = au_sbr(sb, bindex); ++ if (au_br_writable(br->br_perm)) { ++ wbi = bindex; ++ br = au_sbr(sb, wbi); ++ break; ++ } ++ } ++ } ++ AuDbg("wbi %d\n", wbi); ++ if (wbi >= 0) ++ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL, ++ /*force_wr*/0); ++ ++out_unlock: ++ aufs_read_unlock(root, AuLock_IR); ++ err = PTR_ERR(h_file); ++ if (IS_ERR(h_file)) ++ goto out_fd; ++ ++ au_lcnt_dec(&br->br_nfiles); /* cf. au_h_open() */ ++ fd_install(fd, h_file); ++ err = fd; ++ goto out; /* success */ ++ ++out_fd: ++ put_unused_fd(fd); ++out: ++ AuTraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ struct dentry *dentry; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_ioctl(file, cmd, arg); ++ break; ++ ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; ++ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_ioctl(file, arg); ++ break; ++ ++ case AUFS_CTL_BRINFO: ++ err = au_brinfo_ioctl(file, arg); ++ break; ++ ++ case AUFS_CTL_FHSM_FD: ++ dentry = file->f_path.dentry; ++ if (IS_ROOT(dentry)) ++ err = au_fhsm_fd(dentry->d_sb, arg); ++ else ++ err = -ENOTTY; ++ break; ++ ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_MVDOWN: ++ err = au_mvdown(file->f_path.dentry, (void __user *)arg); ++ break; ++ ++ case AUFS_CTL_WBR_FD: ++ err = au_wbr_fd(&file->f_path, (void __user *)arg); ++ break; ++ ++ default: ++ /* do not call the lower */ ++ AuDbg("0x%x\n", cmd); ++ err = -ENOTTY; ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++#ifdef CONFIG_COMPAT ++long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ long err; ++ ++ switch (cmd) { ++ case AUFS_CTL_RDU: ++ case AUFS_CTL_RDU_INO: ++ err = au_rdu_compat_ioctl(file, cmd, arg); ++ break; ++ ++ case AUFS_CTL_IBUSY: ++ err = au_ibusy_compat_ioctl(file, arg); ++ break; ++ ++ case AUFS_CTL_BRINFO: ++ err = au_brinfo_compat_ioctl(file, arg); ++ break; ++ ++ default: ++ err = aufs_ioctl_dir(file, cmd, arg); ++ } ++ ++ AuTraceErr(err); ++ return err; ++} ++ ++long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); ++} ++#endif +diff --git a/fs/aufs/lcnt.h b/fs/aufs/lcnt.h +new file mode 100644 +index 000000000000..4ee7bec9dca3 +--- /dev/null ++++ b/fs/aufs/lcnt.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -24613,9 +24832,11 @@ diff -ruN a/fs/aufs/lcnt.h b/fs/aufs/lcnt.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_LCNT_H__ */ -diff -ruN a/fs/aufs/loop.c b/fs/aufs/loop.c ---- a/fs/aufs/loop.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/loop.c 2022-03-30 02:32:42.072758101 +0300 +diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c +new file mode 100644 +index 000000000000..6ce160f6bf8f +--- /dev/null ++++ b/fs/aufs/loop.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -24781,9 +25002,11 @@ diff -ruN a/fs/aufs/loop.c b/fs/aufs/loop.c + AuDebugOn(!f); + return f; +} -diff -ruN a/fs/aufs/loop.h b/fs/aufs/loop.h ---- a/fs/aufs/loop.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/loop.h 2022-03-30 02:32:42.072758101 +0300 +diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h +new file mode 100644 +index 000000000000..bfa5260174c1 +--- /dev/null ++++ b/fs/aufs/loop.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -24844,9 +25067,11 @@ diff -ruN a/fs/aufs/loop.h b/fs/aufs/loop.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_LOOP_H__ */ -diff -ruN a/fs/aufs/magic.mk b/fs/aufs/magic.mk ---- a/fs/aufs/magic.mk 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/magic.mk 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/magic.mk b/fs/aufs/magic.mk +new file mode 100644 +index 000000000000..7bc9eef3ffec +--- /dev/null ++++ b/fs/aufs/magic.mk @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 + @@ -24879,59 +25104,11 @@ diff -ruN a/fs/aufs/magic.mk b/fs/aufs/magic.mk +ifdef CONFIG_HFSPLUS_FS +ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b +endif -diff -ruN a/fs/aufs/Makefile b/fs/aufs/Makefile ---- a/fs/aufs/Makefile 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/Makefile 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1,46 @@ -+# SPDX-License-Identifier: GPL-2.0 -+ -+include ${src}/magic.mk -+ifeq (${CONFIG_AUFS_FS},m) -+include ${src}/conf.mk -+endif -+-include ${src}/priv_def.mk -+ -+# cf. include/linux/kernel.h -+# enable pr_debug -+ccflags-y += -DDEBUG -+# sparse requires the full pathname -+ifdef M -+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h -+else -+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h -+endif -+ -+obj-$(CONFIG_AUFS_FS) += aufs.o -+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o fsctx.o \ -+ wkq.o vfsub.o dcsub.o \ -+ cpup.o whout.o wbr_policy.o \ -+ dinfo.o dentry.o \ -+ dynop.o \ -+ finfo.o file.o f_op.o \ -+ dir.o vdir.o \ -+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ -+ mvdown.o ioctl.o -+ -+# all are boolean -+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o -+aufs-$(CONFIG_SYSFS) += sysfs.o -+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o -+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o -+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o -+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o -+aufs-$(CONFIG_AUFS_EXPORT) += export.o -+aufs-$(CONFIG_AUFS_XATTR) += xattr.o -+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o -+aufs-$(CONFIG_AUFS_DIRREN) += dirren.o -+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o -+aufs-$(CONFIG_AUFS_POLL) += poll.o -+aufs-$(CONFIG_AUFS_RDU) += rdu.o -+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o -+aufs-$(CONFIG_AUFS_DEBUG) += debug.o -+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o -diff -ruN a/fs/aufs/module.c b/fs/aufs/module.c ---- a/fs/aufs/module.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/module.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/module.c b/fs/aufs/module.c +new file mode 100644 +index 000000000000..0d8c78c94264 +--- /dev/null ++++ b/fs/aufs/module.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -25206,9 +25383,11 @@ diff -ruN a/fs/aufs/module.c b/fs/aufs/module.c + +module_init(aufs_init); +module_exit(aufs_exit); -diff -ruN a/fs/aufs/module.h b/fs/aufs/module.h ---- a/fs/aufs/module.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/module.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/module.h b/fs/aufs/module.h +new file mode 100644 +index 000000000000..13f960e0887e +--- /dev/null ++++ b/fs/aufs/module.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -25376,9 +25555,11 @@ diff -ruN a/fs/aufs/module.h b/fs/aufs/module.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_MODULE_H__ */ -diff -ruN a/fs/aufs/mvdown.c b/fs/aufs/mvdown.c ---- a/fs/aufs/mvdown.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/mvdown.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/mvdown.c b/fs/aufs/mvdown.c +new file mode 100644 +index 000000000000..26f643adcdb2 +--- /dev/null ++++ b/fs/aufs/mvdown.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -26086,9 +26267,11 @@ diff -ruN a/fs/aufs/mvdown.c b/fs/aufs/mvdown.c + AuTraceErr(err); + return err; +} -diff -ruN a/fs/aufs/opts.c b/fs/aufs/opts.c ---- a/fs/aufs/opts.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/opts.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c +new file mode 100644 +index 000000000000..703ff034c467 +--- /dev/null ++++ b/fs/aufs/opts.c @@ -0,0 +1,1032 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -27122,9 +27305,11 @@ diff -ruN a/fs/aufs/opts.c b/fs/aufs/opts.c +{ + return au_mntflags(sb) & AuOptMask_UDBA; +} -diff -ruN a/fs/aufs/opts.h b/fs/aufs/opts.h ---- a/fs/aufs/opts.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/opts.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/opts.h b/fs/aufs/opts.h +new file mode 100644 +index 000000000000..3aa8ed1ccc03 +--- /dev/null ++++ b/fs/aufs/opts.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -27389,9 +27574,11 @@ diff -ruN a/fs/aufs/opts.h b/fs/aufs/opts.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_OPTS_H__ */ -diff -ruN a/fs/aufs/plink.c b/fs/aufs/plink.c ---- a/fs/aufs/plink.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/plink.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c +new file mode 100644 +index 000000000000..82e51a50191c +--- /dev/null ++++ b/fs/aufs/plink.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -27909,9 +28096,11 @@ diff -ruN a/fs/aufs/plink.c b/fs/aufs/plink.c + } + } +} -diff -ruN a/fs/aufs/poll.c b/fs/aufs/poll.c ---- a/fs/aufs/poll.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/poll.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/poll.c b/fs/aufs/poll.c +new file mode 100644 +index 000000000000..d1081a7f02f2 +--- /dev/null ++++ b/fs/aufs/poll.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -27964,9 +28153,11 @@ diff -ruN a/fs/aufs/poll.c b/fs/aufs/poll.c + AuDbg("mask 0x%x\n", mask); + return mask; +} -diff -ruN a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c ---- a/fs/aufs/posix_acl.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/posix_acl.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c +new file mode 100644 +index 000000000000..69adf2bb5212 +--- /dev/null ++++ b/fs/aufs/posix_acl.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -28079,9 +28270,11 @@ diff -ruN a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c +out: + return err; +} -diff -ruN a/fs/aufs/procfs.c b/fs/aufs/procfs.c ---- a/fs/aufs/procfs.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/procfs.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/procfs.c b/fs/aufs/procfs.c +new file mode 100644 +index 000000000000..303558e92736 +--- /dev/null ++++ b/fs/aufs/procfs.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -28253,9 +28446,11 @@ diff -ruN a/fs/aufs/procfs.c b/fs/aufs/procfs.c +out: + return err; +} -diff -ruN a/fs/aufs/rdu.c b/fs/aufs/rdu.c ---- a/fs/aufs/rdu.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/rdu.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/rdu.c b/fs/aufs/rdu.c +new file mode 100644 +index 000000000000..54aa07ba5b54 +--- /dev/null ++++ b/fs/aufs/rdu.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -28641,9 +28836,11 @@ diff -ruN a/fs/aufs/rdu.c b/fs/aufs/rdu.c + return err; +} +#endif -diff -ruN a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h ---- a/fs/aufs/rwsem.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/rwsem.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h +new file mode 100644 +index 000000000000..57b7c6ccabef +--- /dev/null ++++ b/fs/aufs/rwsem.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -28730,9 +28927,11 @@ diff -ruN a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_RWSEM_H__ */ -diff -ruN a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c ---- a/fs/aufs/sbinfo.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/sbinfo.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c +new file mode 100644 +index 000000000000..0169428a4298 +--- /dev/null ++++ b/fs/aufs/sbinfo.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -29050,9 +29249,11 @@ diff -ruN a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c + di_write_unlock2(d1, d2); + si_read_unlock(d1->d_sb); +} -diff -ruN a/fs/aufs/super.c b/fs/aufs/super.c ---- a/fs/aufs/super.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/super.c 2022-03-30 02:32:42.072758101 +0300 +diff --git a/fs/aufs/super.c b/fs/aufs/super.c +new file mode 100644 +index 000000000000..3756bbc49727 +--- /dev/null ++++ b/fs/aufs/super.c @@ -0,0 +1,874 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -29928,9 +30129,11 @@ diff -ruN a/fs/aufs/super.c b/fs/aufs/super.c + /* no need to __module_get() and module_put(). */ + .owner = THIS_MODULE, +}; -diff -ruN a/fs/aufs/super.h b/fs/aufs/super.h ---- a/fs/aufs/super.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/super.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/super.h b/fs/aufs/super.h +new file mode 100644 +index 000000000000..9f7ff1953185 +--- /dev/null ++++ b/fs/aufs/super.h @@ -0,0 +1,592 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -30524,9 +30727,11 @@ diff -ruN a/fs/aufs/super.h b/fs/aufs/super.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_SUPER_H__ */ -diff -ruN a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c ---- a/fs/aufs/sysaufs.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/sysaufs.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c +new file mode 100644 +index 000000000000..00d53dea4bf9 +--- /dev/null ++++ b/fs/aufs/sysaufs.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -30621,9 +30826,11 @@ diff -ruN a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c +out: + return err; +} -diff -ruN a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h ---- a/fs/aufs/sysaufs.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/sysaufs.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h +new file mode 100644 +index 000000000000..32b5c2cb6a7d +--- /dev/null ++++ b/fs/aufs/sysaufs.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -30727,9 +30934,11 @@ diff -ruN a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h + +#endif /* __KERNEL__ */ +#endif /* __SYSAUFS_H__ */ -diff -ruN a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c ---- a/fs/aufs/sysfs.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/sysfs.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c +new file mode 100644 +index 000000000000..519eb1fc6a7a +--- /dev/null ++++ b/fs/aufs/sysfs.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -31105,9 +31314,11 @@ diff -ruN a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c + } + } +} -diff -ruN a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c ---- a/fs/aufs/sysrq.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/sysrq.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c +new file mode 100644 +index 000000000000..b801bf162d26 +--- /dev/null ++++ b/fs/aufs/sysrq.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -31258,9 +31469,11 @@ diff -ruN a/fs/aufs/sysrq.c b/fs/aufs/sysrq.c + if (unlikely(err)) + pr_err("err %d (ignored)\n", err); +} -diff -ruN a/fs/aufs/vdir.c b/fs/aufs/vdir.c ---- a/fs/aufs/vdir.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/vdir.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c +new file mode 100644 +index 000000000000..a3f709ee7475 +--- /dev/null ++++ b/fs/aufs/vdir.c @@ -0,0 +1,896 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -32158,9 +32371,11 @@ diff -ruN a/fs/aufs/vdir.c b/fs/aufs/vdir.c + /* smp_mb(); */ + return 0; +} -diff -ruN a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c ---- a/fs/aufs/vfsub.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/vfsub.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c +new file mode 100644 +index 000000000000..ac71509ad602 +--- /dev/null ++++ b/fs/aufs/vfsub.c @@ -0,0 +1,918 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -33080,9 +33295,11 @@ diff -ruN a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c + + return err; +} -diff -ruN a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h ---- a/fs/aufs/vfsub.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/vfsub.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h +new file mode 100644 +index 000000000000..0c76f421b95f +--- /dev/null ++++ b/fs/aufs/vfsub.h @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -33442,9 +33659,11 @@ diff -ruN a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_VFSUB_H__ */ -diff -ruN a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c ---- a/fs/aufs/wbr_policy.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/wbr_policy.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c +new file mode 100644 +index 000000000000..309ae56b609b +--- /dev/null ++++ b/fs/aufs/wbr_policy.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -34276,9 +34495,11 @@ diff -ruN a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c + .fin = au_wbr_create_fin_mfs + } +}; -diff -ruN a/fs/aufs/whout.c b/fs/aufs/whout.c ---- a/fs/aufs/whout.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/whout.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c +new file mode 100644 +index 000000000000..a6d30f64b0c0 +--- /dev/null ++++ b/fs/aufs/whout.c @@ -0,0 +1,1072 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -35352,9 +35573,11 @@ diff -ruN a/fs/aufs/whout.c b/fs/aufs/whout.c + au_whtmp_rmdir_free(args); + } +} -diff -ruN a/fs/aufs/whout.h b/fs/aufs/whout.h ---- a/fs/aufs/whout.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/whout.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/whout.h b/fs/aufs/whout.h +new file mode 100644 +index 000000000000..d55c4f9a9659 +--- /dev/null ++++ b/fs/aufs/whout.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -35443,9 +35666,11 @@ diff -ruN a/fs/aufs/whout.h b/fs/aufs/whout.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_WHOUT_H__ */ -diff -ruN a/fs/aufs/wkq.c b/fs/aufs/wkq.c ---- a/fs/aufs/wkq.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/wkq.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c +new file mode 100644 +index 000000000000..12b306f185d9 +--- /dev/null ++++ b/fs/aufs/wkq.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -35819,9 +36044,11 @@ diff -ruN a/fs/aufs/wkq.c b/fs/aufs/wkq.c + + return err; +} -diff -ruN a/fs/aufs/wkq.h b/fs/aufs/wkq.h ---- a/fs/aufs/wkq.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/wkq.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h +new file mode 100644 +index 000000000000..d7d200e607bd +--- /dev/null ++++ b/fs/aufs/wkq.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* @@ -35912,9 +36139,11 @@ diff -ruN a/fs/aufs/wkq.h b/fs/aufs/wkq.h + +#endif /* __KERNEL__ */ +#endif /* __AUFS_WKQ_H__ */ -diff -ruN a/fs/aufs/xattr.c b/fs/aufs/xattr.c ---- a/fs/aufs/xattr.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/xattr.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/xattr.c b/fs/aufs/xattr.c +new file mode 100644 +index 000000000000..4839dd8f775e +--- /dev/null ++++ b/fs/aufs/xattr.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -36284,9 +36513,11 @@ diff -ruN a/fs/aufs/xattr.c b/fs/aufs/xattr.c +{ + sb->s_xattr = au_xattr_handlers; +} -diff -ruN a/fs/aufs/xino.c b/fs/aufs/xino.c ---- a/fs/aufs/xino.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/fs/aufs/xino.c 2022-03-09 15:19:00.000000000 +0300 +diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c +new file mode 100644 +index 000000000000..4e7e39c98edf +--- /dev/null ++++ b/fs/aufs/xino.c @@ -0,0 +1,1926 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -38214,10 +38445,11 @@ diff -ruN a/fs/aufs/xino.c b/fs/aufs/xino.c +out: + return err; +} -diff -ruN a/fs/dcache.c b/fs/dcache.c ---- a/fs/dcache.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/dcache.c 2022-03-30 02:32:38.413760236 +0300 -@@ -1320,7 +1320,7 @@ +diff --git a/fs/dcache.c b/fs/dcache.c +index cf871a81f4fd..9508bd57a3bc 100644 +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -1320,7 +1320,7 @@ enum d_walk_ret { * * The @enter() callbacks are called with d_lock held. */ @@ -38226,7 +38458,7 @@ diff -ruN a/fs/dcache.c b/fs/dcache.c enum d_walk_ret (*enter)(void *, struct dentry *)) { struct dentry *this_parent; -@@ -1425,6 +1425,7 @@ +@@ -1425,6 +1425,7 @@ static void d_walk(struct dentry *parent, void *data, seq = 1; goto again; } @@ -38234,7 +38466,7 @@ diff -ruN a/fs/dcache.c b/fs/dcache.c struct check_mount { struct vfsmount *mnt; -@@ -2970,6 +2971,7 @@ +@@ -2970,6 +2971,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) write_sequnlock(&rename_lock); } @@ -38242,10 +38474,11 @@ diff -ruN a/fs/dcache.c b/fs/dcache.c /** * d_ancestor - search for an ancestor -diff -ruN a/fs/exec.c b/fs/exec.c ---- a/fs/exec.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/exec.c 2022-03-30 02:32:38.414760235 +0300 -@@ -111,6 +111,7 @@ +diff --git a/fs/exec.c b/fs/exec.c +index 7d424337b4ec..2a997e92f0c6 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -111,6 +111,7 @@ bool path_noexec(const struct path *path) return (path->mnt->mnt_flags & MNT_NOEXEC) || (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } @@ -38253,9 +38486,10 @@ diff -ruN a/fs/exec.c b/fs/exec.c #ifdef CONFIG_USELIB /* -diff -ruN a/fs/fcntl.c b/fs/fcntl.c ---- a/fs/fcntl.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/fcntl.c 2022-03-30 02:32:38.414760235 +0300 +diff --git a/fs/fcntl.c b/fs/fcntl.c +index 9c6c6a3e2de5..3418c60b9014 100644 +--- a/fs/fcntl.c ++++ b/fs/fcntl.c @@ -33,7 +33,7 @@ #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) @@ -38265,7 +38499,7 @@ diff -ruN a/fs/fcntl.c b/fs/fcntl.c { struct inode * inode = file_inode(filp); int error = 0; -@@ -64,6 +64,8 @@ +@@ -64,6 +64,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (filp->f_op->check_flags) error = filp->f_op->check_flags(arg); @@ -38274,7 +38508,7 @@ diff -ruN a/fs/fcntl.c b/fs/fcntl.c if (error) return error; -@@ -84,6 +86,7 @@ +@@ -84,6 +86,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) out: return error; } @@ -38282,10 +38516,11 @@ diff -ruN a/fs/fcntl.c b/fs/fcntl.c static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) -diff -ruN a/fs/file_table.c b/fs/file_table.c ---- a/fs/file_table.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/file_table.c 2022-03-30 02:32:38.414760235 +0300 -@@ -161,6 +161,7 @@ +diff --git a/fs/file_table.c b/fs/file_table.c +index 6f297f9782fc..ec7f755127ea 100644 +--- a/fs/file_table.c ++++ b/fs/file_table.c +@@ -161,6 +161,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) } return ERR_PTR(-ENFILE); } @@ -38293,10 +38528,11 @@ diff -ruN a/fs/file_table.c b/fs/file_table.c /* * Variant of alloc_empty_file() that doesn't check and modify nr_files. -diff -ruN a/fs/inode.c b/fs/inode.c ---- a/fs/inode.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/inode.c 2022-03-30 02:33:39.114719874 +0300 -@@ -904,6 +904,8 @@ +diff --git a/fs/inode.c b/fs/inode.c +index 8279c700a2b7..ccf9f033eb68 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -903,6 +903,8 @@ unsigned int get_next_ino(void) unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; @@ -38305,7 +38541,7 @@ diff -ruN a/fs/inode.c b/fs/inode.c #ifdef CONFIG_SMP if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { static atomic_t shared_last_ino; -@@ -916,7 +918,7 @@ +@@ -915,7 +917,7 @@ unsigned int get_next_ino(void) res++; /* get_next_ino should not provide a 0 inode number */ if (unlikely(!res)) @@ -38314,29 +38550,11 @@ diff -ruN a/fs/inode.c b/fs/inode.c *p = res; put_cpu_var(last_ino); return res; -diff -ruN a/fs/Kconfig b/fs/Kconfig ---- a/fs/Kconfig 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/Kconfig 2022-03-30 02:33:03.590744749 +0300 -@@ -312,6 +312,7 @@ - source "fs/ufs/Kconfig" - source "fs/erofs/Kconfig" - source "fs/vboxsf/Kconfig" -+source "fs/aufs/Kconfig" - - endif # MISC_FILESYSTEMS - -diff -ruN a/fs/Makefile b/fs/Makefile ---- a/fs/Makefile 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/Makefile 2022-03-30 02:33:03.591744749 +0300 -@@ -138,3 +138,4 @@ - obj-$(CONFIG_EROFS_FS) += erofs/ - obj-$(CONFIG_VBOXSF_FS) += vboxsf/ - obj-$(CONFIG_ZONEFS_FS) += zonefs/ -+obj-$(CONFIG_AUFS_FS) += aufs/ -diff -ruN a/fs/namespace.c b/fs/namespace.c ---- a/fs/namespace.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/namespace.c 2022-03-30 02:32:38.415760235 +0300 -@@ -439,6 +439,7 @@ +diff --git a/fs/namespace.c b/fs/namespace.c +index d946298691ed..6a31b6fddbe2 100644 +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -440,6 +440,7 @@ void __mnt_drop_write(struct vfsmount *mnt) mnt_dec_writers(real_mount(mnt)); preempt_enable(); } @@ -38344,7 +38562,7 @@ diff -ruN a/fs/namespace.c b/fs/namespace.c /** * mnt_drop_write - give up write access to a mount -@@ -808,6 +809,13 @@ +@@ -809,6 +810,13 @@ static inline int check_mnt(struct mount *mnt) return mnt->mnt_ns == current->nsproxy->mnt_ns; } @@ -38358,7 +38576,7 @@ diff -ruN a/fs/namespace.c b/fs/namespace.c /* * vfsmount lock must be held for write */ -@@ -2005,6 +2013,7 @@ +@@ -2011,6 +2019,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, } return 0; } @@ -38366,10 +38584,11 @@ diff -ruN a/fs/namespace.c b/fs/namespace.c static void lock_mnt_tree(struct mount *mnt) { -diff -ruN a/fs/notify/group.c b/fs/notify/group.c ---- a/fs/notify/group.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/notify/group.c 2022-03-30 02:32:38.415760235 +0300 -@@ -100,6 +100,7 @@ +diff --git a/fs/notify/group.c b/fs/notify/group.c +index fb89c351295d..460ad19c2570 100644 +--- a/fs/notify/group.c ++++ b/fs/notify/group.c +@@ -100,6 +100,7 @@ void fsnotify_get_group(struct fsnotify_group *group) { refcount_inc(&group->refcnt); } @@ -38377,10 +38596,11 @@ diff -ruN a/fs/notify/group.c b/fs/notify/group.c /* * Drop a reference to a group. Free it if it's through. -diff -ruN a/fs/open.c b/fs/open.c ---- a/fs/open.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/open.c 2022-03-30 02:32:38.415760235 +0300 -@@ -65,6 +65,7 @@ +diff --git a/fs/open.c b/fs/open.c +index 5e322f188e83..cb92b0da9c73 100644 +--- a/fs/open.c ++++ b/fs/open.c +@@ -66,6 +66,7 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry, inode_unlock(dentry->d_inode); return ret; } @@ -38388,10 +38608,11 @@ diff -ruN a/fs/open.c b/fs/open.c long vfs_truncate(const struct path *path, loff_t length) { -diff -ruN a/fs/proc/base.c b/fs/proc/base.c ---- a/fs/proc/base.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/proc/base.c 2022-03-30 02:33:13.307738286 +0300 -@@ -2186,7 +2186,7 @@ +diff --git a/fs/proc/base.c b/fs/proc/base.c +index 300d53ee7040..8c332d6f7dec 100644 +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -2217,7 +2217,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) rc = -ENOENT; vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { @@ -38400,10 +38621,11 @@ diff -ruN a/fs/proc/base.c b/fs/proc/base.c path_get(path); rc = 0; } -diff -ruN a/fs/proc/nommu.c b/fs/proc/nommu.c ---- a/fs/proc/nommu.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/proc/nommu.c 2022-03-30 02:33:13.307738286 +0300 -@@ -40,7 +40,10 @@ +diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c +index 13452b32e2bd..38acccfef9d4 100644 +--- a/fs/proc/nommu.c ++++ b/fs/proc/nommu.c +@@ -40,7 +40,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) file = region->vm_file; if (file) { @@ -38415,10 +38637,11 @@ diff -ruN a/fs/proc/nommu.c b/fs/proc/nommu.c dev = inode->i_sb->s_dev; ino = inode->i_ino; } -diff -ruN a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c ---- a/fs/proc/task_mmu.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/proc/task_mmu.c 2022-03-30 02:33:13.308738285 +0300 -@@ -280,7 +280,10 @@ +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index d9c07eecd787..ed52a7a3a15c 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -280,7 +280,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) const char *name = NULL; if (file) { @@ -38430,7 +38653,7 @@ diff -ruN a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; -@@ -1865,7 +1868,7 @@ +@@ -1889,7 +1892,7 @@ static int show_numa_map(struct seq_file *m, void *v) struct proc_maps_private *proc_priv = &numa_priv->proc_maps; struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; @@ -38439,10 +38662,11 @@ diff -ruN a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c struct mm_struct *mm = vma->vm_mm; struct mempolicy *pol; char buffer[64]; -diff -ruN a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c ---- a/fs/proc/task_nommu.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/proc/task_nommu.c 2022-03-30 02:33:13.308738285 +0300 -@@ -155,7 +155,10 @@ +diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c +index a6d21fc0033c..02c2de31196e 100644 +--- a/fs/proc/task_nommu.c ++++ b/fs/proc/task_nommu.c +@@ -155,7 +155,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) file = vma->vm_file; if (file) { @@ -38454,10 +38678,11 @@ diff -ruN a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; -diff -ruN a/fs/read_write.c b/fs/read_write.c ---- a/fs/read_write.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/read_write.c 2022-03-30 02:32:38.415760235 +0300 -@@ -492,6 +492,7 @@ +diff --git a/fs/read_write.c b/fs/read_write.c +index 8d3ec975514d..8713a801dac3 100644 +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -492,6 +492,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) inc_syscr(current); return ret; } @@ -38465,7 +38690,7 @@ diff -ruN a/fs/read_write.c b/fs/read_write.c static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { -@@ -602,6 +603,7 @@ +@@ -602,6 +603,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ file_end_write(file); return ret; } @@ -38473,10 +38698,11 @@ diff -ruN a/fs/read_write.c b/fs/read_write.c /* file_ppos returns &file->f_pos or NULL if file is stream */ static inline loff_t *file_ppos(struct file *file) -diff -ruN a/fs/splice.c b/fs/splice.c ---- a/fs/splice.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/splice.c 2022-03-30 02:32:38.415760235 +0300 -@@ -759,20 +759,21 @@ +diff --git a/fs/splice.c b/fs/splice.c +index 5dbce4dcc1a7..7c1be373eb7c 100644 +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -759,20 +759,21 @@ static int warn_unsupported(struct file *file, const char *op) /* * Attempt to initiate a splice from pipe to file. */ @@ -38503,7 +38729,7 @@ diff -ruN a/fs/splice.c b/fs/splice.c { unsigned int p_space; int ret; -@@ -795,6 +796,7 @@ +@@ -795,6 +796,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, return warn_unsupported(in, "read"); return in->f_op->splice_read(in, ppos, pipe, len, flags); } @@ -38511,10 +38737,11 @@ diff -ruN a/fs/splice.c b/fs/splice.c /** * splice_direct_to_actor - splices data directly between two non-pipes -diff -ruN a/fs/xattr.c b/fs/xattr.c ---- a/fs/xattr.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/fs/xattr.c 2022-03-30 02:32:38.416760234 +0300 -@@ -384,6 +384,7 @@ +diff --git a/fs/xattr.c b/fs/xattr.c +index 7117cb253864..e9636f76ae84 100644 +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -386,6 +386,7 @@ vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry, *xattr_value = value; return error; } @@ -38522,10 +38749,11 @@ diff -ruN a/fs/xattr.c b/fs/xattr.c ssize_t __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, -diff -ruN a/include/linux/fs.h b/include/linux/fs.h ---- a/include/linux/fs.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/fs.h 2022-03-30 02:32:42.073758100 +0300 -@@ -1381,6 +1381,7 @@ +diff --git a/include/linux/fs.h b/include/linux/fs.h +index fd4c450dc612..2e0161c4456a 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1382,6 +1382,7 @@ extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); @@ -38533,7 +38761,7 @@ diff -ruN a/include/linux/fs.h b/include/linux/fs.h extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern int f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); -@@ -2092,6 +2093,7 @@ +@@ -2016,6 +2017,7 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); @@ -38541,7 +38769,7 @@ diff -ruN a/include/linux/fs.h b/include/linux/fs.h int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); -@@ -2224,6 +2226,10 @@ +@@ -2148,6 +2150,10 @@ struct super_operations { struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); @@ -38552,7 +38780,7 @@ diff -ruN a/include/linux/fs.h b/include/linux/fs.h }; /* -@@ -2613,6 +2619,7 @@ +@@ -2540,6 +2546,7 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec64 *, int); @@ -38560,7 +38788,7 @@ diff -ruN a/include/linux/fs.h b/include/linux/fs.h /* /sys/fs */ extern struct kobject *fs_kobj; -@@ -2776,6 +2783,7 @@ +@@ -2718,6 +2725,7 @@ static inline bool sb_is_blkdev_sb(struct super_block *sb) } void emergency_thaw_all(void); @@ -38568,10 +38796,11 @@ diff -ruN a/include/linux/fs.h b/include/linux/fs.h extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; -diff -ruN a/include/linux/lockdep.h b/include/linux/lockdep.h ---- a/include/linux/lockdep.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/lockdep.h 2022-03-30 02:32:34.332762571 +0300 -@@ -248,6 +248,8 @@ +diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h +index aa0ecfc6cdb4..1935e4b24359 100644 +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -252,6 +252,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock, return lock->key == key; } @@ -38580,10 +38809,11 @@ diff -ruN a/include/linux/lockdep.h b/include/linux/lockdep.h /* * Acquire a lock. * -diff -ruN a/include/linux/mm.h b/include/linux/mm.h ---- a/include/linux/mm.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/mm.h 2022-03-30 02:33:13.308738285 +0300 -@@ -1806,6 +1806,28 @@ +diff --git a/include/linux/mm.h b/include/linux/mm.h +index e4e1817bb3b8..525d2dc20125 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1813,6 +1813,28 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } @@ -38612,10 +38842,11 @@ diff -ruN a/include/linux/mm.h b/include/linux/mm.h extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, -diff -ruN a/include/linux/mm_types.h b/include/linux/mm_types.h ---- a/include/linux/mm_types.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/mm_types.h 2022-03-30 02:33:13.309738285 +0300 -@@ -294,6 +294,7 @@ +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 7f8ee09c711f..3a9a798a4ae1 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -294,6 +294,7 @@ struct vm_region { unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ @@ -38623,7 +38854,7 @@ diff -ruN a/include/linux/mm_types.h b/include/linux/mm_types.h int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for -@@ -373,6 +374,7 @@ +@@ -373,6 +374,7 @@ struct vm_area_struct { unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */ struct file * vm_file; /* File we map to (can be NULL). */ @@ -38631,10 +38862,11 @@ diff -ruN a/include/linux/mm_types.h b/include/linux/mm_types.h void * vm_private_data; /* was vm_pte (shared mem) */ #ifdef CONFIG_SWAP -diff -ruN a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h ---- a/include/linux/mnt_namespace.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/mnt_namespace.h 2022-03-30 02:32:34.333762571 +0300 -@@ -7,12 +7,15 @@ +diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h +index 8f882f5881e8..6b9808f09843 100644 +--- a/include/linux/mnt_namespace.h ++++ b/include/linux/mnt_namespace.h +@@ -7,12 +7,15 @@ struct mnt_namespace; struct fs_struct; struct user_namespace; struct ns_common; @@ -38650,10 +38882,11 @@ diff -ruN a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h extern const struct file_operations proc_mounts_operations; extern const struct file_operations proc_mountinfo_operations; extern const struct file_operations proc_mountstats_operations; -diff -ruN a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h ---- a/include/linux/shmem_fs.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/shmem_fs.h 2022-03-30 02:33:33.893723740 +0300 -@@ -28,10 +28,13 @@ +diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h +index 166158b6e917..0e9606171432 100644 +--- a/include/linux/shmem_fs.h ++++ b/include/linux/shmem_fs.h +@@ -28,10 +28,13 @@ struct shmem_inode_info { }; struct shmem_sb_info { @@ -38669,10 +38902,11 @@ diff -ruN a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ umode_t mode; /* Mount mode for root directory */ unsigned char huge; /* Whether to try for hugepages */ -diff -ruN a/include/linux/splice.h b/include/linux/splice.h ---- a/include/linux/splice.h 2021-10-31 23:53:10.000000000 +0300 -+++ b/include/linux/splice.h 2022-03-30 02:32:34.333762571 +0300 -@@ -93,4 +93,10 @@ +diff --git a/include/linux/splice.h b/include/linux/splice.h +index a55179fd60fc..8e21c53cf883 100644 +--- a/include/linux/splice.h ++++ b/include/linux/splice.h +@@ -93,4 +93,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); extern const struct pipe_buf_operations page_cache_pipe_buf_ops; extern const struct pipe_buf_operations default_pipe_buf_ops; @@ -38683,9 +38917,18 @@ diff -ruN a/include/linux/splice.h b/include/linux/splice.h + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); #endif -diff -ruN a/include/uapi/linux/aufs_type.h b/include/uapi/linux/aufs_type.h ---- a/include/uapi/linux/aufs_type.h 1970-01-01 03:00:00.000000000 +0300 -+++ b/include/uapi/linux/aufs_type.h 2022-03-09 15:19:00.000000000 +0300 +diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild +new file mode 100644 +index 000000000000..e1b2bd375eaf +--- /dev/null ++++ b/include/uapi/linux/Kbuild +@@ -0,0 +1 @@ ++header-y = aufs_type.h +diff --git a/include/uapi/linux/aufs_type.h b/include/uapi/linux/aufs_type.h +new file mode 100644 +index 000000000000..6c937d529aa5 +--- /dev/null ++++ b/include/uapi/linux/aufs_type.h @@ -0,0 +1,452 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* @@ -39139,15 +39382,11 @@ diff -ruN a/include/uapi/linux/aufs_type.h b/include/uapi/linux/aufs_type.h +#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int) + +#endif /* __AUFS_TYPE_H__ */ -diff -ruN a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild ---- a/include/uapi/linux/Kbuild 1970-01-01 03:00:00.000000000 +0300 -+++ b/include/uapi/linux/Kbuild 2022-03-09 15:19:00.000000000 +0300 -@@ -0,0 +1 @@ -+header-y = aufs_type.h -diff -ruN a/kernel/fork.c b/kernel/fork.c ---- a/kernel/fork.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/kernel/fork.c 2022-03-30 02:33:13.309738285 +0300 -@@ -573,7 +573,7 @@ +diff --git a/kernel/fork.c b/kernel/fork.c +index 908ba3c93893..d85a6b319678 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -573,7 +573,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (file) { struct address_space *mapping = file->f_mapping; @@ -39156,10 +39395,11 @@ diff -ruN a/kernel/fork.c b/kernel/fork.c i_mmap_lock_write(mapping); if (tmp->vm_flags & VM_SHARED) mapping_allow_writable(mapping); -diff -ruN a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c ---- a/kernel/locking/lockdep.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/kernel/locking/lockdep.c 2022-03-30 02:32:38.417760234 +0300 -@@ -189,7 +189,7 @@ +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index e6a282bc1665..5d08b4727ed4 100644 +--- a/kernel/locking/lockdep.c ++++ b/kernel/locking/lockdep.c +@@ -187,7 +187,7 @@ unsigned long max_lock_class_idx; struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS); @@ -39168,7 +39408,7 @@ diff -ruN a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c { unsigned int class_idx = hlock->class_idx; -@@ -210,6 +210,8 @@ +@@ -208,6 +208,8 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) */ return lock_classes + class_idx; } @@ -39177,53 +39417,20 @@ diff -ruN a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats); -diff -ruN a/kernel/task_work.c b/kernel/task_work.c ---- a/kernel/task_work.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/kernel/task_work.c 2022-03-30 02:32:38.417760234 +0300 -@@ -167,3 +167,4 @@ +diff --git a/kernel/task_work.c b/kernel/task_work.c +index 1698fbe6f0e1..081b05acadf8 100644 +--- a/kernel/task_work.c ++++ b/kernel/task_work.c +@@ -167,3 +167,4 @@ void task_work_run(void) } while (work); } } +EXPORT_SYMBOL_GPL(task_work_run); -diff -ruN a/MAINTAINERS b/MAINTAINERS ---- a/MAINTAINERS 2021-10-31 23:53:10.000000000 +0300 -+++ b/MAINTAINERS 2022-03-30 02:32:34.329762573 +0300 -@@ -3122,6 +3122,19 @@ - F: kernel/audit* - F: lib/*audit.c - -+AUFS (advanced multi layered unification filesystem) FILESYSTEM -+M: "J. R. Okajima" -+L: aufs-users@lists.sourceforge.net (members only) -+L: linux-unionfs@vger.kernel.org -+S: Supported -+W: http://aufs.sourceforge.net -+T: git://github.com/sfjro/aufs4-linux.git -+F: Documentation/ABI/testing/debugfs-aufs -+F: Documentation/ABI/testing/sysfs-aufs -+F: Documentation/filesystems/aufs/ -+F: fs/aufs/ -+F: include/uapi/linux/aufs_type.h -+ - AUXILIARY DISPLAY DRIVERS - M: Miguel Ojeda - S: Maintained -diff -ruN a/mm/filemap.c b/mm/filemap.c ---- a/mm/filemap.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/mm/filemap.c 2022-03-30 02:33:13.310738284 +0300 -@@ -3350,7 +3350,7 @@ - vm_fault_t ret = VM_FAULT_LOCKED; - - sb_start_pagefault(mapping->host->i_sb); -- file_update_time(vmf->vma->vm_file); -+ vma_file_update_time(vmf->vma); - lock_page(page); - if (page->mapping != mapping) { - unlock_page(page); -diff -ruN a/mm/Makefile b/mm/Makefile ---- a/mm/Makefile 2021-10-31 23:53:10.000000000 +0300 -+++ b/mm/Makefile 2022-03-30 02:33:13.309738285 +0300 -@@ -52,7 +52,7 @@ +diff --git a/mm/Makefile b/mm/Makefile +index fc60a40ce954..c715b0138237 100644 +--- a/mm/Makefile ++++ b/mm/Makefile +@@ -52,7 +52,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ @@ -39232,10 +39439,24 @@ diff -ruN a/mm/Makefile b/mm/Makefile # Give 'page_alloc' its own module-parameter namespace page-alloc-y := page_alloc.o -diff -ruN a/mm/mmap.c b/mm/mmap.c ---- a/mm/mmap.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/mm/mmap.c 2022-03-30 02:33:13.311738283 +0300 -@@ -183,7 +183,7 @@ +diff --git a/mm/filemap.c b/mm/filemap.c +index dbc461703ff4..f223c4cff231 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -3355,7 +3355,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) + vm_fault_t ret = VM_FAULT_LOCKED; + + sb_start_pagefault(mapping->host->i_sb); +- file_update_time(vmf->vma->vm_file); ++ vma_file_update_time(vmf->vma); + lock_page(page); + if (page->mapping != mapping) { + unlock_page(page); +diff --git a/mm/mmap.c b/mm/mmap.c +index a0a4eadc8779..cebc94cfa20d 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -183,7 +183,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) @@ -39244,7 +39465,7 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c mpol_put(vma_policy(vma)); vm_area_free(vma); return next; -@@ -952,7 +952,7 @@ +@@ -952,7 +952,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, if (remove_next) { if (file) { uprobe_munmap(next, next->vm_start, next->vm_end); @@ -39253,16 +39474,16 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c } if (next->anon_vma) anon_vma_merge(vma, next); -@@ -1873,7 +1873,7 @@ - return addr; - +@@ -1880,7 +1880,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, + if (vma->vm_ops && vma->vm_ops->close) + vma->vm_ops->close(vma); unmap_and_free_vma: - fput(vma->vm_file); + vma_fput(vma); vma->vm_file = NULL; /* Undo any partial mapping done by a device driver. */ -@@ -2731,7 +2731,7 @@ +@@ -2746,7 +2746,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, goto out_free_mpol; if (new->vm_file) @@ -39271,7 +39492,7 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); -@@ -2750,7 +2750,7 @@ +@@ -2765,7 +2765,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (new->vm_ops && new->vm_ops->close) new->vm_ops->close(new); if (new->vm_file) @@ -39280,7 +39501,7 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c unlink_anon_vmas(new); out_free_mpol: mpol_put(vma_policy(new)); -@@ -2945,7 +2945,7 @@ +@@ -2960,7 +2960,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, struct vm_area_struct *vma; unsigned long populate = 0; unsigned long ret = -EINVAL; @@ -39289,7 +39510,7 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n", current->comm, current->pid); -@@ -3001,10 +3001,27 @@ +@@ -3016,10 +3016,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (vma->vm_flags & VM_LOCKED) flags |= MAP_LOCKED; @@ -39318,7 +39539,7 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c out: mmap_write_unlock(mm); if (populate) -@@ -3285,7 +3302,7 @@ +@@ -3300,7 +3317,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; if (new_vma->vm_file) @@ -39327,10 +39548,11 @@ diff -ruN a/mm/mmap.c b/mm/mmap.c if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); -diff -ruN a/mm/nommu.c b/mm/nommu.c ---- a/mm/nommu.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/mm/nommu.c 2022-03-30 02:33:13.311738283 +0300 -@@ -523,7 +523,7 @@ +diff --git a/mm/nommu.c b/mm/nommu.c +index 02d2427b8f9e..a7419302ab4e 100644 +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -523,7 +523,7 @@ static void __put_nommu_region(struct vm_region *region) up_write(&nommu_region_sem); if (region->vm_file) @@ -39339,7 +39561,7 @@ diff -ruN a/mm/nommu.c b/mm/nommu.c /* IO memory and memory shared directly out of the pagecache * from ramfs/tmpfs mustn't be released here */ -@@ -655,7 +655,7 @@ +@@ -655,7 +655,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) @@ -39348,7 +39570,7 @@ diff -ruN a/mm/nommu.c b/mm/nommu.c put_nommu_region(vma->vm_region); vm_area_free(vma); } -@@ -1175,7 +1175,7 @@ +@@ -1175,7 +1175,7 @@ unsigned long do_mmap(struct file *file, goto error_just_free; } } @@ -39357,7 +39579,7 @@ diff -ruN a/mm/nommu.c b/mm/nommu.c kmem_cache_free(vm_region_jar, region); region = pregion; result = start; -@@ -1252,10 +1252,10 @@ +@@ -1252,10 +1252,10 @@ unsigned long do_mmap(struct file *file, up_write(&nommu_region_sem); error: if (region->vm_file) @@ -39370,9 +39592,11 @@ diff -ruN a/mm/nommu.c b/mm/nommu.c vm_area_free(vma); return ret; -diff -ruN a/mm/prfile.c b/mm/prfile.c ---- a/mm/prfile.c 1970-01-01 03:00:00.000000000 +0300 -+++ b/mm/prfile.c 2022-03-30 02:33:13.311738283 +0300 +diff --git a/mm/prfile.c b/mm/prfile.c +new file mode 100644 +index 000000000000..511543ab1b41 +--- /dev/null ++++ b/mm/prfile.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* @@ -39460,10 +39684,11 @@ diff -ruN a/mm/prfile.c b/mm/prfile.c + fput(pr); +} +#endif /* !CONFIG_MMU */ -diff -ruN a/mm/shmem.c b/mm/shmem.c ---- a/mm/shmem.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/mm/shmem.c 2022-03-30 02:33:33.894723739 +0300 -@@ -108,7 +108,7 @@ +diff --git a/mm/shmem.c b/mm/shmem.c +index 342d1bc72867..460e4c54483f 100644 +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -108,7 +108,7 @@ struct shmem_falloc { struct shmem_options { unsigned long long blocks; @@ -39472,7 +39697,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c struct mempolicy *mpol; kuid_t uid; kgid_t gid; -@@ -128,11 +128,14 @@ +@@ -128,11 +128,14 @@ static unsigned long shmem_default_max_blocks(void) return totalram_pages() / 2; } @@ -39489,7 +39714,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c } #endif -@@ -1160,6 +1163,11 @@ +@@ -1165,6 +1168,11 @@ static void shmem_evict_inode(struct inode *inode) simple_xattrs_free(&info->xattrs); WARN_ON(inode->i_blocks); @@ -39501,7 +39726,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c shmem_free_inode(inode->i_sb); clear_inode(inode); } -@@ -2331,6 +2339,25 @@ +@@ -2336,6 +2344,25 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode break; } @@ -39527,7 +39752,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c lockdep_annotate_inode_mutex_key(inode); } else shmem_free_inode(sb); -@@ -3245,8 +3272,7 @@ +@@ -3252,8 +3279,7 @@ static struct dentry *shmem_get_parent(struct dentry *child) static int shmem_match(struct inode *ino, void *vfh) { __u32 *fh = vfh; @@ -39537,7 +39762,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c return ino->i_ino == inum && fh[0] == ino->i_generation; } -@@ -3266,14 +3292,11 @@ +@@ -3273,14 +3299,11 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, struct dentry *dentry = NULL; u64 inum; @@ -39555,7 +39780,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c if (inode) { dentry = shmem_find_alias(inode); iput(inode); -@@ -3285,30 +3308,15 @@ +@@ -3292,30 +3315,15 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, struct inode *parent) { @@ -39589,7 +39814,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c return 1; } -@@ -3387,7 +3395,7 @@ +@@ -3394,7 +3402,7 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) break; case Opt_nr_inodes: ctx->inodes = memparse(param->string, &rest); @@ -39598,7 +39823,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c goto bad_value; ctx->seen |= SHMEM_SEEN_INODES; break; -@@ -3497,7 +3505,7 @@ +@@ -3504,7 +3512,7 @@ static int shmem_reconfigure(struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); @@ -39607,7 +39832,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c struct mempolicy *mpol = NULL; const char *err; -@@ -3566,7 +3574,7 @@ +@@ -3573,7 +3581,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",size=%luk", sbinfo->max_blocks << (PAGE_SHIFT - 10)); if (sbinfo->max_inodes != shmem_default_max_inodes()) @@ -39616,7 +39841,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c if (sbinfo->mode != (0777 | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) -@@ -3613,6 +3621,8 @@ +@@ -3620,6 +3628,8 @@ static void shmem_put_super(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); @@ -39625,7 +39850,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c free_percpu(sbinfo->ino_batch); percpu_counter_destroy(&sbinfo->used_blocks); mpol_put(sbinfo->mpol); -@@ -3655,6 +3665,8 @@ +@@ -3662,6 +3672,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) #else sb->s_flags |= SB_NOUSER; #endif @@ -39634,7 +39859,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c sbinfo->max_blocks = ctx->blocks; sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes; if (sb->s_flags & SB_KERNMOUNT) { -@@ -3772,6 +3784,15 @@ +@@ -3779,6 +3791,15 @@ static void shmem_destroy_inodecache(void) kmem_cache_destroy(shmem_inode_cachep); } @@ -39650,7 +39875,7 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c const struct address_space_operations shmem_aops = { .writepage = shmem_writepage, .set_page_dirty = __set_page_dirty_no_writeback, -@@ -3913,6 +3934,7 @@ +@@ -3920,6 +3941,7 @@ int __init shmem_init(void) pr_err("Could not kern_mount tmpfs\n"); goto out1; } @@ -39658,10 +39883,11 @@ diff -ruN a/mm/shmem.c b/mm/shmem.c #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) -diff -ruN a/security/security.c b/security/security.c ---- a/security/security.c 2021-10-31 23:53:10.000000000 +0300 -+++ b/security/security.c 2022-03-30 02:32:38.417760234 +0300 -@@ -1147,6 +1147,7 @@ +diff --git a/security/security.c b/security/security.c +index 7b9f9d3fffe5..fcb7253edd18 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -1162,6 +1162,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry) return 0; return call_int_hook(path_rmdir, 0, dir, dentry); } @@ -39669,7 +39895,7 @@ diff -ruN a/security/security.c b/security/security.c int security_path_unlink(const struct path *dir, struct dentry *dentry) { -@@ -1163,6 +1164,7 @@ +@@ -1178,6 +1179,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry, return 0; return call_int_hook(path_symlink, 0, dir, dentry, old_name); } @@ -39677,7 +39903,7 @@ diff -ruN a/security/security.c b/security/security.c int security_path_link(struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry) -@@ -1171,6 +1173,7 @@ +@@ -1186,6 +1188,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir, return 0; return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry); } @@ -39685,7 +39911,7 @@ diff -ruN a/security/security.c b/security/security.c int security_path_rename(const struct path *old_dir, struct dentry *old_dentry, const struct path *new_dir, struct dentry *new_dentry, -@@ -1198,6 +1201,7 @@ +@@ -1213,6 +1216,7 @@ int security_path_truncate(const struct path *path) return 0; return call_int_hook(path_truncate, 0, path); } @@ -39693,7 +39919,7 @@ diff -ruN a/security/security.c b/security/security.c int security_path_chmod(const struct path *path, umode_t mode) { -@@ -1205,6 +1209,7 @@ +@@ -1220,6 +1224,7 @@ int security_path_chmod(const struct path *path, umode_t mode) return 0; return call_int_hook(path_chmod, 0, path, mode); } @@ -39701,7 +39927,7 @@ diff -ruN a/security/security.c b/security/security.c int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) { -@@ -1212,6 +1217,7 @@ +@@ -1227,6 +1232,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid) return 0; return call_int_hook(path_chown, 0, path, uid, gid); } @@ -39709,7 +39935,7 @@ diff -ruN a/security/security.c b/security/security.c int security_path_chroot(const struct path *path) { -@@ -1312,6 +1318,7 @@ +@@ -1327,6 +1333,7 @@ int security_inode_permission(struct inode *inode, int mask) return 0; return call_int_hook(inode_permission, 0, inode, mask); } @@ -39717,7 +39943,7 @@ diff -ruN a/security/security.c b/security/security.c int security_inode_setattr(struct dentry *dentry, struct iattr *attr) { -@@ -1509,6 +1516,7 @@ +@@ -1524,6 +1531,7 @@ int security_file_permission(struct file *file, int mask) return fsnotify_perm(file, mask); }