@@ -28,6 +28,7 @@ import (
2828 "strings"
2929 "time"
3030
31+ "github.com/containerd/containerd/v2/pkg/userns"
3132 "github.com/sirupsen/logrus"
3233 "golang.org/x/sys/unix"
3334)
@@ -95,6 +96,7 @@ func (m *Mount) mount(target string) (err error) {
9596 usernsFd * os.File
9697 options = m .Options
9798 )
99+
98100 opt := parseMountOptions (options )
99101 // The only remapping of both GID and UID is supported
100102 if opt .uidmap != "" && opt .gidmap != "" {
@@ -186,8 +188,21 @@ func (m *Mount) mount(target string) (err error) {
186188
187189 const broflags = unix .MS_BIND | unix .MS_RDONLY
188190 if oflags & broflags == broflags {
191+ // Preserve CL_UNPRIVILEGED "locked" flags of the
192+ // bind mount target when we remount to make the bind readonly.
193+ // This is necessary to ensure that
194+ // bind-mounting "with options" will not fail with user namespaces, due to
195+ // kernel restrictions that require user namespace mounts to preserve
196+ // CL_UNPRIVILEGED locked flags.
197+ var unprivFlags int
198+ if userns .RunningInUserNS () {
199+ unprivFlags , err = getUnprivilegedMountFlags (target )
200+ if err != nil {
201+ return err
202+ }
203+ }
189204 // Remount the bind to apply read only.
190- return unix .Mount ("" , target , "" , uintptr (oflags | unix .MS_REMOUNT ), "" )
205+ return unix .Mount ("" , target , "" , uintptr (oflags | unprivFlags | unix .MS_REMOUNT ), "" )
191206 }
192207
193208 // remap non-overlay mount point
@@ -199,6 +214,37 @@ func (m *Mount) mount(target string) (err error) {
199214 return nil
200215}
201216
217+ // Get the set of mount flags that are set on the mount that contains the given
218+ // path and are locked by CL_UNPRIVILEGED.
219+ //
220+ // From https://github.com/moby/moby/blob/v23.0.1/daemon/oci_linux.go#L430-L460
221+ func getUnprivilegedMountFlags (path string ) (int , error ) {
222+ var statfs unix.Statfs_t
223+ if err := unix .Statfs (path , & statfs ); err != nil {
224+ return 0 , err
225+ }
226+
227+ // The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
228+ unprivilegedFlags := []int {
229+ unix .MS_RDONLY ,
230+ unix .MS_NODEV ,
231+ unix .MS_NOEXEC ,
232+ unix .MS_NOSUID ,
233+ unix .MS_NOATIME ,
234+ unix .MS_RELATIME ,
235+ unix .MS_NODIRATIME ,
236+ }
237+
238+ var flags int
239+ for flag := range unprivilegedFlags {
240+ if int (statfs .Flags )& flag == flag {
241+ flags |= flag
242+ }
243+ }
244+
245+ return flags , nil
246+ }
247+
202248func doPrepareIDMappedOverlay (lowerDirs []string , usernsFd int ) (tmpLowerDirs []string , _ func (), _ error ) {
203249 td , err := os .MkdirTemp (tempMountLocation , "ovl-idmapped" )
204250 if err != nil {
0 commit comments