open.c 27.3 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9
/*
 *  linux/fs/open.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/string.h>
#include <linux/mm.h>
#include <linux/file.h>
Al Viro's avatar
Al Viro committed
10
#include <linux/fdtable.h>
Linus Torvalds's avatar
Linus Torvalds committed
11
#include <linux/quotaops.h>
Robert Love's avatar
Robert Love committed
12
#include <linux/fsnotify.h>
Linus Torvalds's avatar
Linus Torvalds committed
13 14 15 16 17
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/tty.h>
#include <linux/namei.h>
#include <linux/backing-dev.h>
18
#include <linux/capability.h>
19
#include <linux/securebits.h>
Linus Torvalds's avatar
Linus Torvalds committed
20 21 22
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/vfs.h>
23
#include <linux/fcntl.h>
Linus Torvalds's avatar
Linus Torvalds committed
24 25
#include <asm/uaccess.h>
#include <linux/fs.h>
26
#include <linux/personality.h>
Linus Torvalds's avatar
Linus Torvalds committed
27 28
#include <linux/pagemap.h>
#include <linux/syscalls.h>
29
#include <linux/rcupdate.h>
30
#include <linux/audit.h>
31
#include <linux/falloc.h>
32
#include <linux/fs_struct.h>
Linus Torvalds's avatar
Linus Torvalds committed
33

34 35
#include "internal.h"

36
int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
Linus Torvalds's avatar
Linus Torvalds committed
37 38 39
{
	int retval = -ENODEV;

40
	if (dentry) {
Linus Torvalds's avatar
Linus Torvalds committed
41
		retval = -ENOSYS;
42
		if (dentry->d_sb->s_op->statfs) {
Linus Torvalds's avatar
Linus Torvalds committed
43
			memset(buf, 0, sizeof(*buf));
44
			retval = security_sb_statfs(dentry);
Linus Torvalds's avatar
Linus Torvalds committed
45 46
			if (retval)
				return retval;
47
			retval = dentry->d_sb->s_op->statfs(dentry, buf);
Linus Torvalds's avatar
Linus Torvalds committed
48 49 50 51 52 53 54 55 56
			if (retval == 0 && buf->f_frsize == 0)
				buf->f_frsize = buf->f_bsize;
		}
	}
	return retval;
}

EXPORT_SYMBOL(vfs_statfs);

57
static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
Linus Torvalds's avatar
Linus Torvalds committed
58 59 60 61
{
	struct kstatfs st;
	int retval;

62
	retval = vfs_statfs(dentry, &st);
Linus Torvalds's avatar
Linus Torvalds committed
63 64 65 66 67 68 69
	if (retval)
		return retval;

	if (sizeof(*buf) == sizeof(st))
		memcpy(buf, &st, sizeof(st));
	else {
		if (sizeof buf->f_blocks == 4) {
Jon Tollefson's avatar
Jon Tollefson committed
70 71
			if ((st.f_blocks | st.f_bfree | st.f_bavail |
			     st.f_bsize | st.f_frsize) &
Linus Torvalds's avatar
Linus Torvalds committed
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
			    0xffffffff00000000ULL)
				return -EOVERFLOW;
			/*
			 * f_files and f_ffree may be -1; it's okay to stuff
			 * that into 32 bits
			 */
			if (st.f_files != -1 &&
			    (st.f_files & 0xffffffff00000000ULL))
				return -EOVERFLOW;
			if (st.f_ffree != -1 &&
			    (st.f_ffree & 0xffffffff00000000ULL))
				return -EOVERFLOW;
		}

		buf->f_type = st.f_type;
		buf->f_bsize = st.f_bsize;
		buf->f_blocks = st.f_blocks;
		buf->f_bfree = st.f_bfree;
		buf->f_bavail = st.f_bavail;
		buf->f_files = st.f_files;
		buf->f_ffree = st.f_ffree;
		buf->f_fsid = st.f_fsid;
		buf->f_namelen = st.f_namelen;
		buf->f_frsize = st.f_frsize;
		memset(buf->f_spare, 0, sizeof(buf->f_spare));
	}
	return 0;
}

101
static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
Linus Torvalds's avatar
Linus Torvalds committed
102 103 104 105
{
	struct kstatfs st;
	int retval;

106
	retval = vfs_statfs(dentry, &st);
Linus Torvalds's avatar
Linus Torvalds committed
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
	if (retval)
		return retval;

	if (sizeof(*buf) == sizeof(st))
		memcpy(buf, &st, sizeof(st));
	else {
		buf->f_type = st.f_type;
		buf->f_bsize = st.f_bsize;
		buf->f_blocks = st.f_blocks;
		buf->f_bfree = st.f_bfree;
		buf->f_bavail = st.f_bavail;
		buf->f_files = st.f_files;
		buf->f_ffree = st.f_ffree;
		buf->f_fsid = st.f_fsid;
		buf->f_namelen = st.f_namelen;
		buf->f_frsize = st.f_frsize;
		memset(buf->f_spare, 0, sizeof(buf->f_spare));
	}
	return 0;
}

128
/*
 * statfs(2): resolve @pathname, collect native filesystem statistics for
 * it and copy them to the user buffer @buf.  Returns 0, the lookup or
 * statfs error, or -EFAULT if the copy to user space fails.
 */
SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
{
	struct statfs tmp;
	struct path path;
	int error;

	error = user_path(pathname, &path);
	if (error)
		return error;

	error = vfs_statfs_native(path.dentry, &tmp);
	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
		error = -EFAULT;

	/* Drop the reference taken by user_path(). */
	path_put(&path);
	return error;
}

144
/*
 * statfs64(2): like statfs, but fills a 'struct statfs64'.  @sz must be
 * exactly sizeof(struct statfs64), otherwise -EINVAL is returned before
 * any lookup is attempted.
 */
SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
{
	struct statfs64 tmp;
	struct path path;
	long error;

	if (sz != sizeof(*buf))
		return -EINVAL;

	error = user_path(pathname, &path);
	if (error)
		return error;

	error = vfs_statfs64(path.dentry, &tmp);
	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
		error = -EFAULT;

	/* Drop the reference taken by user_path(). */
	path_put(&path);
	return error;
}

162
/*
 * fstatfs(2): native filesystem statistics for the file open on @fd.
 * Returns -EBADF when @fd does not refer to an open file and -EFAULT
 * when the result cannot be copied to @buf.
 */
SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
{
	struct statfs tmp;
	struct file *file;
	int error;

	file = fget(fd);
	if (!file)
		return -EBADF;

	error = vfs_statfs_native(file->f_path.dentry, &tmp);
	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
		error = -EFAULT;

	fput(file);
	return error;
}

180
/*
 * fstatfs64(2): like fstatfs, but fills a 'struct statfs64'.  @sz must be
 * exactly sizeof(struct statfs64), otherwise -EINVAL is returned before
 * the descriptor is looked up.
 */
SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
{
	struct statfs64 tmp;
	struct file *file;
	int error;

	if (sz != sizeof(*buf))
		return -EINVAL;

	file = fget(fd);
	if (!file)
		return -EBADF;

	error = vfs_statfs64(file->f_path.dentry, &tmp);
	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
		error = -EFAULT;

	fput(file);
	return error;
}

201 202
/*
 * Change the size of @dentry's inode to @length via notify_change().
 *
 * @time_attrs is OR-ed into the attribute mask next to ATTR_SIZE.  When
 * @filp is non-NULL it is passed along and ATTR_FILE is set.  Whatever
 * should_remove_suid() reports is added to the mask together with
 * ATTR_FORCE.  The inode's i_mutex is held across notify_change().
 *
 * Returns -EINVAL for a negative @length, otherwise the result of
 * notify_change().
 */
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
	struct file *filp)
{
	struct iattr newattrs;
	int kill;
	int err;

	/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
	if (length < 0)
		return -EINVAL;

	newattrs.ia_valid = ATTR_SIZE | time_attrs;
	newattrs.ia_size = length;
	if (filp) {
		newattrs.ia_valid |= ATTR_FILE;
		newattrs.ia_file = filp;
	}

	/* Remove suid/sgid on truncate too */
	kill = should_remove_suid(dentry);
	if (kill)
		newattrs.ia_valid |= kill | ATTR_FORCE;

	mutex_lock(&dentry->d_inode->i_mutex);
	err = notify_change(dentry, &newattrs);
	mutex_unlock(&dentry->d_inode->i_mutex);

	return err;
}

229
/*
 * Common implementation of the path-based truncate system calls.
 *
 * Resolves @pathname, insists on a regular file (-EISDIR for directories,
 * -EINVAL for anything else), then acquires in order: write access to the
 * mount, MAY_WRITE permission, and inode write access.  Append-only
 * inodes yield -EPERM.  Leases are broken and mandatory locks and the
 * security hook are checked before do_truncate() is called.
 *
 * Everything acquired is released in reverse order via the labels at the
 * bottom.
 */
static long do_sys_truncate(const char __user *pathname, loff_t length)
{
	struct path path;
	struct inode *inode;
	int error;

	if (length < 0)	/* sorry, but loff_t says... */
		return -EINVAL;

	error = user_path(pathname, &path);
	if (error)
		return error;
	inode = path.dentry->d_inode;

	/* For directories it's -EISDIR, for other non-regulars - -EINVAL */
	if (S_ISDIR(inode->i_mode)) {
		error = -EISDIR;
		goto dput_and_out;
	}
	if (!S_ISREG(inode->i_mode)) {
		error = -EINVAL;
		goto dput_and_out;
	}

	error = mnt_want_write(path.mnt);
	if (error)
		goto dput_and_out;

	error = inode_permission(inode, MAY_WRITE);
	if (error)
		goto mnt_drop_write_and_out;

	if (IS_APPEND(inode)) {
		error = -EPERM;
		goto mnt_drop_write_and_out;
	}

	error = get_write_access(inode);
	if (error)
		goto mnt_drop_write_and_out;

	/*
	 * Make sure that there are no leases.  get_write_access() protects
	 * against the truncate racing with a lease-granting setlease().
	 */
	error = break_lease(inode, FMODE_WRITE);
	if (error)
		goto put_write_and_out;

	error = locks_verify_truncate(inode, NULL, length);
	if (!error)
		error = security_path_truncate(&path, length, 0);
	if (!error) {
		vfs_dq_init(inode);
		error = do_truncate(path.dentry, length, 0, NULL);
	}

put_write_and_out:
	put_write_access(inode);
mnt_drop_write_and_out:
	mnt_drop_write(path.mnt);
dput_and_out:
	path_put(&path);
	return error;
}

295
/*
 * truncate(2): thin wrapper that hands the 'long' length to
 * do_sys_truncate().
 */
SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
{
	long ret;

	ret = do_sys_truncate(path, length);
	return ret;
}

300
/*
 * Common implementation of the descriptor-based truncate system calls.
 *
 * @small non-zero means the caller is limited to MAX_NON_LFS unless the
 * file was opened with O_LARGEFILE.
 *
 * Returns -EINVAL for a negative length, a non-regular file, a file not
 * opened for writing or a length beyond the non-LFS limit; -EBADF for a
 * bad descriptor; -EPERM for append-only inodes; otherwise the result of
 * the lock check, the security hook or do_truncate().
 */
static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
{
	struct file *file;
	struct dentry *dentry;
	struct inode *inode;
	int error;

	if (length < 0)
		return -EINVAL;

	file = fget(fd);
	if (!file)
		return -EBADF;

	/* explicitly opened as large or we are on 64-bit box */
	if (file->f_flags & O_LARGEFILE)
		small = 0;

	dentry = file->f_path.dentry;
	inode = dentry->d_inode;

	error = -EINVAL;
	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
		goto out_putf;

	/* Cannot ftruncate over 2^31 bytes without large file support */
	if (small && length > MAX_NON_LFS)
		goto out_putf;

	if (IS_APPEND(inode)) {
		error = -EPERM;
		goto out_putf;
	}

	error = locks_verify_truncate(inode, file, length);
	if (!error)
		error = security_path_truncate(&file->f_path, length,
					       ATTR_MTIME|ATTR_CTIME);
	if (!error)
		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);

out_putf:
	fput(file);
	return error;
}

346
/*
 * ftruncate(2): calls do_sys_ftruncate() with 'small' set, so the
 * non-LFS size limit applies unless the file was opened O_LARGEFILE.
 */
SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
{
	long ret;

	ret = do_sys_ftruncate(fd, length, 1);
	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(2, ret, fd, length);
	return ret;
}

/* LFS versions of truncate are only needed on 32 bit machines */
#if BITS_PER_LONG == 32
356
/*
 * truncate64: path-based truncate taking a full loff_t length.
 */
SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
{
	long ret;

	ret = do_sys_truncate(path, length);
	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/* Wrapper taking 'long' arguments; casts them and calls the real body. */
asmlinkage long SyS_truncate64(long path, loff_t length)
{
	const char __user *upath = (const char __user *) path;

	return SYSC_truncate64(upath, length);
}
SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
#endif
Linus Torvalds's avatar
Linus Torvalds committed
367

368
/*
 * ftruncate64: descriptor-based truncate taking a full loff_t length;
 * calls do_sys_ftruncate() with 'small' cleared.
 */
SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
{
	long ret;

	ret = do_sys_ftruncate(fd, length, 0);
	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(2, ret, fd, length);
	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/* Wrapper taking 'long' arguments; casts them and calls the real body. */
asmlinkage long SyS_ftruncate64(long fd, loff_t length)
{
	unsigned int ufd = (unsigned int) fd;

	return SYSC_ftruncate64(ufd, length);
}
SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
#endif
382
#endif /* BITS_PER_LONG == 32 */
Linus Torvalds's avatar
Linus Torvalds committed
383

384 385

/*
 * Validate a preallocation request on @file and pass it to the inode's
 * ->fallocate operation.
 *
 * The checks run in this order, each with its own error code:
 *   -EINVAL      negative @offset or non-positive @len
 *   -EOPNOTSUPP  @mode has bits set but not FALLOC_FL_KEEP_SIZE
 *   -EBADF       @file is not open for writing
 *   (hook error) security_file_permission() refuses MAY_WRITE
 *   -ESPIPE      the inode is a FIFO
 *   -ENODEV      the inode is neither a regular file nor a directory
 *   -EFBIG       @offset + @len exceeds s_maxbytes or wraps negative
 *   -EOPNOTSUPP  the inode has no ->fallocate operation
 */
int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	int err;

	if (offset < 0 || len <= 0)
		return -EINVAL;

	/* Return error if mode is not supported */
	if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
		return -EOPNOTSUPP;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;

	/*
	 * Revalidate the write permissions, in case security policy has
	 * changed since the files were opened.
	 */
	err = security_file_permission(file, MAY_WRITE);
	if (err)
		return err;

	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	/*
	 * Let individual file system decide if it supports preallocation
	 * for directories or not.
	 */
	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
		return -ENODEV;

	/* Check for wrap through zero too */
	if ((offset + len) > inode->i_sb->s_maxbytes)
		return -EFBIG;
	if ((offset + len) < 0)
		return -EFBIG;

	if (!inode->i_op->fallocate)
		return -EOPNOTSUPP;

	return inode->i_op->fallocate(inode, mode, offset, len);
}

/*
 * fallocate(2): look up @fd and run do_fallocate() on it.  Returns
 * -EBADF when @fd is not an open file.
 */
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
{
	struct file *file;
	int error;

	file = fget(fd);
	if (!file)
		return -EBADF;

	error = do_fallocate(file, mode, offset, len);
	fput(file);
	return error;
}

#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/* Wrapper taking 'long' arguments; casts them and calls the real body. */
asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
{
	int ifd = (int)fd;
	int imode = (int)mode;

	return SYSC_fallocate(ifd, imode, offset, len);
}
SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
#endif
448

Linus Torvalds's avatar
Linus Torvalds committed
449 450 451 452 453
/*
 * access() needs to use the real uid/gid, not the effective uid/gid.
 * We do this by temporarily clearing all FS-related capabilities and
 * switching the fsuid/fsgid around to the real ones.
 */
454
/*
 * faccessat(2): check whether the caller's real uid/gid may access
 * @filename (resolved relative to @dfd) in the ways requested by @mode.
 *
 * A private copy of the credentials is prepared with fsuid/fsgid set to
 * the real uid/gid, installed for the duration of the check and reverted
 * before returning.
 */
SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
{
	const struct cred *old_cred;
	struct cred *override_cred;
	struct path path;
	struct inode *inode;
	int res;

	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
		return -EINVAL;

	override_cred = prepare_creds();
	if (!override_cred)
		return -ENOMEM;

	override_cred->fsuid = override_cred->uid;
	override_cred->fsgid = override_cred->gid;

	if (!issecure(SECURE_NO_SETUID_FIXUP)) {
		/* Clear the capabilities if we switch to a non-root user */
		if (!override_cred->uid) {
			override_cred->cap_effective =
				override_cred->cap_permitted;
		} else {
			cap_clear(override_cred->cap_effective);
		}
	}

	old_cred = override_creds(override_cred);

	res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
	if (res)
		goto out;

	inode = path.dentry->d_inode;

	/*
	 * MAY_EXEC on regular files is denied if the fs is mounted
	 * with the "noexec" flag.
	 */
	if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode) &&
	    (path.mnt->mnt_flags & MNT_NOEXEC)) {
		res = -EACCES;
		goto out_path_release;
	}

	res = inode_permission(inode, mode | MAY_ACCESS);
	/* SuS v2 requires we report a read only fs too */
	if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
		goto out_path_release;

	/*
	 * This is a rare case where using __mnt_is_readonly()
	 * is OK without a mnt_want/drop_write() pair.  Since
	 * no actual write to the fs is performed here, we do
	 * not need to telegraph that to anyone.
	 *
	 * By doing this, we accept that this access is
	 * inherently racy and know that the fs may change
	 * state before we even see this result.
	 */
	if (__mnt_is_readonly(path.mnt))
		res = -EROFS;

out_path_release:
	path_put(&path);
out:
	revert_creds(old_cred);
	put_cred(override_cred);
	return res;
}

524
/*
 * access(2): faccessat() relative to the current working directory.
 */
SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
{
	long ret;

	ret = sys_faccessat(AT_FDCWD, filename, mode);
	return ret;
}

529
/*
 * chdir(2): resolve @filename as a directory, check MAY_EXEC access on
 * it and, if permitted, make it the current task's working directory.
 */
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
	struct path path;
	int error;

	error = user_path_dir(filename, &path);
	if (error)
		return error;

	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
	if (!error)
		set_fs_pwd(current->fs, &path);

	/* Drop the lookup's reference on both success and failure. */
	path_put(&path);
	return error;
}

550
/*
 * fchdir(2): make the directory open on @fd the current task's working
 * directory.  Returns -EBADF for a bad descriptor, -ENOTDIR when the
 * file is not a directory, or the error from the MAY_EXEC permission
 * check.
 */
SYSCALL_DEFINE1(fchdir, unsigned int, fd)
{
	struct file *file;
	struct inode *inode;
	int error;

	file = fget(fd);
	if (!file)
		return -EBADF;

	inode = file->f_path.dentry->d_inode;

	if (!S_ISDIR(inode->i_mode)) {
		error = -ENOTDIR;
	} else {
		error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
		if (!error)
			set_fs_pwd(current->fs, &file->f_path);
	}

	fput(file);
	return error;
}

576
/*
 * chroot(2): resolve @filename as a directory and make it the current
 * task's root.  Requires MAY_EXEC access on the directory, the
 * CAP_SYS_CHROOT capability (-EPERM otherwise) and approval from the
 * security_path_chroot() hook.
 */
SYSCALL_DEFINE1(chroot, const char __user *, filename)
{
	struct path path;
	int error;

	error = user_path_dir(filename, &path);
	if (error)
		return error;

	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
	if (error)
		goto dput_and_out;

	if (!capable(CAP_SYS_CHROOT)) {
		error = -EPERM;
		goto dput_and_out;
	}

	error = security_path_chroot(&path);
	if (error)
		goto dput_and_out;

	set_fs_root(current->fs, &path);
	error = 0;

dput_and_out:
	path_put(&path);
	return error;
}

604
/*
 * fchmod(2): change the permission bits of the file open on @fd.
 *
 * A @mode of (mode_t)-1 means "keep the inode's current mode".  Only the
 * S_IALLUGO bits are taken from @mode; the remaining bits of i_mode are
 * preserved.  The change is made under the inode's i_mutex, with write
 * access to the mount held.
 */
SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
{
	struct iattr newattrs;
	struct dentry *dentry;
	struct inode *inode;
	struct file *file;
	int err;

	file = fget(fd);
	if (!file)
		return -EBADF;

	dentry = file->f_path.dentry;
	inode = dentry->d_inode;

	audit_inode(NULL, dentry);

	err = mnt_want_write_file(file);
	if (err)
		goto out_putf;

	mutex_lock(&inode->i_mutex);
	err = security_path_chmod(dentry, file->f_vfsmnt, mode);
	if (!err) {
		if (mode == (mode_t) -1)
			mode = inode->i_mode;
		newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
		newattrs.ia_mode = (mode & S_IALLUGO) |
				   (inode->i_mode & ~S_IALLUGO);
		err = notify_change(dentry, &newattrs);
	}
	mutex_unlock(&inode->i_mutex);
	mnt_drop_write(file->f_path.mnt);

out_putf:
	fput(file);
	return err;
}

642
/*
 * fchmodat(2): change the permission bits of @filename, resolved
 * relative to @dfd with symlinks followed.
 *
 * A @mode of (mode_t)-1 means "keep the inode's current mode".  Only the
 * S_IALLUGO bits are taken from @mode; the remaining bits of i_mode are
 * preserved.  The change is made under the inode's i_mutex, with write
 * access to the mount held.
 */
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
{
	struct iattr newattrs;
	struct inode *inode;
	struct path path;
	int error;

	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
	if (error)
		return error;
	inode = path.dentry->d_inode;

	error = mnt_want_write(path.mnt);
	if (error)
		goto dput_and_out;

	mutex_lock(&inode->i_mutex);
	error = security_path_chmod(path.dentry, path.mnt, mode);
	if (!error) {
		if (mode == (mode_t) -1)
			mode = inode->i_mode;
		newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
		newattrs.ia_mode = (mode & S_IALLUGO) |
				   (inode->i_mode & ~S_IALLUGO);
		error = notify_change(path.dentry, &newattrs);
	}
	mutex_unlock(&inode->i_mutex);
	mnt_drop_write(path.mnt);

dput_and_out:
	path_put(&path);
	return error;
}

675
/*
 * chmod(2): fchmodat() relative to the current working directory.
 */
SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
{
	long ret;

	ret = sys_fchmodat(AT_FDCWD, filename, mode);
	return ret;
}

680
/*
 * Shared back end of the chown family: change owner and/or group of the
 * inode behind @path.
 *
 * A @user or @group of -1 leaves that id untouched.  For anything that is
 * not a directory the suid/sgid/priv kill flags are requested as well.
 * The security hook and notify_change() both run under the inode's
 * i_mutex.  Returns the hook's error or the result of notify_change().
 */
static int chown_common(struct path *path, uid_t user, gid_t group)
{
	struct inode *inode = path->dentry->d_inode;
	struct iattr newattrs;
	int error;

	newattrs.ia_valid =  ATTR_CTIME;

	if (user != (uid_t) -1) {
		newattrs.ia_uid = user;
		newattrs.ia_valid |= ATTR_UID;
	}

	if (group != (gid_t) -1) {
		newattrs.ia_gid = group;
		newattrs.ia_valid |= ATTR_GID;
	}

	if (!S_ISDIR(inode->i_mode))
		newattrs.ia_valid |=
			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;

	mutex_lock(&inode->i_mutex);
	error = security_path_chown(path, user, group);
	if (!error)
		error = notify_change(path->dentry, &newattrs);
	mutex_unlock(&inode->i_mutex);

	return error;
}

707
/*
 * chown(2): resolve @filename with user_path(), take write access to its
 * mount and change ownership via chown_common().
 */
SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
{
	struct path path;
	int error;

	error = user_path(filename, &path);
	if (error)
		return error;

	error = mnt_want_write(path.mnt);
	if (!error) {
		error = chown_common(&path, user, group);
		mnt_drop_write(path.mnt);
	}

	path_put(&path);
	return error;
}

726 727
/*
 * fchownat(2): change ownership of @filename resolved relative to @dfd.
 *
 * The only flag accepted is AT_SYMLINK_NOFOLLOW (anything else gives
 * -EINVAL); when it is set the lookup is done without LOOKUP_FOLLOW.
 */
SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
		gid_t, group, int, flag)
{
	struct path path;
	int lookup_flags = LOOKUP_FOLLOW;
	int error;

	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
		return -EINVAL;

	if (flag & AT_SYMLINK_NOFOLLOW)
		lookup_flags = 0;

	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (error)
		return error;

	error = mnt_want_write(path.mnt);
	if (!error) {
		error = chown_common(&path, user, group);
		mnt_drop_write(path.mnt);
	}

	path_put(&path);
	return error;
}

751
/*
 * lchown(2): like chown, but the path is resolved with user_lpath().
 */
SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
{
	struct path path;
	int error;

	error = user_lpath(filename, &path);
	if (error)
		return error;

	error = mnt_want_write(path.mnt);
	if (!error) {
		error = chown_common(&path, user, group);
		mnt_drop_write(path.mnt);
	}

	path_put(&path);
	return error;
}

770
/*
 * fchown(2): change ownership of the file open on @fd.  Returns -EBADF
 * for a bad descriptor, the error from mnt_want_write_file(), or the
 * result of chown_common().
 */
SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
{
	struct dentry *dentry;
	struct file *file;
	int error;

	file = fget(fd);
	if (!file)
		return -EBADF;

	error = mnt_want_write_file(file);
	if (!error) {
		dentry = file->f_path.dentry;
		audit_inode(NULL, dentry);
		error = chown_common(&file->f_path, user, group);
		mnt_drop_write(file->f_path.mnt);
	}

	fput(file);
	return error;
}

793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821
/*
 * You have to be very careful that these write
 * counts get cleaned up in error cases and
 * upon __fput().  This should probably never
 * be called outside of __dentry_open().
 */
/*
 * Take inode write access and, for non-special files, a writer count on
 * @mnt as well.  If the mount refuses, the inode write access is dropped
 * again so that nothing is held on failure.  Returns 0 or the error from
 * whichever step failed.
 */
static inline int __get_file_write_access(struct inode *inode,
					  struct vfsmount *mnt)
{
	int error;

	error = get_write_access(inode);
	if (error)
		return error;

	/*
	 * Do not take mount writer counts on
	 * special files since no writes to
	 * the mount itself will occur.
	 */
	if (special_file(inode->i_mode))
		return 0;

	/*
	 * Balanced in __fput()
	 */
	error = mnt_want_write(mnt);
	if (error)
		put_write_access(inode);
	return error;
}

822
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
823
					int flags, struct file *f,
824 825
					int (*open)(struct inode *, struct file *),
					const struct cred *cred)
Linus Torvalds's avatar
Linus Torvalds committed
826 827 828 829 830
{
	struct inode *inode;
	int error;

	f->f_flags = flags;
831
	f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
832
				FMODE_PREAD | FMODE_PWRITE;
Linus Torvalds's avatar
Linus Torvalds committed
833 834
	inode = dentry->d_inode;
	if (f->f_mode & FMODE_WRITE) {
835
		error = __get_file_write_access(inode, mnt);
Linus Torvalds's avatar
Linus Torvalds committed
836 837
		if (error)
			goto cleanup_file;
838 839
		if (!special_file(inode->i_mode))
			file_take_write(f);
Linus Torvalds's avatar
Linus Torvalds committed
840 841 842
	}

	f->f_mapping = inode->i_mapping;
843 844
	f->f_path.dentry = dentry;
	f->f_path.mnt = mnt;
Linus Torvalds's avatar
Linus Torvalds committed
845 846 847 848
	f->f_pos = 0;
	f->f_op = fops_get(inode->i_fop);
	file_move(f, &inode->i_sb->s_files);

849
	error = security_dentry_open(f, cred);
850 851 852
	if (error)
		goto cleanup_all;

853 854 855 856
	if (!open && f->f_op)
		open = f->f_op->open;
	if (open) {
		error = open(inode, f);
Linus Torvalds's avatar
Linus Torvalds committed
857 858 859
		if (error)
			goto cleanup_all;
	}
860

Linus Torvalds's avatar
Linus Torvalds committed
861 862 863 864 865 866
	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

	/* NB: we're sure to have correct a_ops only after f_op->open */
	if (f->f_flags & O_DIRECT) {
867 868
		if (!f->f_mapping->a_ops ||
		    ((!f->f_mapping->a_ops->direct_IO) &&
869
		    (!f->f_mapping->a_ops->get_xip_mem))) {
Linus Torvalds's avatar
Linus Torvalds committed
870 871 872 873 874 875 876 877 878
			fput(f);
			f = ERR_PTR(-EINVAL);
		}
	}

	return f;

cleanup_all:
	fops_put(f->f_op);
879
	if (f->f_mode & FMODE_WRITE) {
Linus Torvalds's avatar
Linus Torvalds committed
880
		put_write_access(inode);
881 882 883 884 885 886 887 888
		if (!special_file(inode->i_mode)) {
			/*
			 * We don't consider this a real
			 * mnt_want/drop_write() pair
			 * because it all happenend right
			 * here, so just reset the state.
			 */
			file_reset_write(f);
889
			mnt_drop_write(mnt);
890
		}
891
	}
Linus Torvalds's avatar
Linus Torvalds committed
892
	file_kill(f);
893 894
	f->f_path.dentry = NULL;
	f->f_path.mnt = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
895 896 897 898 899 900 901
cleanup_file:
	put_filp(f);
	dput(dentry);
	mntput(mnt);
	return ERR_PTR(error);
}

902 903 904 905 906 907 908 909 910 911
/**
 * lookup_instantiate_filp - instantiates the open intent filp
 * @nd: pointer to nameidata
 * @dentry: pointer to dentry
 * @open: open callback
 *
 * Helper for filesystems that want to use lookup open intents and pass back
 * a fully instantiated struct file to the caller.
 * This function is meant to be called from within a filesystem's
 * lookup method.
912 913 914 915
 * Beware of calling it for non-regular files! Those ->open methods might block
 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
 * leading to a deadlock, as nobody can open that fifo anymore, because
 * another process to open fifo will block on locked parent when doing lookup).
916 917 918 919 920 921 922 923
 * Note that in case of error, nd->intent.open.file is destroyed, but the
 * path information remains valid.
 * If the open callback is set to NULL, then the standard f_op->open()
 * filesystem callback is substituted.
 */
struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
		int (*open)(struct inode *, struct file *))
{
	const struct cred *cred = current_cred();

	/* The intent already carries an error: hand it straight back. */
	if (IS_ERR(nd->intent.open.file))
		return nd->intent.open.file;

	/*
	 * An error dentry: release the open intent and store the error,
	 * cast to a file pointer, in its place.
	 */
	if (IS_ERR(dentry)) {
		release_open_intent(nd);
		nd->intent.open.file = (struct file *)dentry;
		return nd->intent.open.file;
	}

	/* __dentry_open() is given its own dentry and mount references. */
	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
					     nd->intent.open.flags - 1,
					     nd->intent.open.file,
					     open, cred);
	return nd->intent.open.file;
}
EXPORT_SYMBOL_GPL(lookup_instantiate_filp);

/**
 * nameidata_to_filp - convert a nameidata to an open filp.
 * @nd: pointer to nameidata
 * @flags: open flags
 *
 * Note that this function destroys the original nameidata
 */
struct file *nameidata_to_filp(struct nameidata *nd, int flags)
{
952
	const struct cred *cred = current_cred();
953 954 955 956 957
	struct file *filp;

	/* Pick up the filp from the open intent */
	filp = nd->intent.open.file;
	/* Has the filesystem initialised the file for us? */
958
	if (filp->f_path.dentry == NULL)
959
		filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
960
				     NULL, cred);
961
	else
Jan Blunck's avatar
Jan Blunck committed
962
		path_put(&nd->path);
963 964 965
	return filp;
}

966 967 968 969
/*
 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
 * error.
 */
970 971
struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
			 const struct cred *cred)
972 973 974 975
{
	int error;
	struct file *f;

976 977
	validate_creds(cred);

978 979 980 981 982 983 984 985 986 987 988 989
	/*
	 * We must always pass in a valid mount pointer.   Historically
	 * callers got away with not passing it, but we must enforce this at
	 * the earliest possible point now to avoid strange problems deep in the
	 * filesystem stack.
	 */
	if (!mnt) {
		printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
		dump_stack();
		return ERR_PTR(-EINVAL);
	}

990 991
	error = -ENFILE;
	f = get_empty_filp();
992 993 994
	if (f == NULL) {
		dput(dentry);
		mntput(mnt);
995
		return ERR_PTR(error);
996
	}
997

998
	return __dentry_open(dentry, mnt, flags, f, NULL, cred);
999
}
Linus Torvalds's avatar
Linus Torvalds committed
1000 1001
EXPORT_SYMBOL(dentry_open);

1002
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
Linus Torvalds's avatar
Linus Torvalds committed
1003
{
1004 1005
	struct fdtable *fdt = files_fdtable(files);
	__FD_CLR(fd, fdt->open_fds);
1006 1007
	if (fd < files->next_fd)
		files->next_fd = fd;
Linus Torvalds's avatar
Linus Torvalds committed
1008 1009
}

1010
void put_unused_fd(unsigned int fd)
Linus Torvalds's avatar
Linus Torvalds committed
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
{
	struct files_struct *files = current->files;
	spin_lock(&files->file_lock);
	__put_unused_fd(files, fd);
	spin_unlock(&files->file_lock);
}

EXPORT_SYMBOL(put_unused_fd);

/*
1021
 * Install a file pointer in the fd array.
Linus Torvalds's avatar
Linus Torvalds committed
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
 *
 * The VFS is full of places where we drop the files lock between
 * setting the open_fds bitmap and installing the file in the file
 * array.  At any such point, we are vulnerable to a dup2() race
 * installing a file in the array before us.  We need to detect this and
 * fput() the struct file we are about to overwrite in this case.
 *
 * It should never happen - if we allow dup2() to do it, _really_ bad things
 * will follow.
 */

1033
void fd_install(unsigned int fd, struct file *file)
Linus Torvalds's avatar
Linus Torvalds committed
1034 1035
{
	struct files_struct *files = current->files;
1036
	struct fdtable *fdt;
Linus Torvalds's avatar
Linus Torvalds committed
1037
	spin_lock(&files->file_lock);