inode.c 19.5 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * hugetlbpage-backed filesystem.  Based on ramfs.
 *
 * William Irwin, 2002
 *
 * Copyright (C) 2002 Linus Torvalds.
 */

#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched.h>		/* remove ASAP */
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
21
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>

#include <asm/uaccess.h>

/* some random number */
#define HUGETLBFS_MAGIC	0x958458f6

static struct super_operations hugetlbfs_ops;
static struct address_space_operations hugetlbfs_aops;
38
const struct file_operations hugetlbfs_file_operations;
Linus Torvalds's avatar
Linus Torvalds committed
39
40
41
42
43
44
45
46
47
48
static struct inode_operations hugetlbfs_dir_inode_operations;
static struct inode_operations hugetlbfs_inode_operations;

static struct backing_dev_info hugetlbfs_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

int sysctl_hugetlb_shm_group;

49
50
51
52
53
54
55
56
57
58
static void huge_pagevec_release(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); ++i)
		put_page(pvec->pages[i]);

	pagevec_reinit(pvec);
}

Linus Torvalds's avatar
Linus Torvalds committed
59
60
61
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_dentry->d_inode;
62
	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
Linus Torvalds's avatar
Linus Torvalds committed
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
	loff_t len, vma_len;
	int ret;

	if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1))
		return -EINVAL;

	if (vma->vm_start & ~HPAGE_MASK)
		return -EINVAL;

	if (vma->vm_end & ~HPAGE_MASK)
		return -EINVAL;

	if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
		return -EINVAL;

	vma_len = (loff_t)(vma->vm_end - vma->vm_start);

80
	mutex_lock(&inode->i_mutex);
Linus Torvalds's avatar
Linus Torvalds committed
81
82
83
84
85
86
87
88
89
	file_accessed(file);
	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
	vma->vm_ops = &hugetlb_vm_ops;

	ret = -ENOMEM;
	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
		goto out;

90
91
92
93
	if (vma->vm_flags & VM_MAYSHARE)
		if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0)
			goto out;

94
95
	ret = 0;
	hugetlb_prefault_arch_hook(vma->vm_mm);
Linus Torvalds's avatar
Linus Torvalds committed
96
97
98
	if (inode->i_size < len)
		inode->i_size = len;
out:
99
	mutex_unlock(&inode->i_mutex);
Linus Torvalds's avatar
Linus Torvalds committed
100
101
102
103
104

	return ret;
}

/*
105
 * Called under down_write(mmap_sem).
Linus Torvalds's avatar
Linus Torvalds committed
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
 */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags);
#else
static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	start_addr = mm->free_area_cache;

135
136
137
	if (len <= mm->cached_hole_size)
		start_addr = TASK_UNMAPPED_BASE;

Linus Torvalds's avatar
Linus Torvalds committed
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				goto full_search;
			}
			return -ENOMEM;
		}

		if (!vma || addr + len <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
#endif

/*
 * Read a page. Again trivial. If it didn't already exist
 * in the page cache, it is zero-filled.
 */
static int hugetlbfs_readpage(struct file *file, struct page * page)
{
	unlock_page(page);
	return -EINVAL;
}

static int hugetlbfs_prepare_write(struct file *file,
			struct page *page, unsigned offset, unsigned to)
{
	return -EINVAL;
}

static int hugetlbfs_commit_write(struct file *file,
			struct page *page, unsigned offset, unsigned to)
{
	return -EINVAL;
}

static void truncate_huge_page(struct page *page)
{
	clear_page_dirty(page);
	ClearPageUptodate(page);
	remove_from_page_cache(page);
	put_page(page);
}

192
static void truncate_hugepages(struct inode *inode, loff_t lstart)
Linus Torvalds's avatar
Linus Torvalds committed
193
{
194
	struct address_space *mapping = &inode->i_data;
Linus Torvalds's avatar
Linus Torvalds committed
195
196
197
198
199
	const pgoff_t start = lstart >> HPAGE_SHIFT;
	struct pagevec pvec;
	pgoff_t next;
	int i;

200
201
202
203
	hugetlb_truncate_reservation(HUGETLBFS_I(inode),
				     lstart >> HPAGE_SHIFT);
	if (!mapping->nrpages)
		return;
Linus Torvalds's avatar
Linus Torvalds committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
	pagevec_init(&pvec, 0);
	next = start;
	while (1) {
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}

		for (i = 0; i < pagevec_count(&pvec); ++i) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			if (page->index > next)
				next = page->index;
			++next;
			truncate_huge_page(page);
			unlock_page(page);
			hugetlb_put_quota(mapping);
		}
		huge_pagevec_release(&pvec);
	}
	BUG_ON(!lstart && mapping->nrpages);
}

static void hugetlbfs_delete_inode(struct inode *inode)
{
232
	truncate_hugepages(inode, 0);
233
234
235
	clear_inode(inode);
}

Linus Torvalds's avatar
Linus Torvalds committed
236
237
static void hugetlbfs_forget_inode(struct inode *inode)
{
238
239
240
241
242
243
244
245
246
247
248
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (!sb || (sb->s_flags & MS_ACTIVE)) {
			spin_unlock(&inode_lock);
			return;
		}
		inode->i_state |= I_WILL_FREE;
Linus Torvalds's avatar
Linus Torvalds committed
249
		spin_unlock(&inode_lock);
250
251
252
253
254
255
256
257
258
		/*
		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
		 * in our backing_dev_info.
		 */
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
Linus Torvalds's avatar
Linus Torvalds committed
259
260
261
262
263
264
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
265
	truncate_hugepages(inode, 0);
Linus Torvalds's avatar
Linus Torvalds committed
266
267
268
269
270
271
272
	clear_inode(inode);
	destroy_inode(inode);
}

static void hugetlbfs_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
273
		generic_delete_inode(inode);
Linus Torvalds's avatar
Linus Torvalds committed
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
	else
		hugetlbfs_forget_inode(inode);
}

/*
 * h_pgoff is in HPAGE_SIZE units.
 * vma->vm_pgoff is in PAGE_SIZE units.
 */
static inline void
hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
{
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;

	vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) {
		unsigned long h_vm_pgoff;
		unsigned long v_offset;

		h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
		v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
		/*
		 * Is this VMA fully outside the truncation point?
		 */
		if (h_vm_pgoff >= h_pgoff)
			v_offset = 0;

300
301
		unmap_hugepage_range(vma,
				vma->vm_start + v_offset, vma->vm_end);
Linus Torvalds's avatar
Linus Torvalds committed
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
	}
}

/*
 * Expanding truncates are not allowed.
 */
static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
	unsigned long pgoff;
	struct address_space *mapping = inode->i_mapping;

	if (offset > inode->i_size)
		return -EINVAL;

	BUG_ON(offset & ~HPAGE_MASK);
	pgoff = offset >> HPAGE_SHIFT;

	inode->i_size = offset;
	spin_lock(&mapping->i_mmap_lock);
	if (!prio_tree_empty(&mapping->i_mmap))
		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
	spin_unlock(&mapping->i_mmap_lock);
324
	truncate_hugepages(inode, offset);
Linus Torvalds's avatar
Linus Torvalds committed
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
	return 0;
}

static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	unsigned int ia_valid = attr->ia_valid;

	BUG_ON(!inode);

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	if (ia_valid & ATTR_SIZE) {
		error = -EINVAL;
		if (!(attr->ia_size & ~HPAGE_MASK))
			error = hugetlb_vmtruncate(inode, attr->ia_size);
		if (error)
			goto out;
		attr->ia_valid &= ~ATTR_SIZE;
	}
	error = inode_setattr(inode, attr);
out:
	return error;
}

static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 
					gid_t gid, int mode, dev_t dev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode) {
		struct hugetlbfs_inode_info *info;
		inode->i_mode = mode;
		inode->i_uid = uid;
		inode->i_gid = gid;
		inode->i_blksize = HPAGE_SIZE;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &hugetlbfs_aops;
		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		info = HUGETLBFS_I(inode);
370
		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &hugetlbfs_inode_operations;
			inode->i_fop = &hugetlbfs_file_operations;
			break;
		case S_IFDIR:
			inode->i_op = &hugetlbfs_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;

			/* directory inodes start off with i_nlink == 2 (for "." entry) */
			inode->i_nlink++;
			break;
		case S_IFLNK:
			inode->i_op = &page_symlink_inode_operations;
			break;
		}
	}
	return inode;
}

/*
 * File creation. Allocate an inode, and we're done..
 */
static int hugetlbfs_mknod(struct inode *dir,
			struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *inode;
	int error = -ENOSPC;
	gid_t gid;

	if (dir->i_mode & S_ISGID) {
		gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else {
		gid = current->fsgid;
	}
	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev);
	if (inode) {
		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
		d_instantiate(dentry, inode);
		dget(dentry);	/* Extra count - pin the dentry in core */
		error = 0;
	}
	return error;
}

static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
	if (!retval)
		dir->i_nlink++;
	return retval;
}

static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
{
	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
}

static int hugetlbfs_symlink(struct inode *dir,
			struct dentry *dentry, const char *symname)
{
	struct inode *inode;
	int error = -ENOSPC;
	gid_t gid;

	if (dir->i_mode & S_ISGID)
		gid = dir->i_gid;
	else
		gid = current->fsgid;

	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid,
					gid, S_IFLNK|S_IRWXUGO, 0);
	if (inode) {
		int l = strlen(symname)+1;
		error = page_symlink(inode, symname, l);
		if (!error) {
			d_instantiate(dentry, inode);
			dget(dentry);
		} else
			iput(inode);
	}
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;

	return error;
}

/*
 * For direct-IO reads into hugetlb pages
 */
static int hugetlbfs_set_page_dirty(struct page *page)
{
	return 0;
}

470
static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
Linus Torvalds's avatar
Linus Torvalds committed
471
{
472
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
Linus Torvalds's avatar
Linus Torvalds committed
473
474
475
476
477

	buf->f_type = HUGETLBFS_MAGIC;
	buf->f_bsize = HPAGE_SIZE;
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
478
479
480
481
482
483
484
485
		/* If no limits set, just report 0 for max/free/used
		 * blocks, like simple_statfs() */
		if (sbinfo->max_blocks >= 0) {
			buf->f_blocks = sbinfo->max_blocks;
			buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
			buf->f_files = sbinfo->max_inodes;
			buf->f_ffree = sbinfo->free_inodes;
		}
Linus Torvalds's avatar
Linus Torvalds committed
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
		spin_unlock(&sbinfo->stat_lock);
	}
	buf->f_namelen = NAME_MAX;
	return 0;
}

static void hugetlbfs_put_super(struct super_block *sb)
{
	struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);

	if (sbi) {
		sb->s_fs_info = NULL;
		kfree(sbi);
	}
}

502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
{
	if (sbinfo->free_inodes >= 0) {
		spin_lock(&sbinfo->stat_lock);
		if (unlikely(!sbinfo->free_inodes)) {
			spin_unlock(&sbinfo->stat_lock);
			return 0;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}

	return 1;
}

static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
{
	if (sbinfo->free_inodes >= 0) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
}


Linus Torvalds's avatar
Linus Torvalds committed
527
528
529
530
static kmem_cache_t *hugetlbfs_inode_cachep;

static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
{
531
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
Linus Torvalds's avatar
Linus Torvalds committed
532
533
	struct hugetlbfs_inode_info *p;

534
535
	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
536
	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
537
538
	if (unlikely(!p)) {
		hugetlbfs_inc_free_inodes(sbinfo);
Linus Torvalds's avatar
Linus Torvalds committed
539
		return NULL;
540
	}
541
	p->prereserved_hpages = 0;
Linus Torvalds's avatar
Linus Torvalds committed
542
543
544
545
546
	return &p->vfs_inode;
}

static void hugetlbfs_destroy_inode(struct inode *inode)
{
547
	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
Linus Torvalds's avatar
Linus Torvalds committed
548
549
550
551
552
553
554
555
556
557
558
	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}

static struct address_space_operations hugetlbfs_aops = {
	.readpage	= hugetlbfs_readpage,
	.prepare_write	= hugetlbfs_prepare_write,
	.commit_write	= hugetlbfs_commit_write,
	.set_page_dirty	= hugetlbfs_set_page_dirty,
};

559
560
561
562
563
564
565
566
567
568

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(&ei->vfs_inode);
}

569
const struct file_operations hugetlbfs_file_operations = {
Linus Torvalds's avatar
Linus Torvalds committed
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
	.mmap			= hugetlbfs_file_mmap,
	.fsync			= simple_sync_file,
	.get_unmapped_area	= hugetlb_get_unmapped_area,
};

static struct inode_operations hugetlbfs_dir_inode_operations = {
	.create		= hugetlbfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= hugetlbfs_symlink,
	.mkdir		= hugetlbfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= hugetlbfs_mknod,
	.rename		= simple_rename,
	.setattr	= hugetlbfs_setattr,
};

static struct inode_operations hugetlbfs_inode_operations = {
	.setattr	= hugetlbfs_setattr,
};

static struct super_operations hugetlbfs_ops = {
	.alloc_inode    = hugetlbfs_alloc_inode,
	.destroy_inode  = hugetlbfs_destroy_inode,
	.statfs		= hugetlbfs_statfs,
596
	.delete_inode	= hugetlbfs_delete_inode,
Linus Torvalds's avatar
Linus Torvalds committed
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
	.drop_inode	= hugetlbfs_drop_inode,
	.put_super	= hugetlbfs_put_super,
};

static int
hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
{
	char *opt, *value, *rest;

	if (!options)
		return 0;
	while ((opt = strsep(&options, ",")) != NULL) {
		if (!*opt)
			continue;

		value = strchr(opt, '=');
		if (!value || !*value)
			return -EINVAL;
		else
			*value++ = '\0';

		if (!strcmp(opt, "uid"))
			pconfig->uid = simple_strtoul(value, &value, 0);
		else if (!strcmp(opt, "gid"))
			pconfig->gid = simple_strtoul(value, &value, 0);
		else if (!strcmp(opt, "mode"))
			pconfig->mode = simple_strtoul(value,&value,0) & 0777U;
		else if (!strcmp(opt, "size")) {
			unsigned long long size = memparse(value, &rest);
			if (*rest == '%') {
				size <<= HPAGE_SHIFT;
				size *= max_huge_pages;
				do_div(size, 100);
				rest++;
			}
			size &= HPAGE_MASK;
			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
			value = rest;
		} else if (!strcmp(opt,"nr_inodes")) {
			pconfig->nr_inodes = memparse(value, &rest);
			value = rest;
		} else
			return -EINVAL;

		if (*value)
			return -EINVAL;
	}
	return 0;
}

static int
hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode * inode;
	struct dentry * root;
	int ret;
	struct hugetlbfs_config config;
	struct hugetlbfs_sb_info *sbinfo;

	config.nr_blocks = -1; /* No limit on size by default */
	config.nr_inodes = -1; /* No limit on number of inodes by default */
	config.uid = current->fsuid;
	config.gid = current->fsgid;
	config.mode = 0755;
	ret = hugetlbfs_parse_options(data, &config);

	if (ret)
		return ret;

	sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;
	sb->s_fs_info = sbinfo;
	spin_lock_init(&sbinfo->stat_lock);
	sbinfo->max_blocks = config.nr_blocks;
	sbinfo->free_blocks = config.nr_blocks;
	sbinfo->max_inodes = config.nr_inodes;
	sbinfo->free_inodes = config.nr_inodes;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = HPAGE_SIZE;
	sb->s_blocksize_bits = HPAGE_SHIFT;
	sb->s_magic = HUGETLBFS_MAGIC;
	sb->s_op = &hugetlbfs_ops;
	sb->s_time_gran = 1;
	inode = hugetlbfs_get_inode(sb, config.uid, config.gid,
					S_IFDIR | config.mode, 0);
	if (!inode)
		goto out_free;

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		goto out_free;
	}
	sb->s_root = root;
	return 0;
out_free:
	kfree(sbinfo);
	return -ENOMEM;
}

int hugetlb_get_quota(struct address_space *mapping)
{
	int ret = 0;
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);

	if (sbinfo->free_blocks > -1) {
		spin_lock(&sbinfo->stat_lock);
		if (sbinfo->free_blocks > 0)
			sbinfo->free_blocks--;
		else
			ret = -ENOMEM;
		spin_unlock(&sbinfo->stat_lock);
	}

	return ret;
}

void hugetlb_put_quota(struct address_space *mapping)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);

	if (sbinfo->free_blocks > -1) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_blocks++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

726
727
static int hugetlbfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
Linus Torvalds's avatar
Linus Torvalds committed
728
{
729
	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
Linus Torvalds's avatar
Linus Torvalds committed
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
}

static struct file_system_type hugetlbfs_fs_type = {
	.name		= "hugetlbfs",
	.get_sb		= hugetlbfs_get_sb,
	.kill_sb	= kill_litter_super,
};

static struct vfsmount *hugetlbfs_vfsmount;

static int can_do_hugetlb_shm(void)
{
	return likely(capable(CAP_IPC_LOCK) ||
			in_group_p(sysctl_hugetlb_shm_group) ||
			can_do_mlock());
}

struct file *hugetlb_zero_setup(size_t size)
{
	int error = -ENOMEM;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr quick_string;
	char buf[16];
755
	static atomic_t counter;
Linus Torvalds's avatar
Linus Torvalds committed
756
757
758
759
760
761
762
763

	if (!can_do_hugetlb_shm())
		return ERR_PTR(-EPERM);

	if (!user_shm_lock(size, current->user))
		return ERR_PTR(-ENOMEM);

	root = hugetlbfs_vfsmount->mnt_root;
764
	snprintf(buf, 16, "%u", atomic_inc_return(&counter));
Linus Torvalds's avatar
Linus Torvalds committed
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
	quick_string.name = buf;
	quick_string.len = strlen(quick_string.name);
	quick_string.hash = 0;
	dentry = d_alloc(root, &quick_string);
	if (!dentry)
		goto out_shm_unlock;

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto out_dentry;

	error = -ENOSPC;
	inode = hugetlbfs_get_inode(root->d_sb, current->fsuid,
				current->fsgid, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto out_file;

783
784
785
786
787
	error = -ENOMEM;
	if (hugetlb_extend_reservation(HUGETLBFS_I(inode),
				       size >> HPAGE_SHIFT) != 0)
		goto out_inode;

Linus Torvalds's avatar
Linus Torvalds committed
788
789
790
791
792
793
794
795
796
797
	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;
	file->f_vfsmnt = mntget(hugetlbfs_vfsmount);
	file->f_dentry = dentry;
	file->f_mapping = inode->i_mapping;
	file->f_op = &hugetlbfs_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return file;

798
799
out_inode:
	iput(inode);
Linus Torvalds's avatar
Linus Torvalds committed
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
out_file:
	put_filp(file);
out_dentry:
	dput(dentry);
out_shm_unlock:
	user_shm_unlock(size, current->user);
	return ERR_PTR(error);
}

static int __init init_hugetlbfs_fs(void)
{
	int error;
	struct vfsmount *vfsmount;

	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
					sizeof(struct hugetlbfs_inode_info),
					0, 0, init_once, NULL);
	if (hugetlbfs_inode_cachep == NULL)
		return -ENOMEM;

	error = register_filesystem(&hugetlbfs_fs_type);
	if (error)
		goto out;

	vfsmount = kern_mount(&hugetlbfs_fs_type);

	if (!IS_ERR(vfsmount)) {
		hugetlbfs_vfsmount = vfsmount;
		return 0;
	}

	error = PTR_ERR(vfsmount);

 out:
	if (error)
		kmem_cache_destroy(hugetlbfs_inode_cachep);
	return error;
}

static void __exit exit_hugetlbfs_fs(void)
{
	kmem_cache_destroy(hugetlbfs_inode_cachep);
	unregister_filesystem(&hugetlbfs_fs_type);
}

module_init(init_hugetlbfs_fs)
module_exit(exit_hugetlbfs_fs)

MODULE_LICENSE("GPL");