root/morphix/trunk/mini_fo/main.c

Revision 2, 22.0 kB (checked in by nextime, 2 years ago)

Initial import, branching from morphix svn

Line 
1 /*
2  * Copyright (c) 1997-2004 Erez Zadok <ezk@cs.stonybrook.edu>
3  * Copyright (c) 2001-2004 Stony Brook University
4  *
5  * For specific licensing information, see the COPYING file distributed with
6  * this package, or get one from ftp://ftp.filesystems.org/pub/fistgen/COPYING.
7  *
8  * This Copyright notice must be kept intact and distributed with all
9  * fistgen sources INCLUDING sources generated by fistgen.
10  */
11 /*
12  * Copyright (C) 2004 Markus Klotzbuecher <mk@creamnet.de>
13  *
14  * This program is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU General Public License
16  * as published by the Free Software Foundation; either version
17  * 2 of the License, or (at your option) any later version.
18  */
19
20 /*
21  *  $Id: main.c 1345 2004-11-30 23:18:53Z alextreme $
22  */
23
24 #ifdef HAVE_CONFIG_H
25 # include <config.h>
26 #endif /* HAVE_CONFIG_H */
27 #ifdef FISTGEN
28 # include "fist_mini_fo.h"
29 #endif /* FISTGEN */
30 #include "fist.h"
31 #include "mini_fo.h"
32 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
33 #include <linux/module.h>
34 #endif
35
36 /* This definition must only appear after we include <linux/module.h> */
37 #ifndef MODULE_LICENSE
38 #define MODULE_LICENSE(bison)
39 #endif /* not MODULE_LICENSE */
40
41
42
43 /* sb we pass is mini_fo's super_block */
44 /*
45  * This is the mini_fo tri interpose function, which extends the functionality
46  * of the regular interpose by interposing a higher level inode on top of two
47  * lower level ones: the base filesystem inode and the storage filesystem inode.
48  *
49  * The regular interpose functions is still needed, as not every lower base
50  * inode has a corresponding storage inode. This is due to the approach used of
51  * structuring the meta-data.
52  *
53  *  sb we pass is mini_fo's super_block
54  */
55 int
56 mini_fo_tri_interpose(dentry_t *lower_dentry, dentry_t *lower_sto_dentry, dentry_t *dentry, super_block_t *sb, int flag)
57 {
58         inode_t *lower_inode = NULL;
59         inode_t *lower_sto_inode = NULL; /* store corresponding storage inode */
60         int err = 0;
61         inode_t *inode;
62
63         /* Pointer to lower_sto_inode if exists, else to lower_inode.
64          * This is used to copy the attributes of the correct inode. */
65         inode_t *master_inode;
66
67         print_entry_location();
68
69         if(lower_dentry)
70             lower_inode = lower_dentry->d_inode;
71         if(lower_sto_dentry)
72             lower_sto_inode = lower_sto_dentry->d_inode;
73
74         ASSERT(dentry->d_inode == NULL);
75
76         /* mk: One of the inodes associated with the dentrys is likely to
77          * be NULL, so carefull:
78          */
79         ASSERT((lower_inode != NULL) || (lower_sto_inode != NULL));
80
81         if(lower_sto_inode)
82             master_inode = lower_sto_inode;
83         else
84             master_inode = lower_inode;
85
86         /*
87          * We allocate our new inode below, by calling iget.
88          * iget will call our read_inode which will initialize some
89          * of the new inode's fields
90          */
91
92         /* check that the lower file system didn't cross a mount point
93          *
94          * mk: taken this out, not reasonable for fanout:
95          */
96
97         /*
98          * original: inode = iget(sb, lower_inode->i_ino);
99          */
100         inode = iget(sb, iunique(sb, 25));
101         if (!inode) {
102                 err = -EACCES;          /* should be impossible??? */
103                 goto out;
104         }
105
106         /*
107          * interpose the inode if not already interposed
108          *   this is possible if the inode is being reused
109          * XXX: what happens if we get_empty_inode() but there's another already?
110          * for now, ASSERT() that this can't happen; fix later.
111          */
112         if (INODE_TO_LOWER(inode) != NULL) {
113                 printk(KERN_CRIT "MINI_FO: WARNING: mini_fo_interpose INODE_TO_LOWER(inode) is not NULL\n");
114         }
115         if (INODE_TO_LOWER_STO(inode) != NULL) {
116                 printk(KERN_CRIT "MINI_FO: WARNING: mini_fo_interpose INODE_TO_LOWER_STO(inode) is not NULL\n");
117         }
118
119         /* mk: Carefull, igrab can't handle NULL inodes (ok, why should it?), so
120          * we need to check here:
121          */
122         if(lower_inode)
123             INODE_TO_LOWER(inode) = igrab(lower_inode);
124         else
125             INODE_TO_LOWER(inode) = NULL;
126
127         if(lower_sto_inode)
128             INODE_TO_LOWER_STO(inode) = igrab(lower_sto_inode);
129         else
130             INODE_TO_LOWER_STO(inode) = NULL;
131
132
133         /* Use different set of inode ops for symlinks & directories*/
134         if (S_ISLNK(master_inode->i_mode))
135                 inode->i_op = &mini_fo_symlink_iops;
136         else if (S_ISDIR(master_inode->i_mode))
137                 inode->i_op = &mini_fo_dir_iops;
138
139         /* Use different set of file ops for directories */
140         if (S_ISDIR(master_inode->i_mode))
141                 inode->i_fop = &mini_fo_dir_fops;
142
143         /* properly initialize special inodes */
144         if (S_ISBLK(master_inode->i_mode) || S_ISCHR(master_inode->i_mode) ||
145             S_ISFIFO(master_inode->i_mode) || S_ISSOCK(master_inode->i_mode)) {
146                 init_special_inode(inode, master_inode->i_mode, master_inode->i_rdev);
147         }
148
149         /* Fix our inode's address operations to that of the lower inode */
150         if (inode->i_mapping->a_ops != master_inode->i_mapping->a_ops) {
151                 fist_dprint(7, "fixing inode 0x%x a_ops (0x%x -> 0x%x)\n",
152                             (int) inode, (int) inode->i_mapping->a_ops,
153                             (int) master_inode->i_mapping->a_ops);
154                 inode->i_mapping->a_ops = master_inode->i_mapping->a_ops;
155         }
156
157         /* only (our) lookup wants to do a d_add */
158         if (flag)
159                 d_add(dentry, inode);
160         else
161                 d_instantiate(dentry, inode);
162
163
164         ASSERT(DENTRY_TO_PRIVATE(dentry) != NULL);
165
166         /* all well, copy inode attributes */
167         fist_copy_attr_all(inode, master_inode);
168
169  out:
170         print_exit_status(err);
171         return err;
172 }
173
174 /*
175  * This is the original interpose function
176  *
177  * sb we pass is mini_fo's super_block
178  */
179 int
180 mini_fo_interpose(dentry_t *lower_dentry, dentry_t *dentry, super_block_t *sb, int flag)
181 {
182         inode_t *lower_inode;
183         int err = 0;
184         inode_t *inode;
185
186         print_entry_location();
187
188         lower_inode = lower_dentry->d_inode; /* CPW: moved after print_entry_location */
189
190         ASSERT(lower_inode != NULL);
191         ASSERT(dentry->d_inode == NULL);
192
193         /*
194          * We allocate our new inode below, by calling iget.
195          * iget will call our read_inode which will initialize some
196          * of the new inode's fields
197          */
198         /* check that the lower file system didn't cross a mount point */
199 // mk: take this out, not reasonable for fanout:
200 //      if (lower_inode->i_sb != SUPERBLOCK_TO_LOWER(sb)) {
201 //              err = -EXDEV;
202 //              goto out;
203 //      }
204         /* mk
205          * original: inode = iget(sb, hidden_inode->i_ino);
206          *
207          * this seems to work
208          */
209         //inode = iget(sb, iunique(sb, 25));
210         inode = iget(sb, lower_inode->i_ino);
211        
212         if (!inode) {
213                 err = -EACCES;          /* should be impossible??? */
214                 goto out;
215         }
216
217         /*
218          * interpose the inode if not already interposed
219          * this is possible if the inode is being reused
220          * XXX: what happens if we get_empty_inode() but there's another already?
221          * for now, ASSERT() that this can't happen; fix later.
222          */
223         if (INODE_TO_LOWER(inode) == NULL)
224                 INODE_TO_LOWER(inode) = igrab(lower_inode);
225
226         /* Use different set of inode ops for symlinks & directories*/
227         if (S_ISLNK(lower_inode->i_mode))
228                 inode->i_op = &mini_fo_symlink_iops;
229         else if (S_ISDIR(lower_inode->i_mode))
230                 inode->i_op = &mini_fo_dir_iops;
231         /* Use different set of file ops for directories */
232         if (S_ISDIR(lower_inode->i_mode))
233                 inode->i_fop = &mini_fo_dir_fops;
234
235         /* properly initialize special inodes */
236         if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) ||
237             S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) {
238                 init_special_inode(inode, lower_inode->i_mode, lower_inode->i_rdev);
239         }
240
241         /* Fix our inode's address operations to that of the lower inode */
242         if (inode->i_mapping->a_ops != lower_inode->i_mapping->a_ops) {
243                 fist_dprint(7, "fixing inode 0x%x a_ops (0x%x -> 0x%x)\n",
244                             (int) inode, (int) inode->i_mapping->a_ops,
245                             (int) lower_inode->i_mapping->a_ops);
246                 inode->i_mapping->a_ops = lower_inode->i_mapping->a_ops;
247         }
248
249         /* only (our) lookup wants to do a d_add */
250         if (flag)
251                 d_add(dentry, inode);
252         else
253                 d_instantiate(dentry, inode);
254
255         ASSERT(DENTRY_TO_PRIVATE(dentry) != NULL);
256
257         /* all well, copy inode attributes */
258         fist_copy_attr_all(inode, lower_inode);
259
260 out:
261         print_exit_status(err);
262         return err;
263 }
264
265
266 /* find lower dentry given this mini_fo dentry */
267 dentry_t *
268 __mini_fo_lower_dentry(const char *file, const char *func, int line, dentry_t *dentry)
269 {
270         dentry_t *lower_dentry;
271
272         ASSERT2(dentry != NULL);
273         ASSERT2(dentry->d_op != NULL);
274         ASSERT2(dentry->d_op == &mini_fo_dops);
275
276         ASSERT2(dentry->d_sb->s_op == &mini_fo_sops);
277         if (dentry->d_inode) {
278                 ASSERT2(dentry->d_inode->i_op == &mini_fo_main_iops ||
279                         dentry->d_inode->i_op == &mini_fo_dir_iops ||
280                         dentry->d_inode->i_op == &mini_fo_symlink_iops);
281         }
282         lower_dentry = DENTRY_TO_LOWER(dentry);
283         ASSERT2(lower_dentry != NULL);
284         return lower_dentry;
285 }
286
287
288 /* find lower storage dentry given this mini_fo dentry */
289 dentry_t *
290 __mini_fo_lower_sto_dentry(const char *file, const char *func, int line, dentry_t *dentry)
291 {
292         dentry_t *lower_sto_dentry;
293
294         ASSERT2(dentry != NULL);
295         ASSERT2(dentry->d_op != NULL);
296         ASSERT2(dentry->d_op == &mini_fo_dops);
297
298         ASSERT2(dentry->d_sb->s_op == &mini_fo_sops);
299         if (dentry->d_inode) {
300                 ASSERT2(dentry->d_inode->i_op == &mini_fo_main_iops ||
301                         dentry->d_inode->i_op == &mini_fo_dir_iops ||
302                         dentry->d_inode->i_op == &mini_fo_symlink_iops);
303         }
304         lower_sto_dentry = DENTRY_TO_LOWER_STO(dentry);
305         ASSERT2(lower_sto_dentry != NULL);
306         return lower_sto_dentry;
307 }
308
309 /*
310  * Parse mount options: dir=XXX and debug=N
311  *
312  * Returns the dentry object of the lower-level (lower) directory;
313  * We want to mount our stackable file system on top of that lower directory.
314  *
315  * Sets default debugging level to N, if any.
316  * dir1 = readonly directory
317  * dir2 = storage directory
318  * mount target is the union directory
319  */
320 dentry_t *
321 mini_fo_parse_options(super_block_t *sb, char *options)
322 {
323         dentry_t *lower_root   = ERR_PTR(-EINVAL);
324         dentry_t *lower_root2 = ERR_PTR(-EINVAL);
325         struct nameidata nd, nd2;
326         char *name, *tmp, *end;
327         int err = -1;
328
329         print_entry_location();
330
331         /* We don't want to go off the end of our arguments later on. */
332         for (end = options; *end; end++);
333
334         while (options < end) {
335                 tmp = options;
336                 while (*tmp && *tmp != ',')
337                         tmp++;
338                 *tmp = '\0';
339                 if (!strncmp("dir=", options, 4)) {
340                         /* note: the name passed need not be encoded */
341                         name = options + 4;
342                         printk(KERN_CRIT "base dir: %s, ", name);
343 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
344                         if (path_init(name, LOOKUP_FOLLOW, &nd))
345                                 err = path_walk(name, &nd);
346 #else
347                         err = path_lookup(name, LOOKUP_FOLLOW, &nd);
348 #endif
349                         if (err) {
350                                 printk("mini_fo: error accessing lower directory '%s'\n", name);
351                                 lower_root = ERR_PTR(err);
352                                 goto out;
353                         }
354                         lower_root = nd.dentry;
355                         SUPERBLOCK_TO_PRIVATE(sb)->lower_mnt = nd.mnt;
356                         fist_dprint(6, "parse_options: new s_root, inode: 0x%lx, 0x%lx\n", (long) lower_root, (long) lower_root->d_inode);
357
358                         fist_print_dentry("lower_root: ", lower_root);
359                         fist_print_dentry("nd.mnt->mnt_root: ", nd.mnt->mnt_root);
360                         fist_print_inode("lower_inode:", lower_root->d_inode);
361                         fist_print_sb("lower_super:", lower_root->d_sb);
362
363                 }else if(!strncmp("dir2=", options, 5)) {
364                         /* parse the storage dir */
365                         name = options + 5;
366                         printk(KERN_CRIT "storage dir: %s\n", name);
367 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
368                         if(path_init(name, LOOKUP_FOLLOW, &nd2))
369                                 err = path_walk(name, &nd2);
370 #else
371                         err = path_lookup(name, LOOKUP_FOLLOW, &nd2);
372 #endif
373                         if(err) {
374                                 printk("mini_fo: error accessing lower storage directory '%s'\n", name);
375                                 lower_root2 = ERR_PTR(err);
376                                 goto out;
377                         }
378                         lower_root2 = nd2.dentry;
379                         SUPERBLOCK_TO_PRIVATE(sb)->lower_mnt2 = nd2.mnt;
380                         SUPERBLOCK_TO_LOWER_STO(sb) = lower_root2->d_sb;
381
382                         /* mk: save the storage_dir_dentry, ugly*/
383                         SUPERBLOCK_TO_PRIVATE(sb)->storage_dir_dentry = lower_root2;
384
385                         fist_print_dentry("lower_root2: ", lower_root2);
386                         fist_print_dentry("nd2.mnt->mnt_root: ", nd2.mnt->mnt_root);
387                         fist_print_inode("lower_inode2:", lower_root2->d_inode);
388                         fist_print_sb("lower_super2:", lower_root2->d_sb);
389
390                         /* validate storage dir, this is done in mini_fo_read_super for
391                          * the base directory.
392                          */
393                         if (IS_ERR(lower_root2)) {
394                                 printk(KERN_WARNING "mini_fo_parse_options: storage dentry lookup failed (err = %ld)\n", PTR_ERR(lower_root2));
395                                 goto out;
396                         }
397                         if (!lower_root2->d_inode) {
398                                 printk(KERN_WARNING "mini_fo_parse_options: no storage dir to interpose on.\n");
399                                 goto out;
400                         }
401                         SUPERBLOCK_TO_LOWER_STO(sb) = lower_root2->d_sb;
402                         // deprecated: mini_fo_storage_dentry = lower_root2;
403
404
405
406                 }else if (!strncmp("debug=", options, 6)) {
407                         int debug = simple_strtoul(options + 6, NULL, 0);
408                         fist_set_debug_value(debug);
409                 } else {
410                         printk(KERN_WARNING "mini_fo: unrecognized option '%s'\n", options);
411                 }
412                 options = tmp + 1;
413         }
414
415 out:
416         print_exit_location();
417         return lower_root;
418 }
419
420 #ifdef FIST_MALLOC_DEBUG
421 /* for malloc debugging */
422 static atomic_t mini_fo_malloc_counter;
423
424 void *
425 mini_fo_KMALLOC(size_t len, int flag, int line, const char *file)
426 {
427         void *ptr = (void *) KMALLOC(len, flag);
428         if (ptr) {
429                 atomic_inc(&mini_fo_malloc_counter);
430                 printk("KM:%d:%p:%d:%s\n", atomic_read(&mini_fo_malloc_counter),ptr, line, file);
431         }
432         return ptr;
433 }
434
435 void
436 mini_fo_KFREE(void *ptr, int line, const char *file)
437 {
438         atomic_inc(&mini_fo_malloc_counter);
439         printk("KF:%d:%p:%d:%s\n", atomic_read(&mini_fo_malloc_counter), ptr, line, file);
440         KFREE(ptr);
441 }
442 #endif /* FIST_MALLOC_DEBUG */
443
444 /**
445  * Not really clean but people want both version 2.4 and 2.6
446  */
447 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
448 /* for attach mode, we use a different ->read_super() in attach.c */
449 super_block_t *
450 mini_fo_read_super(super_block_t *sb, void *raw_data, int silent)
451 {
452         super_block_t *ret_sb = NULL;
453         dentry_t *lower_root;
454
455         print_entry_location();
456
457
458         if (!raw_data) {
459                 printk(KERN_WARNING "mini_fo_read_super: missing data argument\n");
460                 goto out;
461         }
462         /*
463          * Allocate superblock private data
464          */
465         SUPERBLOCK_TO_PRIVATE(sb) = kmalloc(sizeof(struct mini_fo_sb_info), GFP_KERNEL);
466         if (!SUPERBLOCK_TO_PRIVATE(sb)) {
467                 printk(KERN_WARNING "%s: out of memory\n", __FUNCTION__);
468                 goto out;
469         }
470         SUPERBLOCK_TO_LOWER(sb) = NULL;
471
472         lower_root = mini_fo_parse_options(sb, raw_data);
473         if (IS_ERR(lower_root)) {
474                 printk(KERN_WARNING "mini_fo_read_super: lookup_dentry failed (err = %ld)\n", PTR_ERR(lower_root));
475                 goto out_free;
476         }
477         if (!lower_root->d_inode) {
478                 printk(KERN_WARNING "mini_fo_read_super: no directory to interpose on\n");
479                 goto out_free;
480         }
481         SUPERBLOCK_TO_LOWER(sb) = lower_root->d_sb;
482
483         /*
484          * Linux 2.4.2-ac3 and beyond has code in
485          * mm/filemap.c:generic_file_write() that requires sb->s_maxbytes
486          * to be populated.  If not set, all write()s under that sb will
487          * return 0.
488          *
489          * Linux 2.4.4+ automatically sets s_maxbytes to MAX_NON_LFS;
490          * the filesystem should override it only if it supports LFS.
491          */
492         /* non-SCA code is good to go with LFS */
493         sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
494
495         sb->s_op = &mini_fo_sops;
496         /*
497          * we can't use d_alloc_root if we want to use
498          * our own interpose function unchanged,
499          * so we simply replicate *most* of the code in d_alloc_root here
500          */
501         sb->s_root = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
502         if (IS_ERR(sb->s_root)) {
503                 printk(KERN_WARNING "mini_fo_read_super: d_alloc failed\n");
504                 goto out_dput;
505         }
506
507         sb->s_root->d_op = &mini_fo_dops;
508         sb->s_root->d_sb = sb;
509         sb->s_root->d_parent = sb->s_root;
510         /* link the upper and lower dentries */
511         DENTRY_TO_PRIVATE(sb->s_root) = (struct mini_fo_dentry_info *) kmalloc(sizeof(struct mini_fo_dentry_info), GFP_KERNEL);
512         if (!DENTRY_TO_PRIVATE(sb->s_root)) {
513                 goto out_dput2;
514         }
515         DENTRY_TO_PRIVATE(sb->s_root)->state = 1;
516         DENTRY_TO_LOWER(sb->s_root) = lower_root;
517         /* fanout relevant, interpose on storage root dentry too */
518         DENTRY_TO_LOWER_STO(sb->s_root) = SUPERBLOCK_TO_PRIVATE(sb)->storage_dir_dentry;
519         /* ...and call tri-interpose to interpose root dir inodes
520          * if (mini_fo_interpose(lower_root, sb->s_root, sb, 0))
521          */
522         if(mini_fo_tri_interpose(lower_root, DENTRY_TO_LOWER_STO(sb->s_root), sb->s_root, sb, 0))
523                 goto