diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-i686 b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-i686 index b134a9dd0..2cc6c512b 100644 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-i686 +++ b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-i686 @@ -4499,8 +4499,8 @@ CONFIG_SQUASHFS=m # CONFIG_SQUASHFS_FILE_CACHE is not set CONFIG_SQUASHFS_FILE_DIRECT=y # CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y +CONFIG_SQUASHFS_DECOMP_MULTI=y +# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set # CONFIG_SQUASHFS_XATTR is not set CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZO=y diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-x86_64 b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-x86_64 index a90c817c7..c894ccb0f 100644 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-x86_64 +++ b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.14/0_config-x86_64 @@ -4367,8 +4367,8 @@ CONFIG_SQUASHFS=m # CONFIG_SQUASHFS_FILE_CACHE is not set CONFIG_SQUASHFS_FILE_DIRECT=y # CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y +CONFIG_SQUASHFS_DECOMP_MULTI=y +# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set # CONFIG_SQUASHFS_XATTR is not set CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZO=y diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/.calculate_directory b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/.calculate_directory deleted file mode 100644 index 3ca58dff5..000000000 --- 
a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/.calculate_directory +++ /dev/null @@ -1 +0,0 @@ -# Calculate append=skip merge(sys-kernel/calculate-sources)>=3.15&&merge(sys-kernel/calculate-sources)<3.16 diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch deleted file mode 100644 index e841b5e72..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r5-3.15.patch +++ /dev/null @@ -1,105 +0,0 @@ -# Calculate format=diff os_linux_system==desktop -From 2220cb873d952611ace9c36105c23b0c8425bd0a Mon Sep 17 00:00:00 2001 -From: Paolo Valente -Date: Thu, 22 May 2014 11:59:35 +0200 -Subject: [PATCH 1/3] block: cgroups, kconfig, build bits for BFQ-v7r5-3.15 - -Update Kconfig.iosched and do the related Makefile changes to include -kernel configuration options for BFQ. Also add the bfqio controller -to the cgroups subsystem. - -Signed-off-by: Paolo Valente -Signed-off-by: Arianna Avanzini ---- - block/Kconfig.iosched | 32 ++++++++++++++++++++++++++++++++ - block/Makefile | 1 + - include/linux/cgroup_subsys.h | 4 ++++ - 3 files changed, 37 insertions(+) - -diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched -index 421bef9..0ee5f0f 100644 ---- a/block/Kconfig.iosched -+++ b/block/Kconfig.iosched -@@ -39,6 +39,27 @@ config CFQ_GROUP_IOSCHED - ---help--- - Enable group IO scheduling in CFQ. - -+config IOSCHED_BFQ -+ tristate "BFQ I/O scheduler" -+ default n -+ ---help--- -+ The BFQ I/O scheduler tries to distribute bandwidth among -+ all processes according to their weights. -+ It aims at distributing the bandwidth as desired, independently of -+ the disk parameters and with any workload. 
It also tries to -+ guarantee low latency to interactive and soft real-time -+ applications. If compiled built-in (saying Y here), BFQ can -+ be configured to support hierarchical scheduling. -+ -+config CGROUP_BFQIO -+ bool "BFQ hierarchical scheduling support" -+ depends on CGROUPS && IOSCHED_BFQ=y -+ default n -+ ---help--- -+ Enable hierarchical scheduling in BFQ, using the cgroups -+ filesystem interface. The name of the subsystem will be -+ bfqio. -+ - choice - prompt "Default I/O scheduler" - default DEFAULT_CFQ -@@ -52,6 +73,16 @@ choice - config DEFAULT_CFQ - bool "CFQ" if IOSCHED_CFQ=y - -+ config DEFAULT_BFQ -+ bool "BFQ" if IOSCHED_BFQ=y -+ help -+ Selects BFQ as the default I/O scheduler which will be -+ used by default for all block devices. -+ The BFQ I/O scheduler aims at distributing the bandwidth -+ as desired, independently of the disk parameters and with -+ any workload. It also tries to guarantee low latency to -+ interactive and soft real-time applications. -+ - config DEFAULT_NOOP - bool "No-op" - -@@ -61,6 +92,7 @@ config DEFAULT_IOSCHED - string - default "deadline" if DEFAULT_DEADLINE - default "cfq" if DEFAULT_CFQ -+ default "bfq" if DEFAULT_BFQ - default "noop" if DEFAULT_NOOP - - endmenu -diff --git a/block/Makefile b/block/Makefile -index 20645e8..cbd83fb 100644 ---- a/block/Makefile -+++ b/block/Makefile -@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o - obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o - obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o - obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -+obj-$(CONFIG_IOSCHED_BFQ) += bfq-iosched.o - - obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o - obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o -diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h -index 768fe44..cdd2528 100644 ---- a/include/linux/cgroup_subsys.h -+++ b/include/linux/cgroup_subsys.h -@@ -39,6 +39,10 @@ SUBSYS(net_cls) - SUBSYS(blkio) - #endif - -+#if 
IS_ENABLED(CONFIG_CGROUP_BFQIO) -+SUBSYS(bfqio) -+#endif -+ - #if IS_ENABLED(CONFIG_CGROUP_PERF) - SUBSYS(perf_event) - #endif --- -1.9.3 - diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch deleted file mode 100644 index e5f04e34f..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0002-block-introduce-the-BFQ-v7r5-I-O-sched-for-3.15.patch +++ /dev/null @@ -1,6636 +0,0 @@ -# Calculate format=diff os_linux_system==desktop -From 2d7017409acf7cba1065409819581239379098be Mon Sep 17 00:00:00 2001 -From: Paolo Valente -Date: Thu, 9 May 2013 19:10:02 +0200 -Subject: [PATCH 2/3] block: introduce the BFQ-v7r5 I/O sched for 3.15 - -Add the BFQ-v7r5 I/O scheduler to 3.15. -The general structure is borrowed from CFQ, as much of the code for -handling I/O contexts. Over time, several useful features have been -ported from CFQ as well (details in the changelog in README.BFQ). A -(bfq_)queue is associated to each task doing I/O on a device, and each -time a scheduling decision has to be made a queue is selected and served -until it expires. - - - Slices are given in the service domain: tasks are assigned - budgets, measured in number of sectors. Once got the disk, a task - must however consume its assigned budget within a configurable - maximum time (by default, the maximum possible value of the - budgets is automatically computed to comply with this timeout). - This allows the desired latency vs "throughput boosting" tradeoff - to be set. - - - Budgets are scheduled according to a variant of WF2Q+, implemented - using an augmented rb-tree to take eligibility into account while - preserving an O(log N) overall complexity. 
- - - A low-latency tunable is provided; if enabled, both interactive - and soft real-time applications are guaranteed a very low latency. - - - Latency guarantees are preserved also in the presence of NCQ. - - - Also with flash-based devices, a high throughput is achieved - while still preserving latency guarantees. - - - BFQ features Early Queue Merge (EQM), a sort of fusion of the - cooperating-queue-merging and the preemption mechanisms present - in CFQ. EQM is in fact a unified mechanism that tries to get a - sequential read pattern, and hence a high throughput, with any - set of processes performing interleaved I/O over a contiguous - sequence of sectors. - - - BFQ supports full hierarchical scheduling, exporting a cgroups - interface. Since each node has a full scheduler, each group can - be assigned its own weight. - - - If the cgroups interface is not used, only I/O priorities can be - assigned to processes, with ioprio values mapped to weights - with the relation weight = IOPRIO_BE_NR - ioprio. - - - ioprio classes are served in strict priority order, i.e., lower - priority queues are not served as long as there are higher - priority queues. Among queues in the same class the bandwidth is - distributed in proportion to the weight of each queue. A very - thin extra bandwidth is however guaranteed to the Idle class, to - prevent it from starving. 
- -Signed-off-by: Paolo Valente -Signed-off-by: Arianna Avanzini ---- - block/bfq-cgroup.c | 930 +++++++++++++ - block/bfq-ioc.c | 36 + - block/bfq-iosched.c | 3617 +++++++++++++++++++++++++++++++++++++++++++++++++++ - block/bfq-sched.c | 1207 +++++++++++++++++ - block/bfq.h | 742 +++++++++++ - 5 files changed, 6532 insertions(+) - create mode 100644 block/bfq-cgroup.c - create mode 100644 block/bfq-ioc.c - create mode 100644 block/bfq-iosched.c - create mode 100644 block/bfq-sched.c - create mode 100644 block/bfq.h - -diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -new file mode 100644 -index 0000000..f742806 ---- /dev/null -+++ block/bfq-cgroup.c -@@ -0,0 +1,930 @@ -+/* -+ * BFQ: CGROUPS support. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ * -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ -+ * file. -+ */ -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ -+static DEFINE_MUTEX(bfqio_mutex); -+ -+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp) -+{ -+ return bgrp ? !bgrp->online : false; -+} -+ -+static struct bfqio_cgroup bfqio_root_cgroup = { -+ .weight = BFQ_DEFAULT_GRP_WEIGHT, -+ .ioprio = BFQ_DEFAULT_GRP_IOPRIO, -+ .ioprio_class = BFQ_DEFAULT_GRP_CLASS, -+}; -+ -+static inline void bfq_init_entity(struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+ entity->weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; -+ entity->ioprio = entity->new_ioprio; -+ entity->ioprio_class = entity->new_ioprio_class; -+ entity->parent = bfqg->my_entity; -+ entity->sched_data = &bfqg->sched_data; -+} -+ -+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct bfqio_cgroup, css) : NULL; -+} -+ -+/* -+ * Search the bfq_group for bfqd into the hash table (by now only a list) -+ * of bgrp. Must be called under rcu_read_lock(). 
-+ */ -+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp, -+ struct bfq_data *bfqd) -+{ -+ struct bfq_group *bfqg; -+ void *key; -+ -+ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) { -+ key = rcu_dereference(bfqg->bfqd); -+ if (key == bfqd) -+ return bfqg; -+ } -+ -+ return NULL; -+} -+ -+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp, -+ struct bfq_group *bfqg) -+{ -+ struct bfq_entity *entity = &bfqg->entity; -+ -+ /* -+ * If the weight of the entity has never been set via the sysfs -+ * interface, then bgrp->weight == 0. In this case we initialize -+ * the weight from the current ioprio value. Otherwise, the group -+ * weight, if set, has priority over the ioprio value. -+ */ -+ if (bgrp->weight == 0) { -+ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio); -+ entity->new_ioprio = bgrp->ioprio; -+ } else { -+ entity->new_weight = bgrp->weight; -+ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight); -+ } -+ entity->orig_weight = entity->weight = entity->new_weight; -+ entity->ioprio = entity->new_ioprio; -+ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class; -+ entity->my_sched_data = &bfqg->sched_data; -+ bfqg->active_entities = 0; -+} -+ -+static inline void bfq_group_set_parent(struct bfq_group *bfqg, -+ struct bfq_group *parent) -+{ -+ struct bfq_entity *entity; -+ -+ BUG_ON(parent == NULL); -+ BUG_ON(bfqg == NULL); -+ -+ entity = &bfqg->entity; -+ entity->parent = parent->my_entity; -+ entity->sched_data = &parent->sched_data; -+} -+ -+/** -+ * bfq_group_chain_alloc - allocate a chain of groups. -+ * @bfqd: queue descriptor. -+ * @css: the leaf cgroup_subsys_state this chain starts from. -+ * -+ * Allocate a chain of groups starting from the one belonging to -+ * @cgroup up to the root cgroup. Stop if a cgroup on the chain -+ * to the root has already an allocated group on @bfqd. 
-+ */ -+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp; -+ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL; -+ -+ for (; css != NULL; css = css->parent) { -+ bgrp = css_to_bfqio(css); -+ -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ if (bfqg != NULL) { -+ /* -+ * All the cgroups in the path from there to the -+ * root must have a bfq_group for bfqd, so we don't -+ * need any more allocations. -+ */ -+ break; -+ } -+ -+ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC); -+ if (bfqg == NULL) -+ goto cleanup; -+ -+ bfq_group_init_entity(bgrp, bfqg); -+ bfqg->my_entity = &bfqg->entity; -+ -+ if (leaf == NULL) { -+ leaf = bfqg; -+ prev = leaf; -+ } else { -+ bfq_group_set_parent(prev, bfqg); -+ /* -+ * Build a list of allocated nodes using the bfqd -+ * filed, that is still unused and will be -+ * initialized only after the node will be -+ * connected. -+ */ -+ prev->bfqd = bfqg; -+ prev = bfqg; -+ } -+ } -+ -+ return leaf; -+ -+cleanup: -+ while (leaf != NULL) { -+ prev = leaf; -+ leaf = leaf->bfqd; -+ kfree(prev); -+ } -+ -+ return NULL; -+} -+ -+/** -+ * bfq_group_chain_link - link an allocated group chain to a cgroup -+ * hierarchy. -+ * @bfqd: the queue descriptor. -+ * @css: the leaf cgroup_subsys_state to start from. -+ * @leaf: the leaf group (to be associated to @cgroup). -+ * -+ * Try to link a chain of groups to a cgroup hierarchy, connecting the -+ * nodes bottom-up, so we can be sure that when we find a cgroup in the -+ * hierarchy that already as a group associated to @bfqd all the nodes -+ * in the path to the root cgroup have one too. -+ * -+ * On locking: the queue lock protects the hierarchy (there is a hierarchy -+ * per device) while the bfqio_cgroup lock protects the list of groups -+ * belonging to the same cgroup. 
-+ */ -+static void bfq_group_chain_link(struct bfq_data *bfqd, -+ struct cgroup_subsys_state *css, -+ struct bfq_group *leaf) -+{ -+ struct bfqio_cgroup *bgrp; -+ struct bfq_group *bfqg, *next, *prev = NULL; -+ unsigned long flags; -+ -+ assert_spin_locked(bfqd->queue->queue_lock); -+ -+ for (; css != NULL && leaf != NULL; css = css->parent) { -+ bgrp = css_to_bfqio(css); -+ next = leaf->bfqd; -+ -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ BUG_ON(bfqg != NULL); -+ -+ spin_lock_irqsave(&bgrp->lock, flags); -+ -+ rcu_assign_pointer(leaf->bfqd, bfqd); -+ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data); -+ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list); -+ -+ spin_unlock_irqrestore(&bgrp->lock, flags); -+ -+ prev = leaf; -+ leaf = next; -+ } -+ -+ BUG_ON(css == NULL && leaf != NULL); -+ if (css != NULL && prev != NULL) { -+ bgrp = css_to_bfqio(css); -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ bfq_group_set_parent(prev, bfqg); -+ } -+} -+ -+/** -+ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup. -+ * @bfqd: queue descriptor. -+ * @cgroup: cgroup being searched for. -+ * -+ * Return a group associated to @bfqd in @cgroup, allocating one if -+ * necessary. When a group is returned all the cgroups in the path -+ * to the root have a group associated to @bfqd. -+ * -+ * If the allocation fails, return the root group: this breaks guarantees -+ * but is a safe fallback. If this loss becomes a problem it can be -+ * mitigated using the equivalent weight (given by the product of the -+ * weights of the groups in the path from @group to the root) in the -+ * root scheduler. -+ * -+ * We allocate all the missing nodes in the path from the leaf cgroup -+ * to the root and we connect the nodes only after all the allocations -+ * have been successful. 
-+ */ -+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ struct bfq_group *bfqg; -+ -+ bfqg = bfqio_lookup_group(bgrp, bfqd); -+ if (bfqg != NULL) -+ return bfqg; -+ -+ bfqg = bfq_group_chain_alloc(bfqd, css); -+ if (bfqg != NULL) -+ bfq_group_chain_link(bfqd, css, bfqg); -+ else -+ bfqg = bfqd->root_group; -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_bfqq_move - migrate @bfqq to @bfqg. -+ * @bfqd: queue descriptor. -+ * @bfqq: the queue to move. -+ * @entity: @bfqq's entity. -+ * @bfqg: the group to move to. -+ * -+ * Move @bfqq to @bfqg, deactivating it from its old group and reactivating -+ * it on the new one. Avoid putting the entity on the old group idle tree. -+ * -+ * Must be called under the queue lock; the cgroup owning @bfqg must -+ * not disappear (by now this just means that we are called under -+ * rcu_read_lock()). -+ */ -+static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct bfq_entity *entity, struct bfq_group *bfqg) -+{ -+ int busy, resume; -+ -+ busy = bfq_bfqq_busy(bfqq); -+ resume = !RB_EMPTY_ROOT(&bfqq->sort_list); -+ -+ BUG_ON(resume && !entity->on_st); -+ BUG_ON(busy && !resume && entity->on_st && -+ bfqq != bfqd->in_service_queue); -+ -+ if (busy) { -+ BUG_ON(atomic_read(&bfqq->ref) < 2); -+ -+ if (!resume) -+ bfq_del_bfqq_busy(bfqd, bfqq, 0); -+ else -+ bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ } else if (entity->on_st) -+ bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); -+ -+ /* -+ * Here we use a reference to bfqg. We don't need a refcounter -+ * as the cgroup reference will not be dropped, so that its -+ * destroy() callback will not be invoked. 
-+ */ -+ entity->parent = bfqg->my_entity; -+ entity->sched_data = &bfqg->sched_data; -+ -+ if (busy && resume) -+ bfq_activate_bfqq(bfqd, bfqq); -+ -+ if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+} -+ -+/** -+ * __bfq_bic_change_cgroup - move @bic to @cgroup. -+ * @bfqd: the queue descriptor. -+ * @bic: the bic to move. -+ * @cgroup: the cgroup to move to. -+ * -+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller -+ * has to make sure that the reference to cgroup is valid across the call. -+ * -+ * NOTE: an alternative approach might have been to store the current -+ * cgroup in bfqq and getting a reference to it, reducing the lookup -+ * time here, at the price of slightly more complex code. -+ */ -+static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, -+ struct bfq_io_cq *bic, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); -+ struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); -+ struct bfq_entity *entity; -+ struct bfq_group *bfqg; -+ struct bfqio_cgroup *bgrp; -+ -+ bgrp = css_to_bfqio(css); -+ -+ bfqg = bfq_find_alloc_group(bfqd, css); -+ if (async_bfqq != NULL) { -+ entity = &async_bfqq->entity; -+ -+ if (entity->sched_data != &bfqg->sched_data) { -+ bic_set_bfqq(bic, NULL, 0); -+ bfq_log_bfqq(bfqd, async_bfqq, -+ "bic_change_group: %p %d", -+ async_bfqq, atomic_read(&async_bfqq->ref)); -+ bfq_put_queue(async_bfqq); -+ } -+ } -+ -+ if (sync_bfqq != NULL) { -+ entity = &sync_bfqq->entity; -+ if (entity->sched_data != &bfqg->sched_data) -+ bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg); -+ } -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_bic_change_cgroup - move @bic to @cgroup. -+ * @bic: the bic being migrated. -+ * @cgroup: the destination cgroup. -+ * -+ * When the task owning @bic is moved to @cgroup, @bic is immediately -+ * moved into its new parent group. 
-+ */ -+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic, -+ struct cgroup_subsys_state *css) -+{ -+ struct bfq_data *bfqd; -+ unsigned long uninitialized_var(flags); -+ -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), -+ &flags); -+ if (bfqd != NULL) { -+ __bfq_bic_change_cgroup(bfqd, bic, css); -+ bfq_put_bfqd_unlock(bfqd, &flags); -+ } -+} -+ -+/** -+ * bfq_bic_update_cgroup - update the cgroup of @bic. -+ * @bic: the @bic to update. -+ * -+ * Make sure that @bic is enqueued in the cgroup of the current task. -+ * We need this in addition to moving bics during the cgroup attach -+ * phase because the task owning @bic could be at its first disk -+ * access or we may end up in the root cgroup as the result of a -+ * memory allocation failure and here we try to move to the right -+ * group. -+ * -+ * Must be called under the queue lock. It is safe to use the returned -+ * value even after the rcu_read_unlock() as the migration/destruction -+ * paths act under the queue lock too. IOW it is impossible to race with -+ * group migration/destruction and end up with an invalid group as: -+ * a) here cgroup has not yet been destroyed, nor its destroy callback -+ * has started execution, as current holds a reference to it, -+ * b) if it is destroyed after rcu_read_unlock() [after current is -+ * migrated to a different cgroup] its attach() callback will have -+ * taken care of remove all the references to the old cgroup data. -+ */ -+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ struct bfq_group *bfqg; -+ struct cgroup_subsys_state *css; -+ -+ BUG_ON(bfqd == NULL); -+ -+ rcu_read_lock(); -+ css = task_css(current, bfqio_cgrp_id); -+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, css); -+ rcu_read_unlock(); -+ -+ return bfqg; -+} -+ -+/** -+ * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. -+ * @st: the service tree being flushed. 
-+ */ -+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entity = st->first_idle; -+ -+ for (; entity != NULL; entity = st->first_idle) -+ __bfq_deactivate_entity(entity, 0); -+} -+ -+/** -+ * bfq_reparent_leaf_entity - move leaf entity to the root_group. -+ * @bfqd: the device data structure with the root group. -+ * @entity: the entity to move. -+ */ -+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ BUG_ON(bfqq == NULL); -+ bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); -+ return; -+} -+ -+/** -+ * bfq_reparent_active_entities - move to the root group all active -+ * entities. -+ * @bfqd: the device data structure with the root group. -+ * @bfqg: the group to move from. -+ * @st: the service tree with the entities. -+ * -+ * Needs queue_lock to be taken and reference to be valid over the call. -+ */ -+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ struct bfq_service_tree *st) -+{ -+ struct rb_root *active = &st->active; -+ struct bfq_entity *entity = NULL; -+ -+ if (!RB_EMPTY_ROOT(&st->active)) -+ entity = bfq_entity_of(rb_first(active)); -+ -+ for (; entity != NULL; entity = bfq_entity_of(rb_first(active))) -+ bfq_reparent_leaf_entity(bfqd, entity); -+ -+ if (bfqg->sched_data.in_service_entity != NULL) -+ bfq_reparent_leaf_entity(bfqd, -+ bfqg->sched_data.in_service_entity); -+ -+ return; -+} -+ -+/** -+ * bfq_destroy_group - destroy @bfqg. -+ * @bgrp: the bfqio_cgroup containing @bfqg. -+ * @bfqg: the group being destroyed. -+ * -+ * Destroy @bfqg, making sure that it is not referenced from its parent. 
-+ */ -+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg) -+{ -+ struct bfq_data *bfqd; -+ struct bfq_service_tree *st; -+ struct bfq_entity *entity = bfqg->my_entity; -+ unsigned long uninitialized_var(flags); -+ int i; -+ -+ hlist_del(&bfqg->group_node); -+ -+ /* -+ * Empty all service_trees belonging to this group before -+ * deactivating the group itself. -+ */ -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { -+ st = bfqg->sched_data.service_tree + i; -+ -+ /* -+ * The idle tree may still contain bfq_queues belonging -+ * to exited task because they never migrated to a different -+ * cgroup from the one being destroyed now. No one else -+ * can access them so it's safe to act without any lock. -+ */ -+ bfq_flush_idle_tree(st); -+ -+ /* -+ * It may happen that some queues are still active -+ * (busy) upon group destruction (if the corresponding -+ * processes have been forced to terminate). We move -+ * all the leaf entities corresponding to these queues -+ * to the root_group. -+ * Also, it may happen that the group has an entity -+ * in service, which is disconnected from the active -+ * tree: it must be moved, too. -+ * There is no need to put the sync queues, as the -+ * scheduler has taken no reference. -+ */ -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); -+ if (bfqd != NULL) { -+ bfq_reparent_active_entities(bfqd, bfqg, st); -+ bfq_put_bfqd_unlock(bfqd, &flags); -+ } -+ BUG_ON(!RB_EMPTY_ROOT(&st->active)); -+ BUG_ON(!RB_EMPTY_ROOT(&st->idle)); -+ } -+ BUG_ON(bfqg->sched_data.next_in_service != NULL); -+ BUG_ON(bfqg->sched_data.in_service_entity != NULL); -+ -+ /* -+ * We may race with device destruction, take extra care when -+ * dereferencing bfqg->bfqd. 
-+ */ -+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags); -+ if (bfqd != NULL) { -+ hlist_del(&bfqg->bfqd_node); -+ __bfq_deactivate_entity(entity, 0); -+ bfq_put_async_queues(bfqd, bfqg); -+ bfq_put_bfqd_unlock(bfqd, &flags); -+ } -+ BUG_ON(entity->tree != NULL); -+ -+ /* -+ * No need to defer the kfree() to the end of the RCU grace -+ * period: we are called from the destroy() callback of our -+ * cgroup, so we can be sure that no one is a) still using -+ * this cgroup or b) doing lookups in it. -+ */ -+ kfree(bfqg); -+} -+ -+static void bfq_end_wr_async(struct bfq_data *bfqd) -+{ -+ struct hlist_node *tmp; -+ struct bfq_group *bfqg; -+ -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) -+ bfq_end_wr_async_queues(bfqd, bfqg); -+ bfq_end_wr_async_queues(bfqd, bfqd->root_group); -+} -+ -+/** -+ * bfq_disconnect_groups - disconnect @bfqd from all its groups. -+ * @bfqd: the device descriptor being exited. -+ * -+ * When the device exits we just make sure that no lookup can return -+ * the now unused group structures. They will be deallocated on cgroup -+ * destruction. -+ */ -+static void bfq_disconnect_groups(struct bfq_data *bfqd) -+{ -+ struct hlist_node *tmp; -+ struct bfq_group *bfqg; -+ -+ bfq_log(bfqd, "disconnect_groups beginning"); -+ hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) { -+ hlist_del(&bfqg->bfqd_node); -+ -+ __bfq_deactivate_entity(bfqg->my_entity, 0); -+ -+ /* -+ * Don't remove from the group hash, just set an -+ * invalid key. No lookups can race with the -+ * assignment as bfqd is being destroyed; this -+ * implies also that new elements cannot be added -+ * to the list. 
-+ */ -+ rcu_assign_pointer(bfqg->bfqd, NULL); -+ -+ bfq_log(bfqd, "disconnect_groups: put async for group %p", -+ bfqg); -+ bfq_put_async_queues(bfqd, bfqg); -+ } -+} -+ -+static inline void bfq_free_root_group(struct bfq_data *bfqd) -+{ -+ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup; -+ struct bfq_group *bfqg = bfqd->root_group; -+ -+ bfq_put_async_queues(bfqd, bfqg); -+ -+ spin_lock_irq(&bgrp->lock); -+ hlist_del_rcu(&bfqg->group_node); -+ spin_unlock_irq(&bgrp->lock); -+ -+ /* -+ * No need to synchronize_rcu() here: since the device is gone -+ * there cannot be any read-side access to its root_group. -+ */ -+ kfree(bfqg); -+} -+ -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -+{ -+ struct bfq_group *bfqg; -+ struct bfqio_cgroup *bgrp; -+ int i; -+ -+ bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node); -+ if (bfqg == NULL) -+ return NULL; -+ -+ bfqg->entity.parent = NULL; -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+ -+ bgrp = &bfqio_root_cgroup; -+ spin_lock_irq(&bgrp->lock); -+ rcu_assign_pointer(bfqg->bfqd, bfqd); -+ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data); -+ spin_unlock_irq(&bgrp->lock); -+ -+ return bfqg; -+} -+ -+#define SHOW_FUNCTION(__VAR) \ -+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \ -+ struct cftype *cftype) \ -+{ \ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \ -+ u64 ret = -ENODEV; \ -+ \ -+ mutex_lock(&bfqio_mutex); \ -+ if (bfqio_is_removed(bgrp)) \ -+ goto out_unlock; \ -+ \ -+ spin_lock_irq(&bgrp->lock); \ -+ ret = bgrp->__VAR; \ -+ spin_unlock_irq(&bgrp->lock); \ -+ \ -+out_unlock: \ -+ mutex_unlock(&bfqio_mutex); \ -+ return ret; \ -+} -+ -+SHOW_FUNCTION(weight); -+SHOW_FUNCTION(ioprio); -+SHOW_FUNCTION(ioprio_class); -+#undef SHOW_FUNCTION -+ -+#define STORE_FUNCTION(__VAR, __MIN, __MAX) \ -+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\ -+ struct cftype *cftype, 
\ -+ u64 val) \ -+{ \ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \ -+ struct bfq_group *bfqg; \ -+ int ret = -EINVAL; \ -+ \ -+ if (val < (__MIN) || val > (__MAX)) \ -+ return ret; \ -+ \ -+ ret = -ENODEV; \ -+ mutex_lock(&bfqio_mutex); \ -+ if (bfqio_is_removed(bgrp)) \ -+ goto out_unlock; \ -+ ret = 0; \ -+ \ -+ spin_lock_irq(&bgrp->lock); \ -+ bgrp->__VAR = (unsigned short)val; \ -+ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \ -+ /* \ -+ * Setting the ioprio_changed flag of the entity \ -+ * to 1 with new_##__VAR == ##__VAR would re-set \ -+ * the value of the weight to its ioprio mapping. \ -+ * Set the flag only if necessary. \ -+ */ \ -+ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \ -+ bfqg->entity.new_##__VAR = (unsigned short)val; \ -+ /* \ -+ * Make sure that the above new value has been \ -+ * stored in bfqg->entity.new_##__VAR before \ -+ * setting the ioprio_changed flag. In fact, \ -+ * this flag may be read asynchronously (in \ -+ * critical sections protected by a different \ -+ * lock than that held here), and finding this \ -+ * flag set may cause the execution of the code \ -+ * for updating parameters whose value may \ -+ * depend also on bfqg->entity.new_##__VAR (in \ -+ * __bfq_entity_update_weight_prio). \ -+ * This barrier makes sure that the new value \ -+ * of bfqg->entity.new_##__VAR is correctly \ -+ * seen in that code. 
\ -+ */ \ -+ smp_wmb(); \ -+ bfqg->entity.ioprio_changed = 1; \ -+ } \ -+ } \ -+ spin_unlock_irq(&bgrp->lock); \ -+ \ -+out_unlock: \ -+ mutex_unlock(&bfqio_mutex); \ -+ return ret; \ -+} -+ -+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT); -+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1); -+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE); -+#undef STORE_FUNCTION -+ -+static struct cftype bfqio_files[] = { -+ { -+ .name = "weight", -+ .read_u64 = bfqio_cgroup_weight_read, -+ .write_u64 = bfqio_cgroup_weight_write, -+ }, -+ { -+ .name = "ioprio", -+ .read_u64 = bfqio_cgroup_ioprio_read, -+ .write_u64 = bfqio_cgroup_ioprio_write, -+ }, -+ { -+ .name = "ioprio_class", -+ .read_u64 = bfqio_cgroup_ioprio_class_read, -+ .write_u64 = bfqio_cgroup_ioprio_class_write, -+ }, -+ { }, /* terminate */ -+}; -+ -+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state -+ *parent_css) -+{ -+ struct bfqio_cgroup *bgrp; -+ -+ if (parent_css != NULL) { -+ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL); -+ if (bgrp == NULL) -+ return ERR_PTR(-ENOMEM); -+ } else -+ bgrp = &bfqio_root_cgroup; -+ -+ spin_lock_init(&bgrp->lock); -+ INIT_HLIST_HEAD(&bgrp->group_data); -+ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO; -+ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS; -+ -+ return &bgrp->css; -+} -+ -+/* -+ * We cannot support shared io contexts, as we have no means to support -+ * two tasks with the same ioc in two different groups without major rework -+ * of the main bic/bfqq data structures. By now we allow a task to change -+ * its cgroup only if it's the only owner of its ioc; the drawback of this -+ * behavior is that a group containing a task that forked using CLONE_IO -+ * will not be destroyed until the tasks sharing the ioc die. 
-+ */ -+static int bfqio_can_attach(struct cgroup_subsys_state *css, -+ struct cgroup_taskset *tset) -+{ -+ struct task_struct *task; -+ struct io_context *ioc; -+ int ret = 0; -+ -+ cgroup_taskset_for_each(task, tset) { -+ /* -+ * task_lock() is needed to avoid races with -+ * exit_io_context() -+ */ -+ task_lock(task); -+ ioc = task->io_context; -+ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1) -+ /* -+ * ioc == NULL means that the task is either too -+ * young or exiting: if it has still no ioc the -+ * ioc can't be shared, if the task is exiting the -+ * attach will fail anyway, no matter what we -+ * return here. -+ */ -+ ret = -EINVAL; -+ task_unlock(task); -+ if (ret) -+ break; -+ } -+ -+ return ret; -+} -+ -+static void bfqio_attach(struct cgroup_subsys_state *css, -+ struct cgroup_taskset *tset) -+{ -+ struct task_struct *task; -+ struct io_context *ioc; -+ struct io_cq *icq; -+ -+ /* -+ * IMPORTANT NOTE: The move of more than one process at a time to a -+ * new group has not yet been tested. -+ */ -+ cgroup_taskset_for_each(task, tset) { -+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); -+ if (ioc) { -+ /* -+ * Handle cgroup change here. -+ */ -+ rcu_read_lock(); -+ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node) -+ if (!strncmp( -+ icq->q->elevator->type->elevator_name, -+ "bfq", ELV_NAME_MAX)) -+ bfq_bic_change_cgroup(icq_to_bic(icq), -+ css); -+ rcu_read_unlock(); -+ put_io_context(ioc); -+ } -+ } -+} -+ -+static void bfqio_destroy(struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ struct hlist_node *tmp; -+ struct bfq_group *bfqg; -+ -+ /* -+ * Since we are destroying the cgroup, there are no more tasks -+ * referencing it, and all the RCU grace periods that may have -+ * referenced it are ended (as the destruction of the parent -+ * cgroup is RCU-safe); bgrp->group_data will not be accessed by -+ * anything else and we don't need any synchronization. 
-+ */ -+ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node) -+ bfq_destroy_group(bgrp, bfqg); -+ -+ BUG_ON(!hlist_empty(&bgrp->group_data)); -+ -+ kfree(bgrp); -+} -+ -+static int bfqio_css_online(struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ -+ mutex_lock(&bfqio_mutex); -+ bgrp->online = true; -+ mutex_unlock(&bfqio_mutex); -+ -+ return 0; -+} -+ -+static void bfqio_css_offline(struct cgroup_subsys_state *css) -+{ -+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); -+ -+ mutex_lock(&bfqio_mutex); -+ bgrp->online = false; -+ mutex_unlock(&bfqio_mutex); -+} -+ -+struct cgroup_subsys bfqio_cgrp_subsys = { -+ .css_alloc = bfqio_create, -+ .css_online = bfqio_css_online, -+ .css_offline = bfqio_css_offline, -+ .can_attach = bfqio_can_attach, -+ .attach = bfqio_attach, -+ .css_free = bfqio_destroy, -+ .base_cftypes = bfqio_files, -+}; -+#else -+static inline void bfq_init_entity(struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+ entity->weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; -+ entity->ioprio = entity->new_ioprio; -+ entity->ioprio_class = entity->new_ioprio_class; -+ entity->sched_data = &bfqg->sched_data; -+} -+ -+static inline struct bfq_group * -+bfq_bic_update_cgroup(struct bfq_io_cq *bic) -+{ -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ return bfqd->root_group; -+} -+ -+static inline void bfq_bfqq_move(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_entity *entity, -+ struct bfq_group *bfqg) -+{ -+} -+ -+static void bfq_end_wr_async(struct bfq_data *bfqd) -+{ -+ bfq_end_wr_async_queues(bfqd, bfqd->root_group); -+} -+ -+static inline void bfq_disconnect_groups(struct bfq_data *bfqd) -+{ -+ bfq_put_async_queues(bfqd, bfqd->root_group); -+} -+ -+static inline void bfq_free_root_group(struct bfq_data *bfqd) -+{ -+ kfree(bfqd->root_group); -+} -+ -+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node) -+{ -+ struct 
bfq_group *bfqg; -+ int i; -+ -+ bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); -+ if (bfqg == NULL) -+ return NULL; -+ -+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) -+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; -+ -+ return bfqg; -+} -+#endif -diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c -new file mode 100644 -index 0000000..7f6b000 ---- /dev/null -+++ block/bfq-ioc.c -@@ -0,0 +1,36 @@ -+/* -+ * BFQ: I/O context handling. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ */ -+ -+/** -+ * icq_to_bic - convert iocontext queue structure to bfq_io_cq. -+ * @icq: the iocontext queue. -+ */ -+static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq) -+{ -+ /* bic->icq is the first member, %NULL will convert to %NULL */ -+ return container_of(icq, struct bfq_io_cq, icq); -+} -+ -+/** -+ * bfq_bic_lookup - search into @ioc a bic associated to @bfqd. -+ * @bfqd: the lookup key. -+ * @ioc: the io_context of the process doing I/O. -+ * -+ * Queue lock must be held. -+ */ -+static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd, -+ struct io_context *ioc) -+{ -+ if (ioc) -+ return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue)); -+ return NULL; -+} -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -new file mode 100644 -index 0000000..6cf7bca ---- /dev/null -+++ block/bfq-iosched.c -@@ -0,0 +1,3617 @@ -+/* -+ * Budget Fair Queueing (BFQ) disk scheduler. -+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ * -+ * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ -+ * file. -+ * -+ * BFQ is a proportional-share storage-I/O scheduling algorithm based on -+ * the slice-by-slice service scheme of CFQ. 
But BFQ assigns budgets, -+ * measured in number of sectors, to processes instead of time slices. The -+ * device is not granted to the in-service process for a given time slice, -+ * but until it has exhausted its assigned budget. This change from the time -+ * to the service domain allows BFQ to distribute the device throughput -+ * among processes as desired, without any distortion due to ZBR, workload -+ * fluctuations or other factors. BFQ uses an ad hoc internal scheduler, -+ * called B-WF2Q+, to schedule processes according to their budgets. More -+ * precisely, BFQ schedules queues associated to processes. Thanks to the -+ * accurate policy of B-WF2Q+, BFQ can afford to assign high budgets to -+ * I/O-bound processes issuing sequential requests (to boost the -+ * throughput), and yet guarantee a low latency to interactive and soft -+ * real-time applications. -+ * -+ * BFQ is described in [1], where also a reference to the initial, more -+ * theoretical paper on BFQ can be found. The interested reader can find -+ * in the latter paper full details on the main algorithm, as well as -+ * formulas of the guarantees and formal proofs of all the properties. -+ * With respect to the version of BFQ presented in these papers, this -+ * implementation adds a few more heuristics, such as the one that -+ * guarantees a low latency to soft real-time applications, and a -+ * hierarchical extension based on H-WF2Q+. -+ * -+ * B-WF2Q+ is based on WF2Q+, that is described in [2], together with -+ * H-WF2Q+, while the augmented tree used to implement B-WF2Q+ with O(log N) -+ * complexity derives from the one introduced with EEVDF in [3]. -+ * -+ * [1] P. Valente and M. Andreolini, ``Improving Application Responsiveness -+ * with the BFQ Disk I/O Scheduler'', -+ * Proceedings of the 5th Annual International Systems and Storage -+ * Conference (SYSTOR '12), June 2012. -+ * -+ * http://algogroup.unimo.it/people/paolo/disk_sched/bf1-v1-suite-results.pdf -+ * -+ * [2] Jon C.R. 
Bennett and H. Zhang, ``Hierarchical Packet Fair Queueing -+ * Algorithms,'' IEEE/ACM Transactions on Networking, 5(5):675-689, -+ * Oct 1997. -+ * -+ * http://www.cs.cmu.edu/~hzhang/papers/TON-97-Oct.ps.gz -+ * -+ * [3] I. Stoica and H. Abdel-Wahab, ``Earliest Eligible Virtual Deadline -+ * First: A Flexible and Accurate Mechanism for Proportional Share -+ * Resource Allocation,'' technical report. -+ * -+ * http://www.cs.berkeley.edu/~istoica/papers/eevdf-tr-95.pdf -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "bfq.h" -+#include "blk.h" -+ -+/* Max number of dispatches in one round of service. */ -+static const int bfq_quantum = 4; -+ -+/* Expiration time of sync (0) and async (1) requests, in jiffies. */ -+static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; -+ -+/* Maximum backwards seek, in KiB. */ -+static const int bfq_back_max = 16 * 1024; -+ -+/* Penalty of a backwards seek, in number of sectors. */ -+static const int bfq_back_penalty = 2; -+ -+/* Idling period duration, in jiffies. */ -+static int bfq_slice_idle = HZ / 125; -+ -+/* Default maximum budget values, in sectors and number of requests. */ -+static const int bfq_default_max_budget = 16 * 1024; -+static const int bfq_max_budget_async_rq = 4; -+ -+/* -+ * Async to sync throughput distribution is controlled as follows: -+ * when an async request is served, the entity is charged the number -+ * of sectors of the request, multiplied by the factor below -+ */ -+static const int bfq_async_charge_factor = 10; -+ -+/* Default timeout values, in jiffies, approximating CFQ defaults. */ -+static const int bfq_timeout_sync = HZ / 8; -+static int bfq_timeout_async = HZ / 25; -+ -+struct kmem_cache *bfq_pool; -+ -+/* Below this threshold (in ms), we consider thinktime immediate. */ -+#define BFQ_MIN_TT 2 -+ -+/* hw_tag detection: parallel requests threshold and min samples needed. 
*/ -+#define BFQ_HW_QUEUE_THRESHOLD 4 -+#define BFQ_HW_QUEUE_SAMPLES 32 -+ -+#define BFQQ_SEEK_THR (sector_t)(8 * 1024) -+#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) -+ -+/* Min samples used for peak rate estimation (for autotuning). */ -+#define BFQ_PEAK_RATE_SAMPLES 32 -+ -+/* Shift used for peak rate fixed precision calculations. */ -+#define BFQ_RATE_SHIFT 16 -+ -+/* -+ * By default, BFQ computes the duration of the weight raising for -+ * interactive applications automatically, using the following formula: -+ * duration = (R / r) * T, where r is the peak rate of the device, and -+ * R and T are two reference parameters. -+ * In particular, R is the peak rate of the reference device (see below), -+ * and T is a reference time: given the systems that are likely to be -+ * installed on the reference device according to its speed class, T is -+ * about the maximum time needed, under BFQ and while reading two files in -+ * parallel, to load typical large applications on these systems. -+ * In practice, the slower/faster the device at hand is, the more/less it -+ * takes to load applications with respect to the reference device. -+ * Accordingly, the longer/shorter BFQ grants weight raising to interactive -+ * applications. -+ * -+ * BFQ uses four different reference pairs (R, T), depending on: -+ * . whether the device is rotational or non-rotational; -+ * . whether the device is slow, such as old or portable HDDs, as well as -+ * SD cards, or fast, such as newer HDDs and SSDs. -+ * -+ * The device's speed class is dynamically (re)detected in -+ * bfq_update_peak_rate() every time the estimated peak rate is updated. -+ * -+ * In the following definitions, R_slow[0]/R_fast[0] and T_slow[0]/T_fast[0] -+ * are the reference values for a slow/fast rotational device, whereas -+ * R_slow[1]/R_fast[1] and T_slow[1]/T_fast[1] are the reference values for -+ * a slow/fast non-rotational device. 
Finally, device_speed_thresh are the -+ * thresholds used to switch between speed classes. -+ * Both the reference peak rates and the thresholds are measured in -+ * sectors/usec, left-shifted by BFQ_RATE_SHIFT. -+ */ -+static int R_slow[2] = {1536, 10752}; -+static int R_fast[2] = {17415, 34791}; -+/* -+ * To improve readability, a conversion function is used to initialize the -+ * following arrays, which entails that they can be initialized only in a -+ * function. -+ */ -+static int T_slow[2]; -+static int T_fast[2]; -+static int device_speed_thresh[2]; -+ -+#define BFQ_SERVICE_TREE_INIT ((struct bfq_service_tree) \ -+ { RB_ROOT, RB_ROOT, NULL, NULL, 0, 0 }) -+ -+#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0]) -+#define RQ_BFQQ(rq) ((rq)->elv.priv[1]) -+ -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd); -+ -+#include "bfq-ioc.c" -+#include "bfq-sched.c" -+#include "bfq-cgroup.c" -+ -+#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\ -+ IOPRIO_CLASS_IDLE) -+#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\ -+ IOPRIO_CLASS_RT) -+ -+#define bfq_sample_valid(samples) ((samples) > 80) -+ -+/* -+ * We regard a request as SYNC, if either it's a read or has the SYNC bit -+ * set (in which case it could also be a direct WRITE). -+ */ -+static inline int bfq_bio_sync(struct bio *bio) -+{ -+ if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) -+ return 1; -+ -+ return 0; -+} -+ -+/* -+ * Scheduler run of queue, if there are requests pending and no one in the -+ * driver that will restart queueing. -+ */ -+static inline void bfq_schedule_dispatch(struct bfq_data *bfqd) -+{ -+ if (bfqd->queued != 0) { -+ bfq_log(bfqd, "schedule dispatch"); -+ kblockd_schedule_work(bfqd->queue, &bfqd->unplug_work); -+ } -+} -+ -+/* -+ * Lifted from AS - choose which of rq1 and rq2 that is best served now. -+ * We choose the request that is closesr to the head right now. 
Distance -+ * behind the head is penalized and only allowed to a certain extent. -+ */ -+static struct request *bfq_choose_req(struct bfq_data *bfqd, -+ struct request *rq1, -+ struct request *rq2, -+ sector_t last) -+{ -+ sector_t s1, s2, d1 = 0, d2 = 0; -+ unsigned long back_max; -+#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ -+#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ -+ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ -+ -+ if (rq1 == NULL || rq1 == rq2) -+ return rq2; -+ if (rq2 == NULL) -+ return rq1; -+ -+ if (rq_is_sync(rq1) && !rq_is_sync(rq2)) -+ return rq1; -+ else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) -+ return rq2; -+ if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) -+ return rq1; -+ else if ((rq2->cmd_flags & REQ_META) && !(rq1->cmd_flags & REQ_META)) -+ return rq2; -+ -+ s1 = blk_rq_pos(rq1); -+ s2 = blk_rq_pos(rq2); -+ -+ /* -+ * By definition, 1KiB is 2 sectors. -+ */ -+ back_max = bfqd->bfq_back_max * 2; -+ -+ /* -+ * Strict one way elevator _except_ in the case where we allow -+ * short backward seeks which are biased as twice the cost of a -+ * similar forward seek. -+ */ -+ if (s1 >= last) -+ d1 = s1 - last; -+ else if (s1 + back_max >= last) -+ d1 = (last - s1) * bfqd->bfq_back_penalty; -+ else -+ wrap |= BFQ_RQ1_WRAP; -+ -+ if (s2 >= last) -+ d2 = s2 - last; -+ else if (s2 + back_max >= last) -+ d2 = (last - s2) * bfqd->bfq_back_penalty; -+ else -+ wrap |= BFQ_RQ2_WRAP; -+ -+ /* Found required data */ -+ -+ /* -+ * By doing switch() on the bit mask "wrap" we avoid having to -+ * check two variables for all permutations: --> faster! 
-+ */ -+ switch (wrap) { -+ case 0: /* common case for CFQ: rq1 and rq2 not wrapped */ -+ if (d1 < d2) -+ return rq1; -+ else if (d2 < d1) -+ return rq2; -+ else { -+ if (s1 >= s2) -+ return rq1; -+ else -+ return rq2; -+ } -+ -+ case BFQ_RQ2_WRAP: -+ return rq1; -+ case BFQ_RQ1_WRAP: -+ return rq2; -+ case (BFQ_RQ1_WRAP|BFQ_RQ2_WRAP): /* both rqs wrapped */ -+ default: -+ /* -+ * Since both rqs are wrapped, -+ * start with the one that's further behind head -+ * (--> only *one* back seek required), -+ * since back seek takes more time than forward. -+ */ -+ if (s1 <= s2) -+ return rq1; -+ else -+ return rq2; -+ } -+} -+ -+static struct bfq_queue * -+bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root, -+ sector_t sector, struct rb_node **ret_parent, -+ struct rb_node ***rb_link) -+{ -+ struct rb_node **p, *parent; -+ struct bfq_queue *bfqq = NULL; -+ -+ parent = NULL; -+ p = &root->rb_node; -+ while (*p) { -+ struct rb_node **n; -+ -+ parent = *p; -+ bfqq = rb_entry(parent, struct bfq_queue, pos_node); -+ -+ /* -+ * Sort strictly based on sector. Smallest to the left, -+ * largest to the right. -+ */ -+ if (sector > blk_rq_pos(bfqq->next_rq)) -+ n = &(*p)->rb_right; -+ else if (sector < blk_rq_pos(bfqq->next_rq)) -+ n = &(*p)->rb_left; -+ else -+ break; -+ p = n; -+ bfqq = NULL; -+ } -+ -+ *ret_parent = parent; -+ if (rb_link) -+ *rb_link = p; -+ -+ bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", -+ (long long unsigned)sector, -+ bfqq != NULL ? 
bfqq->pid : 0); -+ -+ return bfqq; -+} -+ -+static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct rb_node **p, *parent; -+ struct bfq_queue *__bfqq; -+ -+ if (bfqq->pos_root != NULL) { -+ rb_erase(&bfqq->pos_node, bfqq->pos_root); -+ bfqq->pos_root = NULL; -+ } -+ -+ if (bfq_class_idle(bfqq)) -+ return; -+ if (!bfqq->next_rq) -+ return; -+ -+ bfqq->pos_root = &bfqd->rq_pos_tree; -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root, -+ blk_rq_pos(bfqq->next_rq), &parent, &p); -+ if (__bfqq == NULL) { -+ rb_link_node(&bfqq->pos_node, parent, p); -+ rb_insert_color(&bfqq->pos_node, bfqq->pos_root); -+ } else -+ bfqq->pos_root = NULL; -+} -+ -+/* -+ * Tell whether there are active queues or groups with differentiated weights. -+ */ -+static inline bool bfq_differentiated_weights(struct bfq_data *bfqd) -+{ -+ BUG_ON(!bfqd->hw_tag); -+ /* -+ * For weights to differ, at least one of the trees must contain -+ * at least two nodes. -+ */ -+ return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) && -+ (bfqd->queue_weights_tree.rb_node->rb_left || -+ bfqd->queue_weights_tree.rb_node->rb_right) -+#ifdef CONFIG_CGROUP_BFQIO -+ ) || -+ (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) && -+ (bfqd->group_weights_tree.rb_node->rb_left || -+ bfqd->group_weights_tree.rb_node->rb_right) -+#endif -+ ); -+} -+ -+/* -+ * If the weight-counter tree passed as input contains no counter for -+ * the weight of the input entity, then add that counter; otherwise just -+ * increment the existing counter. -+ * -+ * Note that weight-counter trees contain few nodes in mostly symmetric -+ * scenarios. For example, if all queues have the same weight, then the -+ * weight-counter tree for the queues may contain at most one node. -+ * This holds even if low_latency is on, because weight-raised queues -+ * are not inserted in the tree. -+ * In most scenarios, the rate at which nodes are created/destroyed -+ * should be low too. 
-+ */ -+static void bfq_weights_tree_add(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root) -+{ -+ struct rb_node **new = &(root->rb_node), *parent = NULL; -+ -+ /* -+ * Do not insert if: -+ * - the device does not support queueing; -+ * - the entity is already associated with a counter, which happens if: -+ * 1) the entity is associated with a queue, 2) a request arrival -+ * has caused the queue to become both non-weight-raised, and hence -+ * change its weight, and backlogged; in this respect, each -+ * of the two events causes an invocation of this function, -+ * 3) this is the invocation of this function caused by the second -+ * event. This second invocation is actually useless, and we handle -+ * this fact by exiting immediately. More efficient or clearer -+ * solutions might possibly be adopted. -+ */ -+ if (!bfqd->hw_tag || entity->weight_counter) -+ return; -+ -+ while (*new) { -+ struct bfq_weight_counter *__counter = container_of(*new, -+ struct bfq_weight_counter, -+ weights_node); -+ parent = *new; -+ -+ if (entity->weight == __counter->weight) { -+ entity->weight_counter = __counter; -+ goto inc_counter; -+ } -+ if (entity->weight < __counter->weight) -+ new = &((*new)->rb_left); -+ else -+ new = &((*new)->rb_right); -+ } -+ -+ entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter), -+ GFP_ATOMIC); -+ entity->weight_counter->weight = entity->weight; -+ rb_link_node(&entity->weight_counter->weights_node, parent, new); -+ rb_insert_color(&entity->weight_counter->weights_node, root); -+ -+inc_counter: -+ entity->weight_counter->num_active++; -+} -+ -+/* -+ * Decrement the weight counter associated with the entity, and, if the -+ * counter reaches 0, remove the counter from the tree. -+ * See the comments to the function bfq_weights_tree_add() for considerations -+ * about overhead. 
-+ */ -+static void bfq_weights_tree_remove(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root) -+{ -+ /* -+ * Check whether the entity is actually associated with a counter. -+ * In fact, the device may not be considered NCQ-capable for a while, -+ * which implies that no insertion in the weight trees is performed, -+ * after which the device may start to be deemed NCQ-capable, and hence -+ * this function may start to be invoked. This may cause the function -+ * to be invoked for entities that are not associated with any counter. -+ */ -+ if (!entity->weight_counter) -+ return; -+ -+ BUG_ON(RB_EMPTY_ROOT(root)); -+ BUG_ON(entity->weight_counter->weight != entity->weight); -+ -+ BUG_ON(!entity->weight_counter->num_active); -+ entity->weight_counter->num_active--; -+ if (entity->weight_counter->num_active > 0) -+ goto reset_entity_pointer; -+ -+ rb_erase(&entity->weight_counter->weights_node, root); -+ kfree(entity->weight_counter); -+ -+reset_entity_pointer: -+ entity->weight_counter = NULL; -+} -+ -+static struct request *bfq_find_next_rq(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct request *last) -+{ -+ struct rb_node *rbnext = rb_next(&last->rb_node); -+ struct rb_node *rbprev = rb_prev(&last->rb_node); -+ struct request *next = NULL, *prev = NULL; -+ -+ BUG_ON(RB_EMPTY_NODE(&last->rb_node)); -+ -+ if (rbprev != NULL) -+ prev = rb_entry_rq(rbprev); -+ -+ if (rbnext != NULL) -+ next = rb_entry_rq(rbnext); -+ else { -+ rbnext = rb_first(&bfqq->sort_list); -+ if (rbnext && rbnext != &last->rb_node) -+ next = rb_entry_rq(rbnext); -+ } -+ -+ return bfq_choose_req(bfqd, next, prev, blk_rq_pos(last)); -+} -+ -+/* see the definition of bfq_async_charge_factor for details */ -+static inline unsigned long bfq_serv_to_charge(struct request *rq, -+ struct bfq_queue *bfqq) -+{ -+ return blk_rq_sectors(rq) * -+ (1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) * -+ bfq_async_charge_factor)); -+} -+ -+/** -+ * bfq_updated_next_req - 
update the queue after a new next_rq selection. -+ * @bfqd: the device data the queue belongs to. -+ * @bfqq: the queue to update. -+ * -+ * If the first request of a queue changes we make sure that the queue -+ * has enough budget to serve at least its first request (if the -+ * request has grown). We do this because if the queue has not enough -+ * budget for its first request, it has to go through two dispatch -+ * rounds to actually get it dispatched. -+ */ -+static void bfq_updated_next_req(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ struct request *next_rq = bfqq->next_rq; -+ unsigned long new_budget; -+ -+ if (next_rq == NULL) -+ return; -+ -+ if (bfqq == bfqd->in_service_queue) -+ /* -+ * In order not to break guarantees, budgets cannot be -+ * changed after an entity has been selected. -+ */ -+ return; -+ -+ BUG_ON(entity->tree != &st->active); -+ BUG_ON(entity == entity->sched_data->in_service_entity); -+ -+ new_budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ if (entity->budget != new_budget) { -+ entity->budget = new_budget; -+ bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu", -+ new_budget); -+ bfq_activate_bfqq(bfqd, bfqq); -+ } -+} -+ -+static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) -+{ -+ u64 dur; -+ -+ if (bfqd->bfq_wr_max_time > 0) -+ return bfqd->bfq_wr_max_time; -+ -+ dur = bfqd->RT_prod; -+ do_div(dur, bfqd->peak_rate); -+ -+ return dur; -+} -+ -+static void bfq_add_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_data *bfqd = bfqq->bfqd; -+ struct request *next_rq, *prev; -+ unsigned long old_wr_coeff = bfqq->wr_coeff; -+ int idle_for_long_time = 0; -+ -+ bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); -+ bfqq->queued[rq_is_sync(rq)]++; -+ bfqd->queued++; -+ 
-+ elv_rb_add(&bfqq->sort_list, rq); -+ -+ /* -+ * Check if this request is a better next-serve candidate. -+ */ -+ prev = bfqq->next_rq; -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); -+ BUG_ON(next_rq == NULL); -+ bfqq->next_rq = next_rq; -+ -+ /* -+ * Adjust priority tree position, if next_rq changes. -+ */ -+ if (prev != bfqq->next_rq) -+ bfq_rq_pos_tree_add(bfqd, bfqq); -+ -+ if (!bfq_bfqq_busy(bfqq)) { -+ int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ time_is_before_jiffies(bfqq->soft_rt_next_start); -+ idle_for_long_time = time_is_before_jiffies( -+ bfqq->budget_timeout + -+ bfqd->bfq_wr_min_idle_time); -+ entity->budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ -+ if (!bfq_bfqq_IO_bound(bfqq)) { -+ if (time_before(jiffies, -+ RQ_BIC(rq)->ttime.last_end_request + -+ bfqd->bfq_slice_idle)) { -+ bfqq->requests_within_timer++; -+ if (bfqq->requests_within_timer >= -+ bfqd->bfq_requests_within_timer) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ } else -+ bfqq->requests_within_timer = 0; -+ } -+ -+ if (!bfqd->low_latency) -+ goto add_bfqq_busy; -+ -+ /* -+ * If the queue is not being boosted and has been idle -+ * for enough time, start a weight-raising period -+ */ -+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ if (idle_for_long_time) -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } else if (old_wr_coeff > 1) { -+ if (idle_for_long_time) -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ else if (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt) { -+ bfqq->wr_coeff = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq-> -+ wr_cur_max_time)); -+ } else if 
(time_before( -+ bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time, -+ jiffies + -+ bfqd->bfq_wr_rt_max_time) && -+ soft_rt) { -+ /* -+ * -+ * The remaining weight-raising time is lower -+ * than bfqd->bfq_wr_rt_max_time, which -+ * means that the application is enjoying -+ * weight raising either because deemed soft- -+ * rt in the near past, or because deemed -+ * interactive a long ago. In both cases, -+ * resetting now the current remaining weight- -+ * raising time for the application to the -+ * weight-raising duration for soft rt -+ * applications would not cause any latency -+ * increase for the application (as the new -+ * duration would be higher than the remaining -+ * time). -+ * -+ * In addition, the application is now meeting -+ * the requirements for being deemed soft rt. -+ * In the end we can correctly and safely -+ * (re)charge the weight-raising duration for -+ * the application with the weight-raising -+ * duration for soft rt applications. -+ * -+ * In particular, doing this recharge now, i.e., -+ * before the weight-raising period for the -+ * application finishes, reduces the probability -+ * of the following negative scenario: -+ * 1) the weight of a soft rt application is -+ * raised at startup (as for any newly -+ * created application), -+ * 2) since the application is not interactive, -+ * at a certain time weight-raising is -+ * stopped for the application, -+ * 3) at that time the application happens to -+ * still have pending requests, and hence -+ * is destined to not have a chance to be -+ * deemed soft rt before these requests are -+ * completed (see the comments to the -+ * function bfq_bfqq_softrt_next_start() -+ * for details on soft rt detection), -+ * 4) these pending requests experience a high -+ * latency because the application is not -+ * weight-raised while they are pending. 
-+ */ -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ } -+ } -+ if (old_wr_coeff != bfqq->wr_coeff) -+ entity->ioprio_changed = 1; -+add_bfqq_busy: -+ bfqq->last_idle_bklogged = jiffies; -+ bfqq->service_from_backlogged = 0; -+ bfq_clear_bfqq_softrt_update(bfqq); -+ bfq_add_bfqq_busy(bfqd, bfqq); -+ } else { -+ if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && -+ time_is_before_jiffies( -+ bfqq->last_wr_start_finish + -+ bfqd->bfq_wr_min_inter_arr_async)) { -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -+ -+ bfqd->wr_busy_queues++; -+ entity->ioprio_changed = 1; -+ bfq_log_bfqq(bfqd, bfqq, -+ "non-idle wrais starting at %lu, rais_max_time %u", -+ jiffies, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ if (prev != bfqq->next_rq) -+ bfq_updated_next_req(bfqd, bfqq); -+ } -+ -+ if (bfqd->low_latency && -+ (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || -+ idle_for_long_time)) -+ bfqq->last_wr_start_finish = jiffies; -+} -+ -+static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, -+ struct bio *bio) -+{ -+ struct task_struct *tsk = current; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ bic = bfq_bic_lookup(bfqd, tsk->io_context); -+ if (bic == NULL) -+ return NULL; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ if (bfqq != NULL) -+ return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio)); -+ -+ return NULL; -+} -+ -+static void bfq_activate_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ -+ bfqd->rq_in_driver++; -+ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", -+ (long long unsigned)bfqd->last_position); -+} -+ -+static inline void bfq_deactivate_request(struct request_queue *q, -+ struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ -+ 
BUG_ON(bfqd->rq_in_driver == 0); -+ bfqd->rq_in_driver--; -+} -+ -+static void bfq_remove_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ const int sync = rq_is_sync(rq); -+ -+ if (bfqq->next_rq == rq) { -+ bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); -+ bfq_updated_next_req(bfqd, bfqq); -+ } -+ -+ list_del_init(&rq->queuelist); -+ BUG_ON(bfqq->queued[sync] == 0); -+ bfqq->queued[sync]--; -+ bfqd->queued--; -+ elv_rb_del(&bfqq->sort_list, rq); -+ -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ if (bfq_bfqq_busy(bfqq) && bfqq != bfqd->in_service_queue) -+ bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ /* -+ * Remove queue from request-position tree as it is empty. -+ */ -+ if (bfqq->pos_root != NULL) { -+ rb_erase(&bfqq->pos_node, bfqq->pos_root); -+ bfqq->pos_root = NULL; -+ } -+ } -+ -+ if (rq->cmd_flags & REQ_META) { -+ BUG_ON(bfqq->meta_pending == 0); -+ bfqq->meta_pending--; -+ } -+} -+ -+static int bfq_merge(struct request_queue *q, struct request **req, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct request *__rq; -+ -+ __rq = bfq_find_rq_fmerge(bfqd, bio); -+ if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) { -+ *req = __rq; -+ return ELEVATOR_FRONT_MERGE; -+ } -+ -+ return ELEVATOR_NO_MERGE; -+} -+ -+static void bfq_merged_request(struct request_queue *q, struct request *req, -+ int type) -+{ -+ if (type == ELEVATOR_FRONT_MERGE && -+ rb_prev(&req->rb_node) && -+ blk_rq_pos(req) < -+ blk_rq_pos(container_of(rb_prev(&req->rb_node), -+ struct request, rb_node))) { -+ struct bfq_queue *bfqq = RQ_BFQQ(req); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ struct request *prev, *next_rq; -+ -+ /* Reposition request in its sort_list */ -+ elv_rb_del(&bfqq->sort_list, req); -+ elv_rb_add(&bfqq->sort_list, req); -+ /* Choose next request to be served for bfqq */ -+ prev = bfqq->next_rq; -+ next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req, -+ bfqd->last_position); -+ 
BUG_ON(next_rq == NULL); -+ bfqq->next_rq = next_rq; -+ /* -+ * If next_rq changes, update both the queue's budget to -+ * fit the new request and the queue's position in its -+ * rq_pos_tree. -+ */ -+ if (prev != bfqq->next_rq) { -+ bfq_updated_next_req(bfqd, bfqq); -+ bfq_rq_pos_tree_add(bfqd, bfqq); -+ } -+ } -+} -+ -+static void bfq_merged_requests(struct request_queue *q, struct request *rq, -+ struct request *next) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ /* -+ * Reposition in fifo if next is older than rq. -+ */ -+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && -+ time_before(next->fifo_time, rq->fifo_time)) { -+ list_move(&rq->queuelist, &next->queuelist); -+ rq->fifo_time = next->fifo_time; -+ } -+ -+ if (bfqq->next_rq == next) -+ bfqq->next_rq = rq; -+ -+ bfq_remove_request(next); -+} -+ -+/* Must be called with bfqq != NULL */ -+static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfqq == NULL); -+ if (bfq_bfqq_busy(bfqq)) -+ bfqq->bfqd->wr_busy_queues--; -+ bfqq->wr_coeff = 1; -+ bfqq->wr_cur_max_time = 0; -+ /* Trigger a weight change on the next activation of the queue */ -+ bfqq->entity.ioprio_changed = 1; -+} -+ -+static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -+ struct bfq_group *bfqg) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < IOPRIO_BE_NR; j++) -+ if (bfqg->async_bfqq[i][j] != NULL) -+ bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]); -+ if (bfqg->async_idle_bfqq != NULL) -+ bfq_bfqq_end_wr(bfqg->async_idle_bfqq); -+} -+ -+static void bfq_end_wr(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq; -+ -+ spin_lock_irq(bfqd->queue->queue_lock); -+ -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) -+ bfq_bfqq_end_wr(bfqq); -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) -+ bfq_bfqq_end_wr(bfqq); -+ bfq_end_wr_async(bfqd); -+ -+ spin_unlock_irq(bfqd->queue->queue_lock); -+} -+ -+static int bfq_allow_merge(struct request_queue *q, struct request *rq, -+ 
struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ /* -+ * Disallow merge of a sync bio into an async request. -+ */ -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -+ return 0; -+ -+ /* -+ * Lookup the bfqq that this bio will be queued with. Allow -+ * merge only if rq is queued there. -+ * Queue lock is held here. -+ */ -+ bic = bfq_bic_lookup(bfqd, current->io_context); -+ if (bic == NULL) -+ return 0; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ return bfqq == RQ_BFQQ(rq); -+} -+ -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq != NULL) { -+ bfq_mark_bfqq_must_alloc(bfqq); -+ bfq_mark_bfqq_budget_new(bfqq); -+ bfq_clear_bfqq_fifo_expire(bfqq); -+ -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_in_service_queue, cur-budget = %lu", -+ bfqq->entity.budget); -+ } -+ -+ bfqd->in_service_queue = bfqq; -+} -+ -+/* -+ * Get and set a new queue for service. 
-+ */ -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (!bfqq) -+ bfqq = bfq_get_next_queue(bfqd); -+ else -+ bfq_get_next_queue_forced(bfqd, bfqq); -+ -+ __bfq_set_in_service_queue(bfqd, bfqq); -+ return bfqq; -+} -+ -+static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, -+ struct request *rq) -+{ -+ if (blk_rq_pos(rq) >= bfqd->last_position) -+ return blk_rq_pos(rq) - bfqd->last_position; -+ else -+ return bfqd->last_position - blk_rq_pos(rq); -+} -+ -+/* -+ * Return true if bfqq has no request pending and rq is close enough to -+ * bfqd->last_position, or if rq is closer to bfqd->last_position than -+ * bfqq->next_rq -+ */ -+static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) -+{ -+ return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; -+} -+ -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) -+{ -+ struct rb_root *root = &bfqd->rq_pos_tree; -+ struct rb_node *parent, *node; -+ struct bfq_queue *__bfqq; -+ sector_t sector = bfqd->last_position; -+ -+ if (RB_EMPTY_ROOT(root)) -+ return NULL; -+ -+ /* -+ * First, if we find a request starting at the end of the last -+ * request, choose it. -+ */ -+ __bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL); -+ if (__bfqq != NULL) -+ return __bfqq; -+ -+ /* -+ * If the exact sector wasn't found, the parent of the NULL leaf -+ * will contain the closest sector (rq_pos_tree sorted by -+ * next_request position). 
-+ */ -+ __bfqq = rb_entry(parent, struct bfq_queue, pos_node); -+ if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ return __bfqq; -+ -+ if (blk_rq_pos(__bfqq->next_rq) < sector) -+ node = rb_next(&__bfqq->pos_node); -+ else -+ node = rb_prev(&__bfqq->pos_node); -+ if (node == NULL) -+ return NULL; -+ -+ __bfqq = rb_entry(node, struct bfq_queue, pos_node); -+ if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ return __bfqq; -+ -+ return NULL; -+} -+ -+/* -+ * bfqd - obvious -+ * cur_bfqq - passed in so that we don't decide that the current queue -+ * is closely cooperating with itself. -+ * -+ * We are assuming that cur_bfqq has dispatched at least one request, -+ * and that bfqd->last_position reflects a position on the disk associated -+ * with the I/O issued by cur_bfqq. -+ */ -+static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, -+ struct bfq_queue *cur_bfqq) -+{ -+ struct bfq_queue *bfqq; -+ -+ if (bfq_class_idle(cur_bfqq)) -+ return NULL; -+ if (!bfq_bfqq_sync(cur_bfqq)) -+ return NULL; -+ if (BFQQ_SEEKY(cur_bfqq)) -+ return NULL; -+ -+ /* If device has only one backlogged bfq_queue, don't search. */ -+ if (bfqd->busy_queues == 1) -+ return NULL; -+ -+ /* -+ * We should notice if some of the queues are cooperating, e.g. -+ * working closely on the same area of the disk. In that case, -+ * we can group them together and don't waste time idling. -+ */ -+ bfqq = bfqq_close(bfqd); -+ if (bfqq == NULL || bfqq == cur_bfqq) -+ return NULL; -+ -+ /* -+ * Do not merge queues from different bfq_groups. -+ */ -+ if (bfqq->entity.parent != cur_bfqq->entity.parent) -+ return NULL; -+ -+ /* -+ * It only makes sense to merge sync queues. -+ */ -+ if (!bfq_bfqq_sync(bfqq)) -+ return NULL; -+ if (BFQQ_SEEKY(bfqq)) -+ return NULL; -+ -+ /* -+ * Do not merge queues of different priority classes. 
-+ */ -+ if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq)) -+ return NULL; -+ -+ return bfqq; -+} -+ -+/* -+ * If enough samples have been computed, return the current max budget -+ * stored in bfqd, which is dynamically updated according to the -+ * estimated disk peak rate; otherwise return the default max budget -+ */ -+static inline unsigned long bfq_max_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < 194) -+ return bfq_default_max_budget; -+ else -+ return bfqd->bfq_max_budget; -+} -+ -+/* -+ * Return min budget, which is a fraction of the current or default -+ * max budget (trying with 1/32) -+ */ -+static inline unsigned long bfq_min_budget(struct bfq_data *bfqd) -+{ -+ if (bfqd->budgets_assigned < 194) -+ return bfq_default_max_budget / 32; -+ else -+ return bfqd->bfq_max_budget / 32; -+} -+ -+static void bfq_arm_slice_timer(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfqd->in_service_queue; -+ struct bfq_io_cq *bic; -+ unsigned long sl; -+ -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Processes have exited, don't wait. */ -+ bic = bfqd->in_service_bic; -+ if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0) -+ return; -+ -+ bfq_mark_bfqq_wait_request(bfqq); -+ -+ /* -+ * We don't want to idle for seeks, but we do want to allow -+ * fair distribution of slice time for a process doing back-to-back -+ * seeks. So allow a little bit of time for him to submit a new rq. -+ * -+ * To prevent processes with (partly) seeky workloads from -+ * being too ill-treated, grant them a small fraction of the -+ * assigned budget before reducing the waiting time to -+ * BFQ_MIN_TT. This happened to help reduce latency. -+ */ -+ sl = bfqd->bfq_slice_idle; -+ /* -+ * Unless the queue is being weight-raised, grant only minimum idle -+ * time if the queue either has been seeky for long enough or has -+ * already proved to be constantly seeky. 
-+ */ -+ if (bfq_sample_valid(bfqq->seek_samples) && -+ ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > -+ bfq_max_budget(bfqq->bfqd) / 8) || -+ bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1) -+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); -+ else if (bfqq->wr_coeff > 1) -+ sl = sl * 3; -+ bfqd->last_idling_start = ktime_get(); -+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); -+ bfq_log(bfqd, "arm idle: %u/%u ms", -+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); -+} -+ -+/* -+ * Set the maximum time for the in-service queue to consume its -+ * budget. This prevents seeky processes from lowering the disk -+ * throughput (always guaranteed with a time slice scheme as in CFQ). -+ */ -+static void bfq_set_budget_timeout(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfqd->in_service_queue; -+ unsigned int timeout_coeff; -+ if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) -+ timeout_coeff = 1; -+ else -+ timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; -+ -+ bfqd->last_budget_start = ktime_get(); -+ -+ bfq_clear_bfqq_budget_new(bfqq); -+ bfqq->budget_timeout = jiffies + -+ bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; -+ -+ bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", -+ jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * -+ timeout_coeff)); -+} -+ -+/* -+ * Move request from internal lists to the request queue dispatch list. -+ */ -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ /* -+ * For consistency, the next instruction should have been executed -+ * after removing the request from the queue and dispatching it. -+ * We execute instead this instruction before bfq_remove_request() -+ * (and hence introduce a temporary inconsistency), for efficiency. 
-+ * In fact, in a forced_dispatch, this prevents two counters related -+ * to bfqq->dispatched to risk to be uselessly decremented if bfqq -+ * is not in service, and then to be incremented again after -+ * incrementing bfqq->dispatched. -+ */ -+ bfqq->dispatched++; -+ bfq_remove_request(rq); -+ elv_dispatch_sort(q, rq); -+ -+ if (bfq_bfqq_sync(bfqq)) -+ bfqd->sync_flight++; -+} -+ -+/* -+ * Return expired entry, or NULL to just start from scratch in rbtree. -+ */ -+static struct request *bfq_check_fifo(struct bfq_queue *bfqq) -+{ -+ struct request *rq = NULL; -+ -+ if (bfq_bfqq_fifo_expire(bfqq)) -+ return NULL; -+ -+ bfq_mark_bfqq_fifo_expire(bfqq); -+ -+ if (list_empty(&bfqq->fifo)) -+ return NULL; -+ -+ rq = rq_entry_fifo(bfqq->fifo.next); -+ -+ if (time_before(jiffies, rq->fifo_time)) -+ return NULL; -+ -+ return rq; -+} -+ -+/* -+ * Must be called with the queue_lock held. -+ */ -+static int bfqq_process_refs(struct bfq_queue *bfqq) -+{ -+ int process_refs, io_refs; -+ -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -+ BUG_ON(process_refs < 0); -+ return process_refs; -+} -+ -+static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ int process_refs, new_process_refs; -+ struct bfq_queue *__bfqq; -+ -+ /* -+ * If there are no process references on the new_bfqq, then it is -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -+ * may have dropped their last reference (not just their last process -+ * reference). -+ */ -+ if (!bfqq_process_refs(new_bfqq)) -+ return; -+ -+ /* Avoid a circular list and skip interim queue merges. */ -+ while ((__bfqq = new_bfqq->new_bfqq)) { -+ if (__bfqq == bfqq) -+ return; -+ new_bfqq = __bfqq; -+ } -+ -+ process_refs = bfqq_process_refs(bfqq); -+ new_process_refs = bfqq_process_refs(new_bfqq); -+ /* -+ * If the process for the bfqq has gone away, there is no -+ * sense in merging the queues. 
-+ */ -+ if (process_refs == 0 || new_process_refs == 0) -+ return; -+ -+ /* -+ * Merge in the direction of the lesser amount of work. -+ */ -+ if (new_process_refs >= process_refs) { -+ bfqq->new_bfqq = new_bfqq; -+ atomic_add(process_refs, &new_bfqq->ref); -+ } else { -+ new_bfqq->new_bfqq = bfqq; -+ atomic_add(new_process_refs, &bfqq->ref); -+ } -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -+ new_bfqq->pid); -+} -+ -+static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ return entity->budget - entity->service; -+} -+ -+static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ __bfq_bfqd_reset_in_service(bfqd); -+ -+ /* -+ * If this bfqq is shared between multiple processes, check -+ * to make sure that those processes are still issuing I/Os -+ * within the mean seek distance. If not, it may be time to -+ * break the queues apart again. -+ */ -+ if (bfq_bfqq_coop(bfqq) && BFQQ_SEEKY(bfqq)) -+ bfq_mark_bfqq_split_coop(bfqq); -+ -+ if (RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ /* -+ * Overloading budget_timeout field to store the time -+ * at which the queue remains with no backlog; used by -+ * the weight-raising mechanism. -+ */ -+ bfqq->budget_timeout = jiffies; -+ bfq_del_bfqq_busy(bfqd, bfqq, 1); -+ } else { -+ bfq_activate_bfqq(bfqd, bfqq); -+ /* -+ * Resort priority tree of potential close cooperators. -+ */ -+ bfq_rq_pos_tree_add(bfqd, bfqq); -+ } -+} -+ -+/** -+ * __bfq_bfqq_recalc_budget - try to adapt the budget to the @bfqq behavior. -+ * @bfqd: device data. -+ * @bfqq: queue to update. -+ * @reason: reason for expiration. -+ * -+ * Handle the feedback on @bfqq budget. See the body for detailed -+ * comments. 
-+ */ -+static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ enum bfqq_expiration reason) -+{ -+ struct request *next_rq; -+ unsigned long budget, min_budget; -+ -+ budget = bfqq->max_budget; -+ min_budget = bfq_min_budget(bfqd); -+ -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu", -+ bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu", -+ budget, bfq_min_budget(bfqd)); -+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", -+ bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); -+ -+ if (bfq_bfqq_sync(bfqq)) { -+ switch (reason) { -+ /* -+ * Caveat: in all the following cases we trade latency -+ * for throughput. -+ */ -+ case BFQ_BFQQ_TOO_IDLE: -+ /* -+ * This is the only case where we may reduce -+ * the budget: if there is no request of the -+ * process still waiting for completion, then -+ * we assume (tentatively) that the timer has -+ * expired because the batch of requests of -+ * the process could have been served with a -+ * smaller budget. Hence, betting that -+ * process will behave in the same way when it -+ * becomes backlogged again, we reduce its -+ * next budget. As long as we guess right, -+ * this budget cut reduces the latency -+ * experienced by the process. -+ * -+ * However, if there are still outstanding -+ * requests, then the process may have not yet -+ * issued its next request just because it is -+ * still waiting for the completion of some of -+ * the still outstanding ones. So in this -+ * subcase we do not reduce its budget, on the -+ * contrary we increase it to possibly boost -+ * the throughput, as discussed in the -+ * comments to the BUDGET_TIMEOUT case. 
-+ */ -+ if (bfqq->dispatched > 0) /* still outstanding reqs */ -+ budget = min(budget * 2, bfqd->bfq_max_budget); -+ else { -+ if (budget > 5 * min_budget) -+ budget -= 4 * min_budget; -+ else -+ budget = min_budget; -+ } -+ break; -+ case BFQ_BFQQ_BUDGET_TIMEOUT: -+ /* -+ * We double the budget here because: 1) it -+ * gives the chance to boost the throughput if -+ * this is not a seeky process (which may have -+ * bumped into this timeout because of, e.g., -+ * ZBR), 2) together with charge_full_budget -+ * it helps give seeky processes higher -+ * timestamps, and hence be served less -+ * frequently. -+ */ -+ budget = min(budget * 2, bfqd->bfq_max_budget); -+ break; -+ case BFQ_BFQQ_BUDGET_EXHAUSTED: -+ /* -+ * The process still has backlog, and did not -+ * let either the budget timeout or the disk -+ * idling timeout expire. Hence it is not -+ * seeky, has a short thinktime and may be -+ * happy with a higher budget too. So -+ * definitely increase the budget of this good -+ * candidate to boost the disk throughput. -+ */ -+ budget = min(budget * 4, bfqd->bfq_max_budget); -+ break; -+ case BFQ_BFQQ_NO_MORE_REQUESTS: -+ /* -+ * Leave the budget unchanged. -+ */ -+ default: -+ return; -+ } -+ } else /* async queue */ -+ /* async queues get always the maximum possible budget -+ * (their ability to dispatch is limited by -+ * @bfqd->bfq_max_budget_async_rq). -+ */ -+ budget = bfqd->bfq_max_budget; -+ -+ bfqq->max_budget = budget; -+ -+ if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 && -+ bfqq->max_budget > bfqd->bfq_max_budget) -+ bfqq->max_budget = bfqd->bfq_max_budget; -+ -+ /* -+ * Make sure that we have enough budget for the next request. -+ * Since the finish time of the bfqq must be kept in sync with -+ * the budget, be sure to call __bfq_bfqq_expire() after the -+ * update. 
-+ */ -+ next_rq = bfqq->next_rq; -+ if (next_rq != NULL) -+ bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(next_rq, bfqq)); -+ else -+ bfqq->entity.budget = bfqq->max_budget; -+ -+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu", -+ next_rq != NULL ? blk_rq_sectors(next_rq) : 0, -+ bfqq->entity.budget); -+} -+ -+static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) -+{ -+ unsigned long max_budget; -+ -+ /* -+ * The max_budget calculated when autotuning is equal to the -+ * amount of sectors transfered in timeout_sync at the -+ * estimated peak rate. -+ */ -+ max_budget = (unsigned long)(peak_rate * 1000 * -+ timeout >> BFQ_RATE_SHIFT); -+ -+ return max_budget; -+} -+ -+/* -+ * In addition to updating the peak rate, checks whether the process -+ * is "slow", and returns 1 if so. This slow flag is used, in addition -+ * to the budget timeout, to reduce the amount of service provided to -+ * seeky processes, and hence reduce their chances to lower the -+ * throughput. See the code for more details. -+ */ -+static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int compensate, enum bfqq_expiration reason) -+{ -+ u64 bw, usecs, expected, timeout; -+ ktime_t delta; -+ int update = 0; -+ -+ if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) -+ return 0; -+ -+ if (compensate) -+ delta = bfqd->last_idling_start; -+ else -+ delta = ktime_get(); -+ delta = ktime_sub(delta, bfqd->last_budget_start); -+ usecs = ktime_to_us(delta); -+ -+ /* Don't trust short/unrealistic values. */ -+ if (usecs < 100 || usecs >= LONG_MAX) -+ return 0; -+ -+ /* -+ * Calculate the bandwidth for the last slice. We use a 64 bit -+ * value to store the peak rate, in sectors per usec in fixed -+ * point math. We do so to have enough precision in the estimate -+ * and to avoid overflows. 
-+ */ -+ bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; -+ do_div(bw, (unsigned long)usecs); -+ -+ timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ -+ /* -+ * Use only long (> 20ms) intervals to filter out spikes for -+ * the peak rate estimation. -+ */ -+ if (usecs > 20000) { -+ if (bw > bfqd->peak_rate || -+ (!BFQQ_SEEKY(bfqq) && -+ reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { -+ bfq_log(bfqd, "measured bw =%llu", bw); -+ /* -+ * To smooth oscillations use a low-pass filter with -+ * alpha=7/8, i.e., -+ * new_rate = (7/8) * old_rate + (1/8) * bw -+ */ -+ do_div(bw, 8); -+ if (bw == 0) -+ return 0; -+ bfqd->peak_rate *= 7; -+ do_div(bfqd->peak_rate, 8); -+ bfqd->peak_rate += bw; -+ update = 1; -+ bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); -+ } -+ -+ update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; -+ -+ if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) -+ bfqd->peak_rate_samples++; -+ -+ if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && -+ update) { -+ int dev_type = blk_queue_nonrot(bfqd->queue); -+ if (bfqd->bfq_user_max_budget == 0) { -+ bfqd->bfq_max_budget = -+ bfq_calc_max_budget(bfqd->peak_rate, -+ timeout); -+ bfq_log(bfqd, "new max_budget=%lu", -+ bfqd->bfq_max_budget); -+ } -+ if (bfqd->device_speed == BFQ_BFQD_FAST && -+ bfqd->peak_rate < device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_SLOW; -+ bfqd->RT_prod = R_slow[dev_type] * -+ T_slow[dev_type]; -+ } else if (bfqd->device_speed == BFQ_BFQD_SLOW && -+ bfqd->peak_rate > device_speed_thresh[dev_type]) { -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ bfqd->RT_prod = R_fast[dev_type] * -+ T_fast[dev_type]; -+ } -+ } -+ } -+ -+ /* -+ * If the process has been served for a too short time -+ * interval to let its possible sequential accesses prevail on -+ * the initial seek time needed to move the disk head on the -+ * first sector it requested, then give the process a chance -+ * and for the moment return false. 
-+ */ -+ if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) -+ return 0; -+ -+ /* -+ * A process is considered ``slow'' (i.e., seeky, so that we -+ * cannot treat it fairly in the service domain, as it would -+ * slow down too much the other processes) if, when a slice -+ * ends for whatever reason, it has received service at a -+ * rate that would not be high enough to complete the budget -+ * before the budget timeout expiration. -+ */ -+ expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; -+ -+ /* -+ * Caveat: processes doing IO in the slower disk zones will -+ * tend to be slow(er) even if not seeky. And the estimated -+ * peak rate will actually be an average over the disk -+ * surface. Hence, to not be too harsh with unlucky processes, -+ * we keep a budget/3 margin of safety before declaring a -+ * process slow. -+ */ -+ return expected > (4 * bfqq->entity.budget) / 3; -+} -+ -+/* -+ * To be deemed as soft real-time, an application must meet two -+ * requirements. First, the application must not require an average -+ * bandwidth higher than the approximate bandwidth required to playback or -+ * record a compressed high-definition video. -+ * The next function is invoked on the completion of the last request of a -+ * batch, to compute the next-start time instant, soft_rt_next_start, such -+ * that, if the next request of the application does not arrive before -+ * soft_rt_next_start, then the above requirement on the bandwidth is met. -+ * -+ * The second requirement is that the request pattern of the application is -+ * isochronous, i.e., that, after issuing a request or a batch of requests, -+ * the application stops issuing new requests until all its pending requests -+ * have been completed. After that, the application may issue a new batch, -+ * and so on. 
-+ * For this reason the next function is invoked to compute -+ * soft_rt_next_start only for applications that meet this requirement, -+ * whereas soft_rt_next_start is set to infinity for applications that do -+ * not. -+ * -+ * Unfortunately, even a greedy application may happen to behave in an -+ * isochronous way if the CPU load is high. In fact, the application may -+ * stop issuing requests while the CPUs are busy serving other processes, -+ * then restart, then stop again for a while, and so on. In addition, if -+ * the disk achieves a low enough throughput with the request pattern -+ * issued by the application (e.g., because the request pattern is random -+ * and/or the device is slow), then the application may meet the above -+ * bandwidth requirement too. To prevent such a greedy application to be -+ * deemed as soft real-time, a further rule is used in the computation of -+ * soft_rt_next_start: soft_rt_next_start must be higher than the current -+ * time plus the maximum time for which the arrival of a request is waited -+ * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle. -+ * This filters out greedy applications, as the latter issue instead their -+ * next request as soon as possible after the last one has been completed -+ * (in contrast, when a batch of requests is completed, a soft real-time -+ * application spends some time processing data). -+ * -+ * Unfortunately, the last filter may easily generate false positives if -+ * only bfqd->bfq_slice_idle is used as a reference time interval and one -+ * or both the following cases occur: -+ * 1) HZ is so low that the duration of a jiffy is comparable to or higher -+ * than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with -+ * HZ=100. -+ * 2) jiffies, instead of increasing at a constant rate, may stop increasing -+ * for a while, then suddenly 'jump' by several units to recover the lost -+ * increments. This seems to happen, e.g., inside virtual machines. 
-+ * To address this issue, we do not use as a reference time interval just -+ * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In -+ * particular we add the minimum number of jiffies for which the filter -+ * seems to be quite precise also in embedded systems and KVM/QEMU virtual -+ * machines. -+ */ -+static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ return max(bfqq->last_idle_bklogged + -+ HZ * bfqq->service_from_backlogged / -+ bfqd->bfq_wr_max_softrt_rate, -+ jiffies + bfqq->bfqd->bfq_slice_idle + 4); -+} -+ -+/* -+ * Return the largest-possible time instant such that, for as long as possible, -+ * the current time will be lower than this time instant according to the macro -+ * time_is_before_jiffies(). -+ */ -+static inline unsigned long bfq_infinity_from_now(unsigned long now) -+{ -+ return now + ULONG_MAX / 2; -+} -+ -+/** -+ * bfq_bfqq_expire - expire a queue. -+ * @bfqd: device owning the queue. -+ * @bfqq: the queue to expire. -+ * @compensate: if true, compensate for the time spent idling. -+ * @reason: the reason causing the expiration. -+ * -+ * -+ * If the process associated to the queue is slow (i.e., seeky), or in -+ * case of budget timeout, or, finally, if it is async, we -+ * artificially charge it an entire budget (independently of the -+ * actual service it received). As a consequence, the queue will get -+ * higher timestamps than the correct ones upon reactivation, and -+ * hence it will be rescheduled as if it had received more service -+ * than what it actually received. In the end, this class of processes -+ * will receive less service in proportion to how slowly they consume -+ * their budgets (and hence how seriously they tend to lower the -+ * throughput). -+ * -+ * In contrast, when a queue expires because it has been idling for -+ * too much or because it exhausted its budget, we do not touch the -+ * amount of service it has received. 
Hence when the queue will be -+ * reactivated and its timestamps updated, the latter will be in sync -+ * with the actual service received by the queue until expiration. -+ * -+ * Charging a full budget to the first type of queues and the exact -+ * service to the others has the effect of using the WF2Q+ policy to -+ * schedule the former on a timeslice basis, without violating the -+ * service domain guarantees of the latter. -+ */ -+static void bfq_bfqq_expire(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ int compensate, -+ enum bfqq_expiration reason) -+{ -+ int slow; -+ BUG_ON(bfqq != bfqd->in_service_queue); -+ -+ /* Update disk peak rate for autotuning and check whether the -+ * process is slow (see bfq_update_peak_rate). -+ */ -+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); -+ -+ /* -+ * As above explained, 'punish' slow (i.e., seeky), timed-out -+ * and async queues, to favor sequential sync workloads. -+ * -+ * Processes doing I/O in the slower disk zones will tend to be -+ * slow(er) even if not seeky. Hence, since the estimated peak -+ * rate is actually an average over the disk surface, these -+ * processes may timeout just for bad luck. To avoid punishing -+ * them we do not charge a full budget to a process that -+ * succeeded in consuming at least 2/3 of its budget. 
-+ */ -+ if (slow || (reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3)) -+ bfq_bfqq_charge_full_budget(bfqq); -+ -+ bfqq->service_from_backlogged += bfqq->entity.service; -+ -+ if (BFQQ_SEEKY(bfqq) && reason == BFQ_BFQQ_BUDGET_TIMEOUT && -+ !bfq_bfqq_constantly_seeky(bfqq)) { -+ bfq_mark_bfqq_constantly_seeky(bfqq); -+ if (!blk_queue_nonrot(bfqd->queue)) -+ bfqd->const_seeky_busy_in_flight_queues++; -+ } -+ -+ if (reason == BFQ_BFQQ_TOO_IDLE && -+ bfqq->entity.service <= 2 * bfqq->entity.budget / 10 ) -+ bfq_clear_bfqq_IO_bound(bfqq); -+ -+ if (bfqd->low_latency && bfqq->wr_coeff == 1) -+ bfqq->last_wr_start_finish = jiffies; -+ -+ if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && -+ RB_EMPTY_ROOT(&bfqq->sort_list)) { -+ /* -+ * If we get here, and there are no outstanding requests, -+ * then the request pattern is isochronous (see the comments -+ * to the function bfq_bfqq_softrt_next_start()). Hence we -+ * can compute soft_rt_next_start. If, instead, the queue -+ * still has outstanding requests, then we have to wait -+ * for the completion of all the outstanding requests to -+ * discover whether the request pattern is actually -+ * isochronous. -+ */ -+ if (bfqq->dispatched == 0) -+ bfqq->soft_rt_next_start = -+ bfq_bfqq_softrt_next_start(bfqd, bfqq); -+ else { -+ /* -+ * The application is still waiting for the -+ * completion of one or more requests: -+ * prevent it from possibly being incorrectly -+ * deemed as soft real-time by setting its -+ * soft_rt_next_start to infinity. In fact, -+ * without this assignment, the application -+ * would be incorrectly deemed as soft -+ * real-time if: -+ * 1) it issued a new request before the -+ * completion of all its in-flight -+ * requests, and -+ * 2) at that time, its soft_rt_next_start -+ * happened to be in the past. 
-+ */ -+ bfqq->soft_rt_next_start = -+ bfq_infinity_from_now(jiffies); -+ /* -+ * Schedule an update of soft_rt_next_start to when -+ * the task may be discovered to be isochronous. -+ */ -+ bfq_mark_bfqq_softrt_update(bfqq); -+ } -+ } -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "expire (%d, slow %d, num_disp %d, idle_win %d)", reason, -+ slow, bfqq->dispatched, bfq_bfqq_idle_window(bfqq)); -+ -+ /* -+ * Increase, decrease or leave budget unchanged according to -+ * reason. -+ */ -+ __bfq_bfqq_recalc_budget(bfqd, bfqq, reason); -+ __bfq_bfqq_expire(bfqd, bfqq); -+} -+ -+/* -+ * Budget timeout is not implemented through a dedicated timer, but -+ * just checked on request arrivals and completions, as well as on -+ * idle timer expirations. -+ */ -+static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) -+{ -+ if (bfq_bfqq_budget_new(bfqq) || -+ time_before(jiffies, bfqq->budget_timeout)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * If we expire a queue that is waiting for the arrival of a new -+ * request, we may prevent the fictitious timestamp back-shifting that -+ * allows the guarantees of the queue to be preserved (see [1] for -+ * this tricky aspect). Hence we return true only if this condition -+ * does not hold, or if the queue is slow enough to deserve only to be -+ * kicked off for preserving a high throughput. -+*/ -+static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "may_budget_timeout: wait_request %d left %d timeout %d", -+ bfq_bfqq_wait_request(bfqq), -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3, -+ bfq_bfqq_budget_timeout(bfqq)); -+ -+ return (!bfq_bfqq_wait_request(bfqq) || -+ bfq_bfqq_budget_left(bfqq) >= bfqq->entity.budget / 3) -+ && -+ bfq_bfqq_budget_timeout(bfqq); -+} -+ -+/* -+ * Device idling is allowed only for the queues for which this function -+ * returns true. 
For this reason, the return value of this function plays a -+ * critical role for both throughput boosting and service guarantees. The -+ * return value is computed through a logical expression. In this rather -+ * long comment, we try to briefly describe all the details and motivations -+ * behind the components of this logical expression. -+ * -+ * First, the expression may be true only for sync queues. Besides, if -+ * bfqq is also being weight-raised, then the expression always evaluates -+ * to true, as device idling is instrumental for preserving low-latency -+ * guarantees (see [1]). Otherwise, the expression evaluates to true only -+ * if bfqq has a non-null idle window and at least one of the following -+ * two conditions holds. The first condition is that the device is not -+ * performing NCQ, because idling the device most certainly boosts the -+ * throughput if this condition holds and bfqq has been granted a non-null -+ * idle window. The second compound condition is made of the logical AND of -+ * two components. -+ * -+ * The first component is true only if there is no weight-raised busy -+ * queue. This guarantees that the device is not idled for a sync non- -+ * weight-raised queue when there are busy weight-raised queues. The former -+ * is then expired immediately if empty. Combined with the timestamping -+ * rules of BFQ (see [1] for details), this causes sync non-weight-raised -+ * queues to get a lower number of requests served, and hence to ask for a -+ * lower number of requests from the request pool, before the busy weight- -+ * raised queues get served again. -+ * -+ * This is beneficial for the processes associated with weight-raised -+ * queues, when the request pool is saturated (e.g., in the presence of -+ * write hogs). 
In fact, if the processes associated with the other queues -+ * ask for requests at a lower rate, then weight-raised processes have a -+ * higher probability to get a request from the pool immediately (or at -+ * least soon) when they need one. Hence they have a higher probability to -+ * actually get a fraction of the disk throughput proportional to their -+ * high weight. This is especially true with NCQ-capable drives, which -+ * enqueue several requests in advance and further reorder internally- -+ * queued requests. -+ * -+ * In the end, mistreating non-weight-raised queues when there are busy -+ * weight-raised queues seems to mitigate starvation problems in the -+ * presence of heavy write workloads and NCQ, and hence to guarantee a -+ * higher application and system responsiveness in these hostile scenarios. -+ * -+ * If the first component of the compound condition is instead true, i.e., -+ * there is no weight-raised busy queue, then the second component of the -+ * compound condition takes into account service-guarantee and throughput -+ * issues related to NCQ (recall that the compound condition is evaluated -+ * only if the device is detected as supporting NCQ). -+ * -+ * As for service guarantees, allowing the drive to enqueue more than one -+ * request at a time, and hence delegating de facto final scheduling -+ * decisions to the drive's internal scheduler, causes loss of control on -+ * the actual request service order. In this respect, when the drive is -+ * allowed to enqueue more than one request at a time, the service -+ * distribution enforced by the drive's internal scheduler is likely to -+ * coincide with the desired device-throughput distribution only in the -+ * following, perfectly symmetric, scenario: -+ * 1) all active queues have the same weight, -+ * 2) all active groups at the same level in the groups tree have the same -+ * weight, -+ * 3) all active groups at the same level in the groups tree have the same -+ * number of children. 
-+ * -+ * Even in such a scenario, sequential I/O may still receive a preferential -+ * treatment, but this is not likely to be a big issue with flash-based -+ * devices, because of their non-dramatic loss of throughput with random -+ * I/O. Things do differ with HDDs, for which additional care is taken, as -+ * explained after completing the discussion for flash-based devices. -+ * -+ * Unfortunately, keeping the necessary state for evaluating exactly the -+ * above symmetry conditions would be quite complex and time-consuming. -+ * Therefore BFQ evaluates instead the following stronger sub-conditions, -+ * for which it is much easier to maintain the needed state: -+ * 1) all active queues have the same weight, -+ * 2) all active groups have the same weight, -+ * 3) all active groups have at most one active child each. -+ * In particular, the last two conditions are always true if hierarchical -+ * support and the cgroups interface are not enabled, hence no state needs -+ * to be maintained in this case. -+ * -+ * According to the above considerations, the second component of the -+ * compound condition evaluates to true if any of the above symmetry -+ * sub-condition does not hold, or the device is not flash-based. Therefore, -+ * if also the first component is true, then idling is allowed for a sync -+ * queue. These are the only sub-conditions considered if the device is -+ * flash-based, as, for such a device, it is sensible to force idling only -+ * for service-guarantee issues. In fact, as for throughput, idling -+ * NCQ-capable flash-based devices would not boost the throughput even -+ * with sequential I/O; rather it would lower the throughput in proportion -+ * to how fast the device is. In the end, (only) if all the three -+ * sub-conditions hold and the device is flash-based, the compound -+ * condition evaluates to false and therefore no idling is performed. 
-+ * -+ * As already said, things change with a rotational device, where idling -+ * boosts the throughput with sequential I/O (even with NCQ). Hence, for -+ * such a device the second component of the compound condition evaluates -+ * to true also if the following additional sub-condition does not hold: -+ * the queue is constantly seeky. Unfortunately, this different behavior -+ * with respect to flash-based devices causes an additional asymmetry: if -+ * some sync queues enjoy idling and some other sync queues do not, then -+ * the latter get a low share of the device throughput, simply because the -+ * former get many requests served after being set as in service, whereas -+ * the latter do not. As a consequence, to guarantee the desired throughput -+ * distribution, on HDDs the compound expression evaluates to true (and -+ * hence device idling is performed) also if the following last symmetry -+ * condition does not hold: no other queue is benefiting from idling. Also -+ * this last condition is actually replaced with a simpler-to-maintain and -+ * stronger condition: there is no busy queue which is not constantly seeky -+ * (and hence may also benefit from idling). -+ * -+ * To sum up, when all the required symmetry and throughput-boosting -+ * sub-conditions hold, the second component of the compound condition -+ * evaluates to false, and hence no idling is performed. This helps to -+ * keep the drives' internal queues full on NCQ-capable devices, and hence -+ * to boost the throughput, without causing 'almost' any loss of service -+ * guarantees. The 'almost' follows from the fact that, if the internal -+ * queue of one such device is filled while all the sub-conditions hold, -+ * but at some point in time some sub-condition stops to hold, then it may -+ * become impossible to let requests be served in the new desired order -+ * until all the requests already queued in the device have been served. 
-+ */ -+static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+#ifdef CONFIG_CGROUP_BFQIO -+#define symmetric_scenario (!bfqd->active_numerous_groups && \ -+ !bfq_differentiated_weights(bfqd)) -+#else -+#define symmetric_scenario (!bfq_differentiated_weights(bfqd)) -+#endif -+#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \ -+ bfqd->busy_in_flight_queues == \ -+ bfqd->const_seeky_busy_in_flight_queues) -+/* -+ * Condition for expiring a non-weight-raised queue (and hence not idling -+ * the device). -+ */ -+#define cond_for_expiring_non_wr (bfqd->hw_tag && \ -+ (bfqd->wr_busy_queues > 0 || \ -+ (symmetric_scenario && \ -+ (blk_queue_nonrot(bfqd->queue) || \ -+ cond_for_seeky_on_ncq_hdd)))) -+ -+ return bfq_bfqq_sync(bfqq) && -+ (bfq_bfqq_IO_bound(bfqq) || bfqq->wr_coeff > 1) && -+ (bfqq->wr_coeff > 1 || -+ (bfq_bfqq_idle_window(bfqq) && -+ !cond_for_expiring_non_wr) -+ ); -+} -+ -+/* -+ * If the in-service queue is empty but sync, and the function -+ * bfq_bfqq_must_not_expire returns true, then: -+ * 1) the queue must remain in service and cannot be expired, and -+ * 2) the disk must be idled to wait for the possible arrival of a new -+ * request for the queue. -+ * See the comments to the function bfq_bfqq_must_not_expire for the reasons -+ * why performing device idling is the best choice to boost the throughput -+ * and preserve service guarantees when bfq_bfqq_must_not_expire itself -+ * returns true. -+ */ -+static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 && -+ bfq_bfqq_must_not_expire(bfqq); -+} -+ -+/* -+ * Select a queue for service. If we have a current queue in service, -+ * check whether to continue servicing it, or retrieve and set a new one. 
-+ */ -+static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq, *new_bfqq = NULL; -+ struct request *next_rq; -+ enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; -+ -+ bfqq = bfqd->in_service_queue; -+ if (bfqq == NULL) -+ goto new_queue; -+ -+ bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); -+ -+ /* -+ * If another queue has a request waiting within our mean seek -+ * distance, let it run. The expire code will check for close -+ * cooperators and put the close queue at the front of the -+ * service tree. If possible, merge the expiring queue with the -+ * new bfqq. -+ */ -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq); -+ if (new_bfqq != NULL && bfqq->new_bfqq == NULL) -+ bfq_setup_merge(bfqq, new_bfqq); -+ -+ if (bfq_may_expire_for_budg_timeout(bfqq) && -+ !timer_pending(&bfqd->idle_slice_timer) && -+ !bfq_bfqq_must_idle(bfqq)) -+ goto expire; -+ -+ next_rq = bfqq->next_rq; -+ /* -+ * If bfqq has requests queued and it has enough budget left to -+ * serve them, keep the queue, otherwise expire it. -+ */ -+ if (next_rq != NULL) { -+ if (bfq_serv_to_charge(next_rq, bfqq) > -+ bfq_bfqq_budget_left(bfqq)) { -+ reason = BFQ_BFQQ_BUDGET_EXHAUSTED; -+ goto expire; -+ } else { -+ /* -+ * The idle timer may be pending because we may -+ * not disable disk idling even when a new request -+ * arrives. -+ */ -+ if (timer_pending(&bfqd->idle_slice_timer)) { -+ /* -+ * If we get here: 1) at least a new request -+ * has arrived but we have not disabled the -+ * timer because the request was too small, -+ * 2) then the block layer has unplugged -+ * the device, causing the dispatch to be -+ * invoked. -+ * -+ * Since the device is unplugged, now the -+ * requests are probably large enough to -+ * provide a reasonable throughput. -+ * So we disable idling. 
-+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+ } -+ if (new_bfqq == NULL) -+ goto keep_queue; -+ else -+ goto expire; -+ } -+ } -+ -+ /* -+ * No requests pending. If the in-service queue still has requests -+ * in flight (possibly waiting for a completion) or is idling for a -+ * new request, then keep it. -+ */ -+ if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) { -+ bfqq = NULL; -+ goto keep_queue; -+ } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { -+ /* -+ * Expiring the queue because there is a close cooperator, -+ * cancel timer. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+ } -+ -+ reason = BFQ_BFQQ_NO_MORE_REQUESTS; -+expire: -+ bfq_bfqq_expire(bfqd, bfqq, 0, reason); -+new_queue: -+ bfqq = bfq_set_in_service_queue(bfqd, new_bfqq); -+ bfq_log(bfqd, "select_queue: new queue %d returned", -+ bfqq != NULL ? bfqq->pid : 0); -+keep_queue: -+ return bfqq; -+} -+ -+static void bfq_update_wr_data(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq->wr_coeff > 1) { /* queue is being boosted */ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "raising period dur %u/%u msec, old coeff %u, w %d(%d)", -+ jiffies_to_msecs(jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time), -+ bfqq->wr_coeff, -+ bfqq->entity.weight, bfqq->entity.orig_weight); -+ -+ BUG_ON(bfqq != bfqd->in_service_queue && entity->weight != -+ entity->orig_weight * bfqq->wr_coeff); -+ if (entity->ioprio_changed) -+ bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); -+ /* -+ * If too much time has elapsed from the beginning -+ * of this weight-raising, stop it. 
-+ */ -+ if (time_is_before_jiffies(bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time)) { -+ bfqq->last_wr_start_finish = jiffies; -+ bfq_log_bfqq(bfqd, bfqq, -+ "wrais ending at %lu, rais_max_time %u", -+ bfqq->last_wr_start_finish, -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ bfq_bfqq_end_wr(bfqq); -+ __bfq_entity_update_weight_prio( -+ bfq_entity_service_tree(entity), -+ entity); -+ } -+ } -+} -+ -+/* -+ * Dispatch one request from bfqq, moving it to the request queue -+ * dispatch list. -+ */ -+static int bfq_dispatch_request(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ int dispatched = 0; -+ struct request *rq; -+ unsigned long service_to_charge; -+ -+ BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ /* Follow expired path, else get first next available. */ -+ rq = bfq_check_fifo(bfqq); -+ if (rq == NULL) -+ rq = bfqq->next_rq; -+ service_to_charge = bfq_serv_to_charge(rq, bfqq); -+ -+ if (service_to_charge > bfq_bfqq_budget_left(bfqq)) { -+ /* -+ * This may happen if the next rq is chosen in fifo order -+ * instead of sector order. The budget is properly -+ * dimensioned to be always sufficient to serve the next -+ * request only if it is chosen in sector order. The reason -+ * is that it would be quite inefficient and little useful -+ * to always make sure that the budget is large enough to -+ * serve even the possible next rq in fifo order. -+ * In fact, requests are seldom served in fifo order. -+ * -+ * Expire the queue for budget exhaustion, and make sure -+ * that the next act_budget is enough to serve the next -+ * request, even if it comes from the fifo expired path. -+ */ -+ bfqq->next_rq = rq; -+ /* -+ * Since this dispatch is failed, make sure that -+ * a new one will be performed -+ */ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ goto expire; -+ } -+ -+ /* Finally, insert request into driver dispatch list. 
*/ -+ bfq_bfqq_served(bfqq, service_to_charge); -+ bfq_dispatch_insert(bfqd->queue, rq); -+ -+ bfq_update_wr_data(bfqd, bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "dispatched %u sec req (%llu), budg left %lu", -+ blk_rq_sectors(rq), -+ (long long unsigned)blk_rq_pos(rq), -+ bfq_bfqq_budget_left(bfqq)); -+ -+ dispatched++; -+ -+ if (bfqd->in_service_bic == NULL) { -+ atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount); -+ bfqd->in_service_bic = RQ_BIC(rq); -+ } -+ -+ if (bfqd->busy_queues > 1 && ((!bfq_bfqq_sync(bfqq) && -+ dispatched >= bfqd->bfq_max_budget_async_rq) || -+ bfq_class_idle(bfqq))) -+ goto expire; -+ -+ return dispatched; -+ -+expire: -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED); -+ return dispatched; -+} -+ -+static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq) -+{ -+ int dispatched = 0; -+ -+ while (bfqq->next_rq != NULL) { -+ bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq); -+ dispatched++; -+ } -+ -+ BUG_ON(!list_empty(&bfqq->fifo)); -+ return dispatched; -+} -+ -+/* -+ * Drain our current requests. -+ * Used for barriers and when switching io schedulers on-the-fly. -+ */ -+static int bfq_forced_dispatch(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq, *n; -+ struct bfq_service_tree *st; -+ int dispatched = 0; -+ -+ bfqq = bfqd->in_service_queue; -+ if (bfqq != NULL) -+ __bfq_bfqq_expire(bfqd, bfqq); -+ -+ /* -+ * Loop through classes, and be careful to leave the scheduler -+ * in a consistent state, as feedback mechanisms and vtime -+ * updates cannot be disabled during the process. 
-+ */ -+ list_for_each_entry_safe(bfqq, n, &bfqd->active_list, bfqq_list) { -+ st = bfq_entity_service_tree(&bfqq->entity); -+ -+ dispatched += __bfq_forced_dispatch_bfqq(bfqq); -+ bfqq->max_budget = bfq_max_budget(bfqd); -+ -+ bfq_forget_idle(st); -+ } -+ -+ BUG_ON(bfqd->busy_queues != 0); -+ -+ return dispatched; -+} -+ -+static int bfq_dispatch_requests(struct request_queue *q, int force) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq; -+ int max_dispatch; -+ -+ bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues); -+ if (bfqd->busy_queues == 0) -+ return 0; -+ -+ if (unlikely(force)) -+ return bfq_forced_dispatch(bfqd); -+ -+ bfqq = bfq_select_queue(bfqd); -+ if (bfqq == NULL) -+ return 0; -+ -+ max_dispatch = bfqd->bfq_quantum; -+ if (bfq_class_idle(bfqq)) -+ max_dispatch = 1; -+ -+ if (!bfq_bfqq_sync(bfqq)) -+ max_dispatch = bfqd->bfq_max_budget_async_rq; -+ -+ if (bfqq->dispatched >= max_dispatch) { -+ if (bfqd->busy_queues > 1) -+ return 0; -+ if (bfqq->dispatched >= 4 * max_dispatch) -+ return 0; -+ } -+ -+ if (bfqd->sync_flight != 0 && !bfq_bfqq_sync(bfqq)) -+ return 0; -+ -+ bfq_clear_bfqq_wait_request(bfqq); -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ -+ if (!bfq_dispatch_request(bfqd, bfqq)) -+ return 0; -+ -+ bfq_log_bfqq(bfqd, bfqq, "dispatched one request of %d (max_disp %d)", -+ bfqq->pid, max_dispatch); -+ -+ return 1; -+} -+ -+/* -+ * Task holds one reference to the queue, dropped when task exits. Each rq -+ * in-flight on this queue also holds a reference, dropped when rq is freed. -+ * -+ * Queue lock must be held here. 
-+ */ -+static void bfq_put_queue(struct bfq_queue *bfqq) -+{ -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ BUG_ON(atomic_read(&bfqq->ref) <= 0); -+ -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ if (!atomic_dec_and_test(&bfqq->ref)) -+ return; -+ -+ BUG_ON(rb_first(&bfqq->sort_list) != NULL); -+ BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0); -+ BUG_ON(bfqq->entity.tree != NULL); -+ BUG_ON(bfq_bfqq_busy(bfqq)); -+ BUG_ON(bfqd->in_service_queue == bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq); -+ -+ kmem_cache_free(bfq_pool, bfqq); -+} -+ -+static void bfq_put_cooperator(struct bfq_queue *bfqq) -+{ -+ struct bfq_queue *__bfqq, *next; -+ -+ /* -+ * If this queue was scheduled to merge with another queue, be -+ * sure to drop the reference taken on that queue (and others in -+ * the merge chain). See bfq_setup_merge and bfq_merge_bfqqs. -+ */ -+ __bfqq = bfqq->new_bfqq; -+ while (__bfqq) { -+ if (__bfqq == bfqq) -+ break; -+ next = __bfqq->new_bfqq; -+ bfq_put_queue(__bfqq); -+ __bfqq = next; -+ } -+} -+ -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ if (bfqq == bfqd->in_service_queue) { -+ __bfq_bfqq_expire(bfqd, bfqq); -+ bfq_schedule_dispatch(bfqd); -+ } -+ -+ bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ -+ bfq_put_cooperator(bfqq); -+ -+ bfq_put_queue(bfqq); -+} -+ -+static inline void bfq_init_icq(struct io_cq *icq) -+{ -+ struct bfq_io_cq *bic = icq_to_bic(icq); -+ -+ bic->ttime.last_end_request = jiffies; -+} -+ -+static void bfq_exit_icq(struct io_cq *icq) -+{ -+ struct bfq_io_cq *bic = icq_to_bic(icq); -+ struct bfq_data *bfqd = bic_to_bfqd(bic); -+ -+ if (bic->bfqq[BLK_RW_ASYNC]) { -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_ASYNC]); -+ bic->bfqq[BLK_RW_ASYNC] = NULL; -+ } -+ -+ if (bic->bfqq[BLK_RW_SYNC]) { -+ bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); -+ bic->bfqq[BLK_RW_SYNC] = NULL; -+ } -+} -+ -+/* -+ * Update 
the entity prio values; note that the new values will not -+ * be used until the next (re)activation. -+ */ -+static void bfq_init_prio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+{ -+ struct task_struct *tsk = current; -+ int ioprio_class; -+ -+ if (!bfq_bfqq_prio_changed(bfqq)) -+ return; -+ -+ ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); -+ switch (ioprio_class) { -+ default: -+ dev_err(bfqq->bfqd->queue->backing_dev_info.dev, -+ "bfq: bad prio %x\n", ioprio_class); -+ case IOPRIO_CLASS_NONE: -+ /* -+ * No prio set, inherit CPU scheduling settings. -+ */ -+ bfqq->entity.new_ioprio = task_nice_ioprio(tsk); -+ bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk); -+ break; -+ case IOPRIO_CLASS_RT: -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT; -+ break; -+ case IOPRIO_CLASS_BE: -+ bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE; -+ break; -+ case IOPRIO_CLASS_IDLE: -+ bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE; -+ bfqq->entity.new_ioprio = 7; -+ bfq_clear_bfqq_idle_window(bfqq); -+ break; -+ } -+ -+ bfqq->entity.ioprio_changed = 1; -+ -+ bfq_clear_bfqq_prio_changed(bfqq); -+} -+ -+static void bfq_changed_ioprio(struct bfq_io_cq *bic) -+{ -+ struct bfq_data *bfqd; -+ struct bfq_queue *bfqq, *new_bfqq; -+ struct bfq_group *bfqg; -+ unsigned long uninitialized_var(flags); -+ int ioprio = bic->icq.ioc->ioprio; -+ -+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data), -+ &flags); -+ /* -+ * This condition may trigger on a newly created bic, be sure to -+ * drop the lock before returning. 
-+ */ -+ if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio)) -+ goto out; -+ -+ bfqq = bic->bfqq[BLK_RW_ASYNC]; -+ if (bfqq != NULL) { -+ bfqg = container_of(bfqq->entity.sched_data, struct bfq_group, -+ sched_data); -+ new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic, -+ GFP_ATOMIC); -+ if (new_bfqq != NULL) { -+ bic->bfqq[BLK_RW_ASYNC] = new_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, -+ "changed_ioprio: bfqq %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+ } -+ -+ bfqq = bic->bfqq[BLK_RW_SYNC]; -+ if (bfqq != NULL) -+ bfq_mark_bfqq_prio_changed(bfqq); -+ -+ bic->ioprio = ioprio; -+ -+out: -+ bfq_put_bfqd_unlock(bfqd, &flags); -+} -+ -+static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ pid_t pid, int is_sync) -+{ -+ RB_CLEAR_NODE(&bfqq->entity.rb_node); -+ INIT_LIST_HEAD(&bfqq->fifo); -+ -+ atomic_set(&bfqq->ref, 0); -+ bfqq->bfqd = bfqd; -+ -+ bfq_mark_bfqq_prio_changed(bfqq); -+ -+ if (is_sync) { -+ if (!bfq_class_idle(bfqq)) -+ bfq_mark_bfqq_idle_window(bfqq); -+ bfq_mark_bfqq_sync(bfqq); -+ } -+ bfq_mark_bfqq_IO_bound(bfqq); -+ -+ /* Tentative initial value to trade off between thr and lat */ -+ bfqq->max_budget = (2 * bfq_max_budget(bfqd)) / 3; -+ bfqq->pid = pid; -+ -+ bfqq->wr_coeff = 1; -+ bfqq->last_wr_start_finish = 0; -+ /* -+ * Set to the value for which bfqq will not be deemed as -+ * soft rt when it becomes backlogged. -+ */ -+ bfqq->soft_rt_next_start = bfq_infinity_from_now(jiffies); -+} -+ -+static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ int is_sync, -+ struct bfq_io_cq *bic, -+ gfp_t gfp_mask) -+{ -+ struct bfq_queue *bfqq, *new_bfqq = NULL; -+ -+retry: -+ /* bic always exists here */ -+ bfqq = bic_to_bfqq(bic, is_sync); -+ -+ /* -+ * Always try a new alloc if we fall back to the OOM bfqq -+ * originally, since it should just be a temporary situation. 
-+ */ -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { -+ bfqq = NULL; -+ if (new_bfqq != NULL) { -+ bfqq = new_bfqq; -+ new_bfqq = NULL; -+ } else if (gfp_mask & __GFP_WAIT) { -+ spin_unlock_irq(bfqd->queue->queue_lock); -+ new_bfqq = kmem_cache_alloc_node(bfq_pool, -+ gfp_mask | __GFP_ZERO, -+ bfqd->queue->node); -+ spin_lock_irq(bfqd->queue->queue_lock); -+ if (new_bfqq != NULL) -+ goto retry; -+ } else { -+ bfqq = kmem_cache_alloc_node(bfq_pool, -+ gfp_mask | __GFP_ZERO, -+ bfqd->queue->node); -+ } -+ -+ if (bfqq != NULL) { -+ bfq_init_bfqq(bfqd, bfqq, current->pid, is_sync); -+ bfq_log_bfqq(bfqd, bfqq, "allocated"); -+ } else { -+ bfqq = &bfqd->oom_bfqq; -+ bfq_log_bfqq(bfqd, bfqq, "using oom bfqq"); -+ } -+ -+ bfq_init_prio_data(bfqq, bic); -+ bfq_init_entity(&bfqq->entity, bfqg); -+ } -+ -+ if (new_bfqq != NULL) -+ kmem_cache_free(bfq_pool, new_bfqq); -+ -+ return bfqq; -+} -+ -+static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, -+ int ioprio_class, int ioprio) -+{ -+ switch (ioprio_class) { -+ case IOPRIO_CLASS_RT: -+ return &bfqg->async_bfqq[0][ioprio]; -+ case IOPRIO_CLASS_NONE: -+ ioprio = IOPRIO_NORM; -+ /* fall through */ -+ case IOPRIO_CLASS_BE: -+ return &bfqg->async_bfqq[1][ioprio]; -+ case IOPRIO_CLASS_IDLE: -+ return &bfqg->async_idle_bfqq; -+ default: -+ BUG(); -+ } -+} -+ -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, int is_sync, -+ struct bfq_io_cq *bic, gfp_t gfp_mask) -+{ -+ const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); -+ const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); -+ struct bfq_queue **async_bfqq = NULL; -+ struct bfq_queue *bfqq = NULL; -+ -+ if (!is_sync) { -+ async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, -+ ioprio); -+ bfqq = *async_bfqq; -+ } -+ -+ if (bfqq == NULL) -+ bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask); -+ -+ /* -+ * Pin the queue now that it's allocated, scheduler exit will -+ * prune 
it. -+ */ -+ if (!is_sync && *async_bfqq == NULL) { -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ *async_bfqq = bfqq; -+ } -+ -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ return bfqq; -+} -+ -+static void bfq_update_io_thinktime(struct bfq_data *bfqd, -+ struct bfq_io_cq *bic) -+{ -+ unsigned long elapsed = jiffies - bic->ttime.last_end_request; -+ unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); -+ -+ bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; -+ bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; -+ bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / -+ bic->ttime.ttime_samples; -+} -+ -+static void bfq_update_io_seektime(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ sector_t sdist; -+ u64 total; -+ -+ if (bfqq->last_request_pos < blk_rq_pos(rq)) -+ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; -+ else -+ sdist = bfqq->last_request_pos - blk_rq_pos(rq); -+ -+ /* -+ * Don't allow the seek distance to get too large from the -+ * odd fragment, pagein, etc. -+ */ -+ if (bfqq->seek_samples == 0) /* first request, not really a seek */ -+ sdist = 0; -+ else if (bfqq->seek_samples <= 60) /* second & third seek */ -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); -+ else -+ sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); -+ -+ bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; -+ bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; -+ total = bfqq->seek_total + (bfqq->seek_samples/2); -+ do_div(total, bfqq->seek_samples); -+ bfqq->seek_mean = (sector_t)total; -+ -+ bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, -+ (u64)bfqq->seek_mean); -+} -+ -+/* -+ * Disable idle window if the process thinks too long or seeks so much that -+ * it doesn't matter. 
-+ */ -+static void bfq_update_idle_window(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq, -+ struct bfq_io_cq *bic) -+{ -+ int enable_idle; -+ -+ /* Don't idle for async or idle io prio class. */ -+ if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) -+ return; -+ -+ enable_idle = bfq_bfqq_idle_window(bfqq); -+ -+ if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -+ bfqd->bfq_slice_idle == 0 || -+ (bfqd->hw_tag && BFQQ_SEEKY(bfqq) && -+ bfqq->wr_coeff == 1)) -+ enable_idle = 0; -+ else if (bfq_sample_valid(bic->ttime.ttime_samples)) { -+ if (bic->ttime.ttime_mean > bfqd->bfq_slice_idle && -+ bfqq->wr_coeff == 1) -+ enable_idle = 0; -+ else -+ enable_idle = 1; -+ } -+ bfq_log_bfqq(bfqd, bfqq, "update_idle_window: enable_idle %d", -+ enable_idle); -+ -+ if (enable_idle) -+ bfq_mark_bfqq_idle_window(bfqq); -+ else -+ bfq_clear_bfqq_idle_window(bfqq); -+} -+ -+/* -+ * Called when a new fs request (rq) is added to bfqq. Check if there's -+ * something we should do about it. -+ */ -+static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ struct bfq_io_cq *bic = RQ_BIC(rq); -+ -+ if (rq->cmd_flags & REQ_META) -+ bfqq->meta_pending++; -+ -+ bfq_update_io_thinktime(bfqd, bic); -+ bfq_update_io_seektime(bfqd, bfqq, rq); -+ if (!BFQQ_SEEKY(bfqq) && bfq_bfqq_constantly_seeky(bfqq)) { -+ bfq_clear_bfqq_constantly_seeky(bfqq); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || -+ !BFQQ_SEEKY(bfqq)) -+ bfq_update_idle_window(bfqd, bfqq, bic); -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -+ bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), -+ (long long unsigned)bfqq->seek_mean); -+ -+ bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); -+ -+ if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) { -+ int small_req = 
bfqq->queued[rq_is_sync(rq)] == 1 && -+ blk_rq_sectors(rq) < 32; -+ int budget_timeout = bfq_bfqq_budget_timeout(bfqq); -+ -+ /* -+ * There is just this request queued: if the request -+ * is small and the queue is not to be expired, then -+ * just exit. -+ * -+ * In this way, if the disk is being idled to wait for -+ * a new request from the in-service queue, we avoid -+ * unplugging the device and committing the disk to serve -+ * just a small request. On the contrary, we wait for -+ * the block layer to decide when to unplug the device: -+ * hopefully, new requests will be merged to this one -+ * quickly, then the device will be unplugged and -+ * larger requests will be dispatched. -+ */ -+ if (small_req && !budget_timeout) -+ return; -+ -+ /* -+ * A large enough request arrived, or the queue is to -+ * be expired: in both cases disk idling is to be -+ * stopped, so clear wait_request flag and reset -+ * timer. -+ */ -+ bfq_clear_bfqq_wait_request(bfqq); -+ del_timer(&bfqd->idle_slice_timer); -+ -+ /* -+ * The queue is not empty, because a new request just -+ * arrived. Hence we can safely expire the queue, in -+ * case of budget timeout, without risking that the -+ * timestamps of the queue are not updated correctly. -+ * See [1] for more details. -+ */ -+ if (budget_timeout) -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); -+ -+ /* -+ * Let the request rip immediately, or let a new queue be -+ * selected if bfqq has just been expired. 
-+ */ -+ __blk_run_queue(bfqd->queue); -+ } -+} -+ -+static void bfq_insert_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ assert_spin_locked(bfqd->queue->queue_lock); -+ bfq_init_prio_data(bfqq, RQ_BIC(rq)); -+ -+ bfq_add_request(rq); -+ -+ rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; -+ list_add_tail(&rq->queuelist, &bfqq->fifo); -+ -+ bfq_rq_enqueued(bfqd, bfqq, rq); -+} -+ -+static void bfq_update_hw_tag(struct bfq_data *bfqd) -+{ -+ bfqd->max_rq_in_driver = max(bfqd->max_rq_in_driver, -+ bfqd->rq_in_driver); -+ -+ if (bfqd->hw_tag == 1) -+ return; -+ -+ /* -+ * This sample is valid if the number of outstanding requests -+ * is large enough to allow a queueing behavior. Note that the -+ * sum is not exact, as it's not taking into account deactivated -+ * requests. -+ */ -+ if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD) -+ return; -+ -+ if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) -+ return; -+ -+ bfqd->hw_tag = bfqd->max_rq_in_driver > BFQ_HW_QUEUE_THRESHOLD; -+ bfqd->max_rq_in_driver = 0; -+ bfqd->hw_tag_samples = 0; -+} -+ -+static void bfq_completed_request(struct request_queue *q, struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_data *bfqd = bfqq->bfqd; -+ bool sync = bfq_bfqq_sync(bfqq); -+ -+ bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", -+ blk_rq_sectors(rq), sync); -+ -+ bfq_update_hw_tag(bfqd); -+ -+ BUG_ON(!bfqd->rq_in_driver); -+ BUG_ON(!bfqq->dispatched); -+ bfqd->rq_in_driver--; -+ bfqq->dispatched--; -+ -+ if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { -+ bfq_weights_tree_remove(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->busy_in_flight_queues); -+ bfqd->busy_in_flight_queues--; -+ if (bfq_bfqq_constantly_seeky(bfqq)) { -+ BUG_ON(!bfqd-> -+ const_seeky_busy_in_flight_queues); 
-+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ } -+ -+ if (sync) { -+ bfqd->sync_flight--; -+ RQ_BIC(rq)->ttime.last_end_request = jiffies; -+ } -+ -+ /* -+ * If we are waiting to discover whether the request pattern of the -+ * task associated with the queue is actually isochronous, and -+ * both requisites for this condition to hold are satisfied, then -+ * compute soft_rt_next_start (see the comments to the function -+ * bfq_bfqq_softrt_next_start()). -+ */ -+ if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 && -+ RB_EMPTY_ROOT(&bfqq->sort_list)) -+ bfqq->soft_rt_next_start = -+ bfq_bfqq_softrt_next_start(bfqd, bfqq); -+ -+ /* -+ * If this is the in-service queue, check if it needs to be expired, -+ * or if we want to idle in case it has no pending requests. -+ */ -+ if (bfqd->in_service_queue == bfqq) { -+ if (bfq_bfqq_budget_new(bfqq)) -+ bfq_set_budget_timeout(bfqd); -+ -+ if (bfq_bfqq_must_idle(bfqq)) { -+ bfq_arm_slice_timer(bfqd); -+ goto out; -+ } else if (bfq_may_expire_for_budg_timeout(bfqq)) -+ bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT); -+ else if (RB_EMPTY_ROOT(&bfqq->sort_list) && -+ (bfqq->dispatched == 0 || -+ !bfq_bfqq_must_not_expire(bfqq))) -+ bfq_bfqq_expire(bfqd, bfqq, 0, -+ BFQ_BFQQ_NO_MORE_REQUESTS); -+ } -+ -+ if (!bfqd->rq_in_driver) -+ bfq_schedule_dispatch(bfqd); -+ -+out: -+ return; -+} -+ -+static inline int __bfq_may_queue(struct bfq_queue *bfqq) -+{ -+ if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) { -+ bfq_clear_bfqq_must_alloc(bfqq); -+ return ELV_MQUEUE_MUST; -+ } -+ -+ return ELV_MQUEUE_MAY; -+} -+ -+static int bfq_may_queue(struct request_queue *q, int rw) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct task_struct *tsk = current; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq; -+ -+ /* -+ * Don't force setup of a queue from here, as a call to may_queue -+ * does not necessarily imply that a request actually will be -+ * queued. 
So just lookup a possibly existing queue, or return -+ * 'may queue' if that fails. -+ */ -+ bic = bfq_bic_lookup(bfqd, tsk->io_context); -+ if (bic == NULL) -+ return ELV_MQUEUE_MAY; -+ -+ bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); -+ if (bfqq != NULL) { -+ bfq_init_prio_data(bfqq, bic); -+ -+ return __bfq_may_queue(bfqq); -+ } -+ -+ return ELV_MQUEUE_MAY; -+} -+ -+/* -+ * Queue lock held here. -+ */ -+static void bfq_put_request(struct request *rq) -+{ -+ struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ -+ if (bfqq != NULL) { -+ const int rw = rq_data_dir(rq); -+ -+ BUG_ON(!bfqq->allocated[rw]); -+ bfqq->allocated[rw]--; -+ -+ rq->elv.priv[0] = NULL; -+ rq->elv.priv[1] = NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+} -+ -+static struct bfq_queue * -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -+ (long unsigned)bfqq->new_bfqq->pid); -+ bic_set_bfqq(bic, bfqq->new_bfqq, 1); -+ bfq_mark_bfqq_coop(bfqq->new_bfqq); -+ bfq_put_queue(bfqq); -+ return bic_to_bfqq(bic, 1); -+} -+ -+/* -+ * Returns NULL if a new bfqq should be allocated, or the old bfqq if this -+ * was the last process referring to said bfqq. -+ */ -+static struct bfq_queue * -+bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) -+{ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); -+ if (bfqq_process_refs(bfqq) == 1) { -+ bfqq->pid = current->pid; -+ bfq_clear_bfqq_coop(bfqq); -+ bfq_clear_bfqq_split_coop(bfqq); -+ return bfqq; -+ } -+ -+ bic_set_bfqq(bic, NULL, 1); -+ -+ bfq_put_cooperator(bfqq); -+ -+ bfq_put_queue(bfqq); -+ return NULL; -+} -+ -+/* -+ * Allocate bfq data structures associated with this request. 
-+ */ -+static int bfq_set_request(struct request_queue *q, struct request *rq, -+ struct bio *bio, gfp_t gfp_mask) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); -+ const int rw = rq_data_dir(rq); -+ const int is_sync = rq_is_sync(rq); -+ struct bfq_queue *bfqq; -+ struct bfq_group *bfqg; -+ unsigned long flags; -+ -+ might_sleep_if(gfp_mask & __GFP_WAIT); -+ -+ bfq_changed_ioprio(bic); -+ -+ spin_lock_irqsave(q->queue_lock, flags); -+ -+ if (bic == NULL) -+ goto queue_fail; -+ -+ bfqg = bfq_bic_update_cgroup(bic); -+ -+new_queue: -+ bfqq = bic_to_bfqq(bic, is_sync); -+ if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) { -+ bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); -+ bic_set_bfqq(bic, bfqq, is_sync); -+ } else { -+ /* -+ * If the queue was seeky for too long, break it apart. -+ */ -+ if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { -+ bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); -+ bfqq = bfq_split_bfqq(bic, bfqq); -+ if (!bfqq) -+ goto new_queue; -+ } -+ -+ /* -+ * Check to see if this queue is scheduled to merge with -+ * another closely cooperating queue. The merging of queues -+ * happens here as it must be done in process context. -+ * The reference on new_bfqq was taken in merge_bfqqs. 
-+ */ -+ if (bfqq->new_bfqq != NULL) -+ bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); -+ } -+ -+ bfqq->allocated[rw]++; -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqd, bfqq, "set_request: bfqq %p, %d", bfqq, -+ atomic_read(&bfqq->ref)); -+ -+ rq->elv.priv[0] = bic; -+ rq->elv.priv[1] = bfqq; -+ -+ spin_unlock_irqrestore(q->queue_lock, flags); -+ -+ return 0; -+ -+queue_fail: -+ bfq_schedule_dispatch(bfqd); -+ spin_unlock_irqrestore(q->queue_lock, flags); -+ -+ return 1; -+} -+ -+static void bfq_kick_queue(struct work_struct *work) -+{ -+ struct bfq_data *bfqd = -+ container_of(work, struct bfq_data, unplug_work); -+ struct request_queue *q = bfqd->queue; -+ -+ spin_lock_irq(q->queue_lock); -+ __blk_run_queue(q); -+ spin_unlock_irq(q->queue_lock); -+} -+ -+/* -+ * Handler of the expiration of the timer running if the in-service queue -+ * is idling inside its time slice. -+ */ -+static void bfq_idle_slice_timer(unsigned long data) -+{ -+ struct bfq_data *bfqd = (struct bfq_data *)data; -+ struct bfq_queue *bfqq; -+ unsigned long flags; -+ enum bfqq_expiration reason; -+ -+ spin_lock_irqsave(bfqd->queue->queue_lock, flags); -+ -+ bfqq = bfqd->in_service_queue; -+ /* -+ * Theoretical race here: the in-service queue can be NULL or -+ * different from the queue that was idling if the timer handler -+ * spins on the queue_lock and a new request arrives for the -+ * current queue and there is a full dispatch cycle that changes -+ * the in-service queue. This can hardly happen, but in the worst -+ * case we just expire a queue too early. 
-+ */ -+ if (bfqq != NULL) { -+ bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); -+ if (bfq_bfqq_budget_timeout(bfqq)) -+ /* -+ * Also here the queue can be safely expired -+ * for budget timeout without wasting -+ * guarantees -+ */ -+ reason = BFQ_BFQQ_BUDGET_TIMEOUT; -+ else if (bfqq->queued[0] == 0 && bfqq->queued[1] == 0) -+ /* -+ * The queue may not be empty upon timer expiration, -+ * because we may not disable the timer when the -+ * first request of the in-service queue arrives -+ * during disk idling. -+ */ -+ reason = BFQ_BFQQ_TOO_IDLE; -+ else -+ goto schedule_dispatch; -+ -+ bfq_bfqq_expire(bfqd, bfqq, 1, reason); -+ } -+ -+schedule_dispatch: -+ bfq_schedule_dispatch(bfqd); -+ -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); -+} -+ -+static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) -+{ -+ del_timer_sync(&bfqd->idle_slice_timer); -+ cancel_work_sync(&bfqd->unplug_work); -+} -+ -+static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd, -+ struct bfq_queue **bfqq_ptr) -+{ -+ struct bfq_group *root_group = bfqd->root_group; -+ struct bfq_queue *bfqq = *bfqq_ptr; -+ -+ bfq_log(bfqd, "put_async_bfqq: %p", bfqq); -+ if (bfqq != NULL) { -+ bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group); -+ bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ *bfqq_ptr = NULL; -+ } -+} -+ -+/* -+ * Release all the bfqg references to its async queues. If we are -+ * deallocating the group these queues may still contain requests, so -+ * we reparent them to the root cgroup (i.e., the only one that will -+ * exist for sure until all the requests on a device are gone). 
-+ */ -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) -+{ -+ int i, j; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < IOPRIO_BE_NR; j++) -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_bfqq[i][j]); -+ -+ __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); -+} -+ -+static void bfq_exit_queue(struct elevator_queue *e) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ struct request_queue *q = bfqd->queue; -+ struct bfq_queue *bfqq, *n; -+ -+ bfq_shutdown_timer_wq(bfqd); -+ -+ spin_lock_irq(q->queue_lock); -+ -+ BUG_ON(bfqd->in_service_queue != NULL); -+ list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) -+ bfq_deactivate_bfqq(bfqd, bfqq, 0); -+ -+ bfq_disconnect_groups(bfqd); -+ spin_unlock_irq(q->queue_lock); -+ -+ bfq_shutdown_timer_wq(bfqd); -+ -+ synchronize_rcu(); -+ -+ BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -+ -+ bfq_free_root_group(bfqd); -+ kfree(bfqd); -+} -+ -+static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) -+{ -+ struct bfq_group *bfqg; -+ struct bfq_data *bfqd; -+ struct elevator_queue *eq; -+ -+ eq = elevator_alloc(q, e); -+ if (eq == NULL) -+ return -ENOMEM; -+ -+ bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node); -+ if (bfqd == NULL) { -+ kobject_put(&eq->kobj); -+ return -ENOMEM; -+ } -+ eq->elevator_data = bfqd; -+ -+ /* -+ * Our fallback bfqq if bfq_find_alloc_queue() runs into OOM issues. -+ * Grab a permanent reference to it, so that the normal code flow -+ * will not attempt to free it. 
-+ */ -+ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, 1, 0); -+ atomic_inc(&bfqd->oom_bfqq.ref); -+ -+ bfqd->queue = q; -+ -+ spin_lock_irq(q->queue_lock); -+ q->elevator = eq; -+ spin_unlock_irq(q->queue_lock); -+ -+ bfqg = bfq_alloc_root_group(bfqd, q->node); -+ if (bfqg == NULL) { -+ kfree(bfqd); -+ kobject_put(&eq->kobj); -+ return -ENOMEM; -+ } -+ -+ bfqd->root_group = bfqg; -+#ifdef CONFIG_CGROUP_BFQIO -+ bfqd->active_numerous_groups = 0; -+#endif -+ -+ init_timer(&bfqd->idle_slice_timer); -+ bfqd->idle_slice_timer.function = bfq_idle_slice_timer; -+ bfqd->idle_slice_timer.data = (unsigned long)bfqd; -+ -+ bfqd->rq_pos_tree = RB_ROOT; -+ bfqd->queue_weights_tree = RB_ROOT; -+ bfqd->group_weights_tree = RB_ROOT; -+ -+ INIT_WORK(&bfqd->unplug_work, bfq_kick_queue); -+ -+ INIT_LIST_HEAD(&bfqd->active_list); -+ INIT_LIST_HEAD(&bfqd->idle_list); -+ -+ bfqd->hw_tag = -1; -+ -+ bfqd->bfq_max_budget = bfq_default_max_budget; -+ -+ bfqd->bfq_quantum = bfq_quantum; -+ bfqd->bfq_fifo_expire[0] = bfq_fifo_expire[0]; -+ bfqd->bfq_fifo_expire[1] = bfq_fifo_expire[1]; -+ bfqd->bfq_back_max = bfq_back_max; -+ bfqd->bfq_back_penalty = bfq_back_penalty; -+ bfqd->bfq_slice_idle = bfq_slice_idle; -+ bfqd->bfq_class_idle_last_service = 0; -+ bfqd->bfq_max_budget_async_rq = bfq_max_budget_async_rq; -+ bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; -+ bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; -+ -+ bfqd->bfq_coop_thresh = 2; -+ bfqd->bfq_failed_cooperations = 7000; -+ bfqd->bfq_requests_within_timer = 120; -+ -+ bfqd->low_latency = true; -+ -+ bfqd->bfq_wr_coeff = 20; -+ bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); -+ bfqd->bfq_wr_max_time = 0; -+ bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); -+ bfqd->bfq_wr_min_inter_arr_async = msecs_to_jiffies(500); -+ bfqd->bfq_wr_max_softrt_rate = 7000; /* -+ * Approximate rate required -+ * to playback or record a -+ * high-definition compressed -+ * video. 
-+ */ -+ bfqd->wr_busy_queues = 0; -+ bfqd->busy_in_flight_queues = 0; -+ bfqd->const_seeky_busy_in_flight_queues = 0; -+ -+ /* -+ * Begin by assuming, optimistically, that the device peak rate is -+ * equal to the highest reference rate. -+ */ -+ bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] * -+ T_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)]; -+ bfqd->device_speed = BFQ_BFQD_FAST; -+ -+ return 0; -+} -+ -+static void bfq_slab_kill(void) -+{ -+ if (bfq_pool != NULL) -+ kmem_cache_destroy(bfq_pool); -+} -+ -+static int __init bfq_slab_setup(void) -+{ -+ bfq_pool = KMEM_CACHE(bfq_queue, 0); -+ if (bfq_pool == NULL) -+ return -ENOMEM; -+ return 0; -+} -+ -+static ssize_t bfq_var_show(unsigned int var, char *page) -+{ -+ return sprintf(page, "%d\n", var); -+} -+ -+static ssize_t bfq_var_store(unsigned long *var, const char *page, -+ size_t count) -+{ -+ unsigned long new_val; -+ int ret = kstrtoul(page, 10, &new_val); -+ -+ if (ret == 0) -+ *var = new_val; -+ -+ return count; -+} -+ -+static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? 
-+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : -+ jiffies_to_msecs(bfq_wr_duration(bfqd))); -+} -+ -+static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) -+{ -+ struct bfq_queue *bfqq; -+ struct bfq_data *bfqd = e->elevator_data; -+ ssize_t num_char = 0; -+ -+ num_char += sprintf(page + num_char, "Tot reqs queued %d\n\n", -+ bfqd->queued); -+ -+ spin_lock_irq(bfqd->queue->queue_lock); -+ -+ num_char += sprintf(page + num_char, "Active:\n"); -+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ bfqq->queued[0], -+ bfqq->queued[1], -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ num_char += sprintf(page + num_char, "Idle:\n"); -+ list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ jiffies_to_msecs(jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); -+ } -+ -+ spin_unlock_irq(bfqd->queue->queue_lock); -+ -+ return num_char; -+} -+ -+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ -+static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned int __data = __VAR; \ -+ if (__CONV) \ -+ __data = jiffies_to_msecs(__data); \ -+ return bfq_var_show(__data, (page)); \ -+} -+SHOW_FUNCTION(bfq_quantum_show, bfqd->bfq_quantum, 0); -+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); -+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); -+SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); -+SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); -+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); -+SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); 
-+SHOW_FUNCTION(bfq_max_budget_async_rq_show, -+ bfqd->bfq_max_budget_async_rq, 0); -+SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout[BLK_RW_SYNC], 1); -+SHOW_FUNCTION(bfq_timeout_async_show, bfqd->bfq_timeout[BLK_RW_ASYNC], 1); -+SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); -+SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); -+SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); -+SHOW_FUNCTION(bfq_wr_min_idle_time_show, bfqd->bfq_wr_min_idle_time, 1); -+SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, -+ 1); -+SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); -+#undef SHOW_FUNCTION -+ -+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -+static ssize_t \ -+__FUNC(struct elevator_queue *e, const char *page, size_t count) \ -+{ \ -+ struct bfq_data *bfqd = e->elevator_data; \ -+ unsigned long uninitialized_var(__data); \ -+ int ret = bfq_var_store(&__data, (page), count); \ -+ if (__data < (MIN)) \ -+ __data = (MIN); \ -+ else if (__data > (MAX)) \ -+ __data = (MAX); \ -+ if (__CONV) \ -+ *(__PTR) = msecs_to_jiffies(__data); \ -+ else \ -+ *(__PTR) = __data; \ -+ return ret; \ -+} -+STORE_FUNCTION(bfq_quantum_store, &bfqd->bfq_quantum, 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); -+STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, -+ INT_MAX, 0); -+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, -+ 1, INT_MAX, 0); -+STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); 
-+STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, -+ 1); -+STORE_FUNCTION(bfq_wr_min_idle_time_store, &bfqd->bfq_wr_min_idle_time, 0, -+ INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_min_inter_arr_async_store, -+ &bfqd->bfq_wr_min_inter_arr_async, 0, INT_MAX, 1); -+STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, -+ INT_MAX, 0); -+#undef STORE_FUNCTION -+ -+/* do nothing for the moment */ -+static ssize_t bfq_weights_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ return count; -+} -+ -+static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) -+{ -+ u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ -+ if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) -+ return bfq_calc_max_budget(bfqd->peak_rate, timeout); -+ else -+ return bfq_default_max_budget; -+} -+ -+static ssize_t bfq_max_budget_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data == 0) -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ else { -+ if (__data > INT_MAX) -+ __data = INT_MAX; -+ bfqd->bfq_max_budget = __data; -+ } -+ -+ bfqd->bfq_user_max_budget = __data; -+ -+ return ret; -+} -+ -+static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data < 1) -+ __data = 1; -+ else if (__data > INT_MAX) -+ __data = INT_MAX; -+ -+ bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); -+ if (bfqd->bfq_user_max_budget == 0) -+ bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); -+ -+ return ret; -+} -+ -+static ssize_t 
bfq_low_latency_store(struct elevator_queue *e, -+ const char *page, size_t count) -+{ -+ struct bfq_data *bfqd = e->elevator_data; -+ unsigned long uninitialized_var(__data); -+ int ret = bfq_var_store(&__data, (page), count); -+ -+ if (__data > 1) -+ __data = 1; -+ if (__data == 0 && bfqd->low_latency != 0) -+ bfq_end_wr(bfqd); -+ bfqd->low_latency = __data; -+ -+ return ret; -+} -+ -+#define BFQ_ATTR(name) \ -+ __ATTR(name, S_IRUGO|S_IWUSR, bfq_##name##_show, bfq_##name##_store) -+ -+static struct elv_fs_entry bfq_attrs[] = { -+ BFQ_ATTR(quantum), -+ BFQ_ATTR(fifo_expire_sync), -+ BFQ_ATTR(fifo_expire_async), -+ BFQ_ATTR(back_seek_max), -+ BFQ_ATTR(back_seek_penalty), -+ BFQ_ATTR(slice_idle), -+ BFQ_ATTR(max_budget), -+ BFQ_ATTR(max_budget_async_rq), -+ BFQ_ATTR(timeout_sync), -+ BFQ_ATTR(timeout_async), -+ BFQ_ATTR(low_latency), -+ BFQ_ATTR(wr_coeff), -+ BFQ_ATTR(wr_max_time), -+ BFQ_ATTR(wr_rt_max_time), -+ BFQ_ATTR(wr_min_idle_time), -+ BFQ_ATTR(wr_min_inter_arr_async), -+ BFQ_ATTR(wr_max_softrt_rate), -+ BFQ_ATTR(weights), -+ __ATTR_NULL -+}; -+ -+static struct elevator_type iosched_bfq = { -+ .ops = { -+ .elevator_merge_fn = bfq_merge, -+ .elevator_merged_fn = bfq_merged_request, -+ .elevator_merge_req_fn = bfq_merged_requests, -+ .elevator_allow_merge_fn = bfq_allow_merge, -+ .elevator_dispatch_fn = bfq_dispatch_requests, -+ .elevator_add_req_fn = bfq_insert_request, -+ .elevator_activate_req_fn = bfq_activate_request, -+ .elevator_deactivate_req_fn = bfq_deactivate_request, -+ .elevator_completed_req_fn = bfq_completed_request, -+ .elevator_former_req_fn = elv_rb_former_request, -+ .elevator_latter_req_fn = elv_rb_latter_request, -+ .elevator_init_icq_fn = bfq_init_icq, -+ .elevator_exit_icq_fn = bfq_exit_icq, -+ .elevator_set_req_fn = bfq_set_request, -+ .elevator_put_req_fn = bfq_put_request, -+ .elevator_may_queue_fn = bfq_may_queue, -+ .elevator_init_fn = bfq_init_queue, -+ .elevator_exit_fn = bfq_exit_queue, -+ }, -+ .icq_size = sizeof(struct 
bfq_io_cq), -+ .icq_align = __alignof__(struct bfq_io_cq), -+ .elevator_attrs = bfq_attrs, -+ .elevator_name = "bfq", -+ .elevator_owner = THIS_MODULE, -+}; -+ -+static int __init bfq_init(void) -+{ -+ /* -+ * Can be 0 on HZ < 1000 setups. -+ */ -+ if (bfq_slice_idle == 0) -+ bfq_slice_idle = 1; -+ -+ if (bfq_timeout_async == 0) -+ bfq_timeout_async = 1; -+ -+ if (bfq_slab_setup()) -+ return -ENOMEM; -+ -+ /* -+ * Times to load large popular applications for the typical systems -+ * installed on the reference devices (see the comments before the -+ * definitions of the two arrays). -+ */ -+ T_slow[0] = msecs_to_jiffies(2600); -+ T_slow[1] = msecs_to_jiffies(1000); -+ T_fast[0] = msecs_to_jiffies(5500); -+ T_fast[1] = msecs_to_jiffies(2000); -+ -+ /* -+ * Thresholds that determine the switch between speed classes (see -+ * the comments before the definition of the array). -+ */ -+ device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2; -+ device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2; -+ -+ elv_register(&iosched_bfq); -+ pr_info("BFQ I/O-scheduler version: v7r5"); -+ -+ return 0; -+} -+ -+static void __exit bfq_exit(void) -+{ -+ elv_unregister(&iosched_bfq); -+ bfq_slab_kill(); -+} -+ -+module_init(bfq_init); -+module_exit(bfq_exit); -+ -+MODULE_AUTHOR("Fabio Checconi, Paolo Valente"); -+MODULE_LICENSE("GPL"); -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -new file mode 100644 -index 0000000..c4831b7 ---- /dev/null -+++ block/bfq-sched.c -@@ -0,0 +1,1207 @@ -+/* -+ * BFQ: Hierarchical B-WF2Q+ scheduler. 
-+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ */ -+ -+#ifdef CONFIG_CGROUP_BFQIO -+#define for_each_entity(entity) \ -+ for (; entity != NULL; entity = entity->parent) -+ -+#define for_each_entity_safe(entity, parent) \ -+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent) -+ -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -+ int extract, -+ struct bfq_data *bfqd); -+ -+static inline void bfq_update_budget(struct bfq_entity *next_in_service) -+{ -+ struct bfq_entity *bfqg_entity; -+ struct bfq_group *bfqg; -+ struct bfq_sched_data *group_sd; -+ -+ BUG_ON(next_in_service == NULL); -+ -+ group_sd = next_in_service->sched_data; -+ -+ bfqg = container_of(group_sd, struct bfq_group, sched_data); -+ /* -+ * bfq_group's my_entity field is not NULL only if the group -+ * is not the root group. We must not touch the root entity -+ * as it must never become an in-service entity. -+ */ -+ bfqg_entity = bfqg->my_entity; -+ if (bfqg_entity != NULL) -+ bfqg_entity->budget = next_in_service->budget; -+} -+ -+static int bfq_update_next_in_service(struct bfq_sched_data *sd) -+{ -+ struct bfq_entity *next_in_service; -+ -+ if (sd->in_service_entity != NULL) -+ /* will update/requeue at the end of service */ -+ return 0; -+ -+ /* -+ * NOTE: this can be improved in many ways, such as returning -+ * 1 (and thus propagating upwards the update) only when the -+ * budget changes, or caching the bfqq that will be scheduled -+ * next from this subtree. By now we worry more about -+ * correctness than about performance... 
-+ */ -+ next_in_service = bfq_lookup_next_entity(sd, 0, NULL); -+ sd->next_in_service = next_in_service; -+ -+ if (next_in_service != NULL) -+ bfq_update_budget(next_in_service); -+ -+ return 1; -+} -+ -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *entity) -+{ -+ BUG_ON(sd->next_in_service != entity); -+} -+#else -+#define for_each_entity(entity) \ -+ for (; entity != NULL; entity = NULL) -+ -+#define for_each_entity_safe(entity, parent) \ -+ for (parent = NULL; entity != NULL; entity = parent) -+ -+static inline int bfq_update_next_in_service(struct bfq_sched_data *sd) -+{ -+ return 0; -+} -+ -+static inline void bfq_check_next_in_service(struct bfq_sched_data *sd, -+ struct bfq_entity *entity) -+{ -+} -+ -+static inline void bfq_update_budget(struct bfq_entity *next_in_service) -+{ -+} -+#endif -+ -+/* -+ * Shift for timestamp calculations. This actually limits the maximum -+ * service allowed in one timestamp delta (small shift values increase it), -+ * the maximum total weight that can be used for the queues in the system -+ * (big shift values increase it), and the period of virtual time -+ * wraparounds. -+ */ -+#define WFQ_SERVICE_SHIFT 22 -+ -+/** -+ * bfq_gt - compare two timestamps. -+ * @a: first ts. -+ * @b: second ts. -+ * -+ * Return @a > @b, dealing with wrapping correctly. -+ */ -+static inline int bfq_gt(u64 a, u64 b) -+{ -+ return (s64)(a - b) > 0; -+} -+ -+static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = NULL; -+ -+ BUG_ON(entity == NULL); -+ -+ if (entity->my_sched_data == NULL) -+ bfqq = container_of(entity, struct bfq_queue, entity); -+ -+ return bfqq; -+} -+ -+ -+/** -+ * bfq_delta - map service into the virtual time domain. -+ * @service: amount of service. -+ * @weight: scale factor (weight of an entity or weight sum). 
-+ */ -+static inline u64 bfq_delta(unsigned long service, -+ unsigned long weight) -+{ -+ u64 d = (u64)service << WFQ_SERVICE_SHIFT; -+ -+ do_div(d, weight); -+ return d; -+} -+ -+/** -+ * bfq_calc_finish - assign the finish time to an entity. -+ * @entity: the entity to act upon. -+ * @service: the service to be charged to the entity. -+ */ -+static inline void bfq_calc_finish(struct bfq_entity *entity, -+ unsigned long service) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ BUG_ON(entity->weight == 0); -+ -+ entity->finish = entity->start + -+ bfq_delta(service, entity->weight); -+ -+ if (bfqq != NULL) { -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_finish: serv %lu, w %d", -+ service, entity->weight); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, -+ "calc_finish: start %llu, finish %llu, delta %llu", -+ entity->start, entity->finish, -+ bfq_delta(service, entity->weight)); -+ } -+} -+ -+/** -+ * bfq_entity_of - get an entity from a node. -+ * @node: the node field of the entity. -+ * -+ * Convert a node pointer to the relative entity. This is used only -+ * to simplify the logic of some functions and not as the generic -+ * conversion mechanism because, e.g., in the tree walking functions, -+ * the check for a %NULL value would be redundant. -+ */ -+static inline struct bfq_entity *bfq_entity_of(struct rb_node *node) -+{ -+ struct bfq_entity *entity = NULL; -+ -+ if (node != NULL) -+ entity = rb_entry(node, struct bfq_entity, rb_node); -+ -+ return entity; -+} -+ -+/** -+ * bfq_extract - remove an entity from a tree. -+ * @root: the tree root. -+ * @entity: the entity to remove. -+ */ -+static inline void bfq_extract(struct rb_root *root, -+ struct bfq_entity *entity) -+{ -+ BUG_ON(entity->tree != root); -+ -+ entity->tree = NULL; -+ rb_erase(&entity->rb_node, root); -+} -+ -+/** -+ * bfq_idle_extract - extract an entity from the idle tree. -+ * @st: the service tree of the owning @entity. -+ * @entity: the entity being removed. 
-+ */ -+static void bfq_idle_extract(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *next; -+ -+ BUG_ON(entity->tree != &st->idle); -+ -+ if (entity == st->first_idle) { -+ next = rb_next(&entity->rb_node); -+ st->first_idle = bfq_entity_of(next); -+ } -+ -+ if (entity == st->last_idle) { -+ next = rb_prev(&entity->rb_node); -+ st->last_idle = bfq_entity_of(next); -+ } -+ -+ bfq_extract(&st->idle, entity); -+ -+ if (bfqq != NULL) -+ list_del(&bfqq->bfqq_list); -+} -+ -+/** -+ * bfq_insert - generic tree insertion. -+ * @root: tree root. -+ * @entity: entity to insert. -+ * -+ * This is used for the idle and the active tree, since they are both -+ * ordered by finish time. -+ */ -+static void bfq_insert(struct rb_root *root, struct bfq_entity *entity) -+{ -+ struct bfq_entity *entry; -+ struct rb_node **node = &root->rb_node; -+ struct rb_node *parent = NULL; -+ -+ BUG_ON(entity->tree != NULL); -+ -+ while (*node != NULL) { -+ parent = *node; -+ entry = rb_entry(parent, struct bfq_entity, rb_node); -+ -+ if (bfq_gt(entry->finish, entity->finish)) -+ node = &parent->rb_left; -+ else -+ node = &parent->rb_right; -+ } -+ -+ rb_link_node(&entity->rb_node, parent, node); -+ rb_insert_color(&entity->rb_node, root); -+ -+ entity->tree = root; -+} -+ -+/** -+ * bfq_update_min - update the min_start field of a entity. -+ * @entity: the entity to update. -+ * @node: one of its children. -+ * -+ * This function is called when @entity may store an invalid value for -+ * min_start due to updates to the active tree. The function assumes -+ * that the subtree rooted at @node (which may be its left or its right -+ * child) has a valid min_start value. 
-+ */ -+static inline void bfq_update_min(struct bfq_entity *entity, -+ struct rb_node *node) -+{ -+ struct bfq_entity *child; -+ -+ if (node != NULL) { -+ child = rb_entry(node, struct bfq_entity, rb_node); -+ if (bfq_gt(entity->min_start, child->min_start)) -+ entity->min_start = child->min_start; -+ } -+} -+ -+/** -+ * bfq_update_active_node - recalculate min_start. -+ * @node: the node to update. -+ * -+ * @node may have changed position or one of its children may have moved, -+ * this function updates its min_start value. The left and right subtrees -+ * are assumed to hold a correct min_start value. -+ */ -+static inline void bfq_update_active_node(struct rb_node *node) -+{ -+ struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); -+ -+ entity->min_start = entity->start; -+ bfq_update_min(entity, node->rb_right); -+ bfq_update_min(entity, node->rb_left); -+} -+ -+/** -+ * bfq_update_active_tree - update min_start for the whole active tree. -+ * @node: the starting node. -+ * -+ * @node must be the deepest modified node after an update. This function -+ * updates its min_start using the values held by its children, assuming -+ * that they did not change, and then updates all the nodes that may have -+ * changed in the path to the root. The only nodes that may have changed -+ * are the ones in the path or their siblings. 
-+ */ -+static void bfq_update_active_tree(struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+up: -+ bfq_update_active_node(node); -+ -+ parent = rb_parent(node); -+ if (parent == NULL) -+ return; -+ -+ if (node == parent->rb_left && parent->rb_right != NULL) -+ bfq_update_active_node(parent->rb_right); -+ else if (parent->rb_left != NULL) -+ bfq_update_active_node(parent->rb_left); -+ -+ node = parent; -+ goto up; -+} -+ -+static void bfq_weights_tree_add(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root); -+ -+static void bfq_weights_tree_remove(struct bfq_data *bfqd, -+ struct bfq_entity *entity, -+ struct rb_root *root); -+ -+ -+/** -+ * bfq_active_insert - insert an entity in the active tree of its -+ * group/device. -+ * @st: the service tree of the entity. -+ * @entity: the entity being inserted. -+ * -+ * The active tree is ordered by finish time, but an extra key is kept -+ * per each node, containing the minimum value for the start times of -+ * its children (and the node itself), so it's possible to search for -+ * the eligible node with the lowest finish time in logarithmic time. 
-+ */ -+static void bfq_active_insert(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *node = &entity->rb_node; -+#ifdef CONFIG_CGROUP_BFQIO -+ struct bfq_sched_data *sd = NULL; -+ struct bfq_group *bfqg = NULL; -+ struct bfq_data *bfqd = NULL; -+#endif -+ -+ bfq_insert(&st->active, entity); -+ -+ if (node->rb_left != NULL) -+ node = node->rb_left; -+ else if (node->rb_right != NULL) -+ node = node->rb_right; -+ -+ bfq_update_active_tree(node); -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ sd = entity->sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+#endif -+ if (bfqq != NULL) -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); -+#ifdef CONFIG_CGROUP_BFQIO -+ else { /* bfq_group */ -+ BUG_ON(!bfqd); -+ bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree); -+ } -+ if (bfqg != bfqd->root_group) { -+ BUG_ON(!bfqg); -+ BUG_ON(!bfqd); -+ bfqg->active_entities++; -+ if (bfqg->active_entities == 2) -+ bfqd->active_numerous_groups++; -+ } -+#endif -+} -+ -+/** -+ * bfq_ioprio_to_weight - calc a weight from an ioprio. -+ * @ioprio: the ioprio value to convert. -+ */ -+static inline unsigned short bfq_ioprio_to_weight(int ioprio) -+{ -+ BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); -+ return IOPRIO_BE_NR - ioprio; -+} -+ -+/** -+ * bfq_weight_to_ioprio - calc an ioprio from a weight. -+ * @weight: the weight value to convert. -+ * -+ * To preserve as mush as possible the old only-ioprio user interface, -+ * 0 is used as an escape ioprio value for weights (numerically) equal or -+ * larger than IOPRIO_BE_NR -+ */ -+static inline unsigned short bfq_weight_to_ioprio(int weight) -+{ -+ BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT); -+ return IOPRIO_BE_NR - weight < 0 ? 
0 : IOPRIO_BE_NR - weight; -+} -+ -+static inline void bfq_get_entity(struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ -+ if (bfqq != NULL) { -+ atomic_inc(&bfqq->ref); -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ } -+} -+ -+/** -+ * bfq_find_deepest - find the deepest node that an extraction can modify. -+ * @node: the node being removed. -+ * -+ * Do the first step of an extraction in an rb tree, looking for the -+ * node that will replace @node, and returning the deepest node that -+ * the following modifications to the tree can touch. If @node is the -+ * last node in the tree return %NULL. -+ */ -+static struct rb_node *bfq_find_deepest(struct rb_node *node) -+{ -+ struct rb_node *deepest; -+ -+ if (node->rb_right == NULL && node->rb_left == NULL) -+ deepest = rb_parent(node); -+ else if (node->rb_right == NULL) -+ deepest = node->rb_left; -+ else if (node->rb_left == NULL) -+ deepest = node->rb_right; -+ else { -+ deepest = rb_next(node); -+ if (deepest->rb_right != NULL) -+ deepest = deepest->rb_right; -+ else if (rb_parent(deepest) != node) -+ deepest = rb_parent(deepest); -+ } -+ -+ return deepest; -+} -+ -+/** -+ * bfq_active_extract - remove an entity from the active tree. -+ * @st: the service_tree containing the tree. -+ * @entity: the entity being removed. 
-+ */ -+static void bfq_active_extract(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct rb_node *node; -+#ifdef CONFIG_CGROUP_BFQIO -+ struct bfq_sched_data *sd = NULL; -+ struct bfq_group *bfqg = NULL; -+ struct bfq_data *bfqd = NULL; -+#endif -+ -+ node = bfq_find_deepest(&entity->rb_node); -+ bfq_extract(&st->active, entity); -+ -+ if (node != NULL) -+ bfq_update_active_tree(node); -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ sd = entity->sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+#endif -+ if (bfqq != NULL) -+ list_del(&bfqq->bfqq_list); -+#ifdef CONFIG_CGROUP_BFQIO -+ else { /* bfq_group */ -+ BUG_ON(!bfqd); -+ bfq_weights_tree_remove(bfqd, entity, -+ &bfqd->group_weights_tree); -+ } -+ if (bfqg != bfqd->root_group) { -+ BUG_ON(!bfqg); -+ BUG_ON(!bfqd); -+ BUG_ON(!bfqg->active_entities); -+ bfqg->active_entities--; -+ if (bfqg->active_entities == 1) { -+ BUG_ON(!bfqd->active_numerous_groups); -+ bfqd->active_numerous_groups--; -+ } -+ } -+#endif -+} -+ -+/** -+ * bfq_idle_insert - insert an entity into the idle tree. -+ * @st: the service tree containing the tree. -+ * @entity: the entity to insert. -+ */ -+static void bfq_idle_insert(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_entity *first_idle = st->first_idle; -+ struct bfq_entity *last_idle = st->last_idle; -+ -+ if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish)) -+ st->first_idle = entity; -+ if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish)) -+ st->last_idle = entity; -+ -+ bfq_insert(&st->idle, entity); -+ -+ if (bfqq != NULL) -+ list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list); -+} -+ -+/** -+ * bfq_forget_entity - remove an entity from the wfq trees. -+ * @st: the service tree. -+ * @entity: the entity being removed. 
-+ * -+ * Update the device status and forget everything about @entity, putting -+ * the device reference to it, if it is a queue. Entities belonging to -+ * groups are not refcounted. -+ */ -+static void bfq_forget_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ struct bfq_sched_data *sd; -+ -+ BUG_ON(!entity->on_st); -+ -+ entity->on_st = 0; -+ st->wsum -= entity->weight; -+ if (bfqq != NULL) { -+ sd = entity->sched_data; -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", -+ bfqq, atomic_read(&bfqq->ref)); -+ bfq_put_queue(bfqq); -+ } -+} -+ -+/** -+ * bfq_put_idle_entity - release the idle tree ref of an entity. -+ * @st: service tree for the entity. -+ * @entity: the entity being released. -+ */ -+static void bfq_put_idle_entity(struct bfq_service_tree *st, -+ struct bfq_entity *entity) -+{ -+ bfq_idle_extract(st, entity); -+ bfq_forget_entity(st, entity); -+} -+ -+/** -+ * bfq_forget_idle - update the idle tree if necessary. -+ * @st: the service tree to act upon. -+ * -+ * To preserve the global O(log N) complexity we only remove one entry here; -+ * as the idle tree will not grow indefinitely this can be done safely. -+ */ -+static void bfq_forget_idle(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *first_idle = st->first_idle; -+ struct bfq_entity *last_idle = st->last_idle; -+ -+ if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL && -+ !bfq_gt(last_idle->finish, st->vtime)) { -+ /* -+ * Forget the whole idle tree, increasing the vtime past -+ * the last finish time of idle entities. 
-+ */ -+ st->vtime = last_idle->finish; -+ } -+ -+ if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime)) -+ bfq_put_idle_entity(st, first_idle); -+} -+ -+static struct bfq_service_tree * -+__bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, -+ struct bfq_entity *entity) -+{ -+ struct bfq_service_tree *new_st = old_st; -+ -+ if (entity->ioprio_changed) { -+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -+ unsigned short prev_weight, new_weight; -+ struct bfq_data *bfqd = NULL; -+ struct rb_root *root; -+#ifdef CONFIG_CGROUP_BFQIO -+ struct bfq_sched_data *sd; -+ struct bfq_group *bfqg; -+#endif -+ -+ if (bfqq != NULL) -+ bfqd = bfqq->bfqd; -+#ifdef CONFIG_CGROUP_BFQIO -+ else { -+ sd = entity->my_sched_data; -+ bfqg = container_of(sd, struct bfq_group, sched_data); -+ BUG_ON(!bfqg); -+ bfqd = (struct bfq_data *)bfqg->bfqd; -+ BUG_ON(!bfqd); -+ } -+#endif -+ -+ BUG_ON(old_st->wsum < entity->weight); -+ old_st->wsum -= entity->weight; -+ -+ if (entity->new_weight != entity->orig_weight) { -+ entity->orig_weight = entity->new_weight; -+ entity->ioprio = -+ bfq_weight_to_ioprio(entity->orig_weight); -+ } else if (entity->new_ioprio != entity->ioprio) { -+ entity->ioprio = entity->new_ioprio; -+ entity->orig_weight = -+ bfq_ioprio_to_weight(entity->ioprio); -+ } else -+ entity->new_weight = entity->orig_weight = -+ bfq_ioprio_to_weight(entity->ioprio); -+ -+ entity->ioprio_class = entity->new_ioprio_class; -+ entity->ioprio_changed = 0; -+ -+ /* -+ * NOTE: here we may be changing the weight too early, -+ * this will cause unfairness. The correct approach -+ * would have required additional complexity to defer -+ * weight changes to the proper time instants (i.e., -+ * when entity->finish <= old_st->vtime). -+ */ -+ new_st = bfq_entity_service_tree(entity); -+ -+ prev_weight = entity->weight; -+ new_weight = entity->orig_weight * -+ (bfqq != NULL ? 
bfqq->wr_coeff : 1); -+ /* -+ * If the weight of the entity changes, remove the entity -+ * from its old weight counter (if there is a counter -+ * associated with the entity), and add it to the counter -+ * associated with its new weight. -+ */ -+ if (prev_weight != new_weight) { -+ root = bfqq ? &bfqd->queue_weights_tree : -+ &bfqd->group_weights_tree; -+ bfq_weights_tree_remove(bfqd, entity, root); -+ } -+ entity->weight = new_weight; -+ /* -+ * Add the entity to its weights tree only if it is -+ * not associated with a weight-raised queue. -+ */ -+ if (prev_weight != new_weight && -+ (bfqq ? bfqq->wr_coeff == 1 : 1)) -+ /* If we get here, root has been initialized. */ -+ bfq_weights_tree_add(bfqd, entity, root); -+ -+ new_st->wsum += entity->weight; -+ -+ if (new_st != old_st) -+ entity->start = new_st->vtime; -+ } -+ -+ return new_st; -+} -+ -+/** -+ * bfq_bfqq_served - update the scheduler status after selection for -+ * service. -+ * @bfqq: the queue being served. -+ * @served: bytes to transfer. -+ * -+ * NOTE: this can be optimized, as the timestamps of upper level entities -+ * are synchronized every time a new bfqq is selected for service. By now, -+ * we keep it to better check consistency. -+ */ -+static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ struct bfq_service_tree *st; -+ -+ for_each_entity(entity) { -+ st = bfq_entity_service_tree(entity); -+ -+ entity->service += served; -+ BUG_ON(entity->service > entity->budget); -+ BUG_ON(st->wsum == 0); -+ -+ st->vtime += bfq_delta(served, st->wsum); -+ bfq_forget_idle(st); -+ } -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served); -+} -+ -+/** -+ * bfq_bfqq_charge_full_budget - set the service to the entity budget. -+ * @bfqq: the queue that needs a service update. 
-+ * -+ * When it's not possible to be fair in the service domain, because -+ * a queue is not consuming its budget fast enough (the meaning of -+ * fast depends on the timeout parameter), we charge it a full -+ * budget. In this way we should obtain a sort of time-domain -+ * fairness among all the seeky/slow queues. -+ */ -+static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); -+ -+ bfq_bfqq_served(bfqq, entity->budget - entity->service); -+} -+ -+/** -+ * __bfq_activate_entity - activate an entity. -+ * @entity: the entity being activated. -+ * -+ * Called whenever an entity is activated, i.e., it is not active and one -+ * of its children receives a new request, or has to be reactivated due to -+ * budget exhaustion. It uses the current budget of the entity (and the -+ * service received if @entity is active) of the queue to calculate its -+ * timestamps. -+ */ -+static void __bfq_activate_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ -+ if (entity == sd->in_service_entity) { -+ BUG_ON(entity->tree != NULL); -+ /* -+ * If we are requeueing the current entity we have -+ * to take care of not charging to it service it has -+ * not received. -+ */ -+ bfq_calc_finish(entity, entity->service); -+ entity->start = entity->finish; -+ sd->in_service_entity = NULL; -+ } else if (entity->tree == &st->active) { -+ /* -+ * Requeueing an entity due to a change of some -+ * next_in_service entity below it. We reuse the -+ * old start time. -+ */ -+ bfq_active_extract(st, entity); -+ } else if (entity->tree == &st->idle) { -+ /* -+ * Must be on the idle tree, bfq_idle_extract() will -+ * check for that. -+ */ -+ bfq_idle_extract(st, entity); -+ entity->start = bfq_gt(st->vtime, entity->finish) ? 
-+ st->vtime : entity->finish; -+ } else { -+ /* -+ * The finish time of the entity may be invalid, and -+ * it is in the past for sure, otherwise the queue -+ * would have been on the idle tree. -+ */ -+ entity->start = st->vtime; -+ st->wsum += entity->weight; -+ bfq_get_entity(entity); -+ -+ BUG_ON(entity->on_st); -+ entity->on_st = 1; -+ } -+ -+ st = __bfq_entity_update_weight_prio(st, entity); -+ bfq_calc_finish(entity, entity->budget); -+ bfq_active_insert(st, entity); -+} -+ -+/** -+ * bfq_activate_entity - activate an entity and its ancestors if necessary. -+ * @entity: the entity to activate. -+ * -+ * Activate @entity and all the entities on the path from it to the root. -+ */ -+static void bfq_activate_entity(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sd; -+ -+ for_each_entity(entity) { -+ __bfq_activate_entity(entity); -+ -+ sd = entity->sched_data; -+ if (!bfq_update_next_in_service(sd)) -+ /* -+ * No need to propagate the activation to the -+ * upper entities, as they will be updated when -+ * the in-service entity is rescheduled. -+ */ -+ break; -+ } -+} -+ -+/** -+ * __bfq_deactivate_entity - deactivate an entity from its service tree. -+ * @entity: the entity to deactivate. -+ * @requeue: if false, the entity will not be put into the idle tree. -+ * -+ * Deactivate an entity, independently from its previous state. If the -+ * entity was not on a service tree just return, otherwise if it is on -+ * any scheduler tree, extract it from that tree, and if necessary -+ * and if the caller did not specify @requeue, put it on the idle tree. -+ * -+ * Return %1 if the caller should update the entity hierarchy, i.e., -+ * if the entity was in service or if it was the next_in_service for -+ * its sched_data; return %0 otherwise. 
-+ */ -+static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+{ -+ struct bfq_sched_data *sd = entity->sched_data; -+ struct bfq_service_tree *st = bfq_entity_service_tree(entity); -+ int was_in_service = entity == sd->in_service_entity; -+ int ret = 0; -+ -+ if (!entity->on_st) -+ return 0; -+ -+ BUG_ON(was_in_service && entity->tree != NULL); -+ -+ if (was_in_service) { -+ bfq_calc_finish(entity, entity->service); -+ sd->in_service_entity = NULL; -+ } else if (entity->tree == &st->active) -+ bfq_active_extract(st, entity); -+ else if (entity->tree == &st->idle) -+ bfq_idle_extract(st, entity); -+ else if (entity->tree != NULL) -+ BUG(); -+ -+ if (was_in_service || sd->next_in_service == entity) -+ ret = bfq_update_next_in_service(sd); -+ -+ if (!requeue || !bfq_gt(entity->finish, st->vtime)) -+ bfq_forget_entity(st, entity); -+ else -+ bfq_idle_insert(st, entity); -+ -+ BUG_ON(sd->in_service_entity == entity); -+ BUG_ON(sd->next_in_service == entity); -+ -+ return ret; -+} -+ -+/** -+ * bfq_deactivate_entity - deactivate an entity. -+ * @entity: the entity to deactivate. -+ * @requeue: true if the entity can be put on the idle tree -+ */ -+static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) -+{ -+ struct bfq_sched_data *sd; -+ struct bfq_entity *parent; -+ -+ for_each_entity_safe(entity, parent) { -+ sd = entity->sched_data; -+ -+ if (!__bfq_deactivate_entity(entity, requeue)) -+ /* -+ * The parent entity is still backlogged, and -+ * we don't need to update it as it is still -+ * in service. -+ */ -+ break; -+ -+ if (sd->next_in_service != NULL) -+ /* -+ * The parent entity is still backlogged and -+ * the budgets on the path towards the root -+ * need to be updated. -+ */ -+ goto update; -+ -+ /* -+ * If we reach there the parent is no more backlogged and -+ * we want to propagate the dequeue upwards. 
-+ */ -+ requeue = 1; -+ } -+ -+ return; -+ -+update: -+ entity = parent; -+ for_each_entity(entity) { -+ __bfq_activate_entity(entity); -+ -+ sd = entity->sched_data; -+ if (!bfq_update_next_in_service(sd)) -+ break; -+ } -+} -+ -+/** -+ * bfq_update_vtime - update vtime if necessary. -+ * @st: the service tree to act upon. -+ * -+ * If necessary update the service tree vtime to have at least one -+ * eligible entity, skipping to its start time. Assumes that the -+ * active tree of the device is not empty. -+ * -+ * NOTE: this hierarchical implementation updates vtimes quite often, -+ * we may end up with reactivated processes getting timestamps after a -+ * vtime skip done because we needed a ->first_active entity on some -+ * intermediate node. -+ */ -+static void bfq_update_vtime(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entry; -+ struct rb_node *node = st->active.rb_node; -+ -+ entry = rb_entry(node, struct bfq_entity, rb_node); -+ if (bfq_gt(entry->min_start, st->vtime)) { -+ st->vtime = entry->min_start; -+ bfq_forget_idle(st); -+ } -+} -+ -+/** -+ * bfq_first_active_entity - find the eligible entity with -+ * the smallest finish time -+ * @st: the service tree to select from. -+ * -+ * This function searches the first schedulable entity, starting from the -+ * root of the tree and going on the left every time on this side there is -+ * a subtree with at least one eligible (start >= vtime) entity. The path on -+ * the right is followed only if a) the left subtree contains no eligible -+ * entities and b) no eligible entity has been found yet. 
-+ */ -+static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st) -+{ -+ struct bfq_entity *entry, *first = NULL; -+ struct rb_node *node = st->active.rb_node; -+ -+ while (node != NULL) { -+ entry = rb_entry(node, struct bfq_entity, rb_node); -+left: -+ if (!bfq_gt(entry->start, st->vtime)) -+ first = entry; -+ -+ BUG_ON(bfq_gt(entry->min_start, st->vtime)); -+ -+ if (node->rb_left != NULL) { -+ entry = rb_entry(node->rb_left, -+ struct bfq_entity, rb_node); -+ if (!bfq_gt(entry->min_start, st->vtime)) { -+ node = node->rb_left; -+ goto left; -+ } -+ } -+ if (first != NULL) -+ break; -+ node = node->rb_right; -+ } -+ -+ BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active)); -+ return first; -+} -+ -+/** -+ * __bfq_lookup_next_entity - return the first eligible entity in @st. -+ * @st: the service tree. -+ * -+ * Update the virtual time in @st and return the first eligible entity -+ * it contains. -+ */ -+static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, -+ bool force) -+{ -+ struct bfq_entity *entity, *new_next_in_service = NULL; -+ -+ if (RB_EMPTY_ROOT(&st->active)) -+ return NULL; -+ -+ bfq_update_vtime(st); -+ entity = bfq_first_active_entity(st); -+ BUG_ON(bfq_gt(entity->start, st->vtime)); -+ -+ /* -+ * If the chosen entity does not match with the sched_data's -+ * next_in_service and we are forcedly serving the IDLE priority -+ * class tree, bubble up budget update. -+ */ -+ if (unlikely(force && entity != entity->sched_data->next_in_service)) { -+ new_next_in_service = entity; -+ for_each_entity(new_next_in_service) -+ bfq_update_budget(new_next_in_service); -+ } -+ -+ return entity; -+} -+ -+/** -+ * bfq_lookup_next_entity - return the first eligible entity in @sd. -+ * @sd: the sched_data. -+ * @extract: if true the returned entity will be also extracted from @sd. 
-+ * -+ * NOTE: since we cache the next_in_service entity at each level of the -+ * hierarchy, the complexity of the lookup can be decreased with -+ * absolutely no effort just returning the cached next_in_service value; -+ * we prefer to do full lookups to test the consistency of * the data -+ * structures. -+ */ -+static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, -+ int extract, -+ struct bfq_data *bfqd) -+{ -+ struct bfq_service_tree *st = sd->service_tree; -+ struct bfq_entity *entity; -+ int i = 0; -+ -+ BUG_ON(sd->in_service_entity != NULL); -+ -+ if (bfqd != NULL && -+ jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { -+ entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1, -+ true); -+ if (entity != NULL) { -+ i = BFQ_IOPRIO_CLASSES - 1; -+ bfqd->bfq_class_idle_last_service = jiffies; -+ sd->next_in_service = entity; -+ } -+ } -+ for (; i < BFQ_IOPRIO_CLASSES; i++) { -+ entity = __bfq_lookup_next_entity(st + i, false); -+ if (entity != NULL) { -+ if (extract) { -+ bfq_check_next_in_service(sd, entity); -+ bfq_active_extract(st + i, entity); -+ sd->in_service_entity = entity; -+ sd->next_in_service = NULL; -+ } -+ break; -+ } -+ } -+ -+ return entity; -+} -+ -+/* -+ * Get next queue for service. -+ */ -+static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_entity *entity = NULL; -+ struct bfq_sched_data *sd; -+ struct bfq_queue *bfqq; -+ -+ BUG_ON(bfqd->in_service_queue != NULL); -+ -+ if (bfqd->busy_queues == 0) -+ return NULL; -+ -+ sd = &bfqd->root_group->sched_data; -+ for (; sd != NULL; sd = entity->my_sched_data) { -+ entity = bfq_lookup_next_entity(sd, 1, bfqd); -+ BUG_ON(entity == NULL); -+ entity->service = 0; -+ } -+ -+ bfqq = bfq_entity_to_bfqq(entity); -+ BUG_ON(bfqq == NULL); -+ -+ return bfqq; -+} -+ -+/* -+ * Forced extraction of the given queue. 
-+ */ -+static void bfq_get_next_queue_forced(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity; -+ struct bfq_sched_data *sd; -+ -+ BUG_ON(bfqd->in_service_queue != NULL); -+ -+ entity = &bfqq->entity; -+ /* -+ * Bubble up extraction/update from the leaf to the root. -+ */ -+ for_each_entity(entity) { -+ sd = entity->sched_data; -+ bfq_update_budget(entity); -+ bfq_update_vtime(bfq_entity_service_tree(entity)); -+ bfq_active_extract(bfq_entity_service_tree(entity), entity); -+ sd->in_service_entity = entity; -+ sd->next_in_service = NULL; -+ entity->service = 0; -+ } -+ -+ return; -+} -+ -+static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) -+{ -+ if (bfqd->in_service_bic != NULL) { -+ put_io_context(bfqd->in_service_bic->icq.ioc); -+ bfqd->in_service_bic = NULL; -+ } -+ -+ bfqd->in_service_queue = NULL; -+ del_timer(&bfqd->idle_slice_timer); -+} -+ -+static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int requeue) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ if (bfqq == bfqd->in_service_queue) -+ __bfq_bfqd_reset_in_service(bfqd); -+ -+ bfq_deactivate_entity(entity, requeue); -+} -+ -+static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ struct bfq_entity *entity = &bfqq->entity; -+ -+ bfq_activate_entity(entity); -+} -+ -+/* -+ * Called when the bfqq no longer has requests pending, remove it from -+ * the service tree. 
-+ */ -+static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ int requeue) -+{ -+ BUG_ON(!bfq_bfqq_busy(bfqq)); -+ BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); -+ -+ bfq_log_bfqq(bfqd, bfqq, "del from busy"); -+ -+ bfq_clear_bfqq_busy(bfqq); -+ -+ BUG_ON(bfqd->busy_queues == 0); -+ bfqd->busy_queues--; -+ -+ if (!bfqq->dispatched) { -+ bfq_weights_tree_remove(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ BUG_ON(!bfqd->busy_in_flight_queues); -+ bfqd->busy_in_flight_queues--; -+ if (bfq_bfqq_constantly_seeky(bfqq)) { -+ BUG_ON(!bfqd-> -+ const_seeky_busy_in_flight_queues); -+ bfqd->const_seeky_busy_in_flight_queues--; -+ } -+ } -+ } -+ if (bfqq->wr_coeff > 1) -+ bfqd->wr_busy_queues--; -+ -+ bfq_deactivate_bfqq(bfqd, bfqq, requeue); -+} -+ -+/* -+ * Called when an inactive queue receives a new request. -+ */ -+static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) -+{ -+ BUG_ON(bfq_bfqq_busy(bfqq)); -+ BUG_ON(bfqq == bfqd->in_service_queue); -+ -+ bfq_log_bfqq(bfqd, bfqq, "add to busy"); -+ -+ bfq_activate_bfqq(bfqd, bfqq); -+ -+ bfq_mark_bfqq_busy(bfqq); -+ bfqd->busy_queues++; -+ -+ if (!bfqq->dispatched) { -+ if (bfqq->wr_coeff == 1) -+ bfq_weights_tree_add(bfqd, &bfqq->entity, -+ &bfqd->queue_weights_tree); -+ if (!blk_queue_nonrot(bfqd->queue)) { -+ bfqd->busy_in_flight_queues++; -+ if (bfq_bfqq_constantly_seeky(bfqq)) -+ bfqd->const_seeky_busy_in_flight_queues++; -+ } -+ } -+ if (bfqq->wr_coeff > 1) -+ bfqd->wr_busy_queues++; -+} -diff --git a/block/bfq.h b/block/bfq.h -new file mode 100644 -index 0000000..aeca08e ---- /dev/null -+++ block/bfq.h -@@ -0,0 +1,742 @@ -+/* -+ * BFQ-v7r5 for 3.15.0: data structures and common functions prototypes. 
-+ * -+ * Based on ideas and code from CFQ: -+ * Copyright (C) 2003 Jens Axboe -+ * -+ * Copyright (C) 2008 Fabio Checconi -+ * Paolo Valente -+ * -+ * Copyright (C) 2010 Paolo Valente -+ */ -+ -+#ifndef _BFQ_H -+#define _BFQ_H -+ -+#include -+#include -+#include -+#include -+ -+#define BFQ_IOPRIO_CLASSES 3 -+#define BFQ_CL_IDLE_TIMEOUT (HZ/5) -+ -+#define BFQ_MIN_WEIGHT 1 -+#define BFQ_MAX_WEIGHT 1000 -+ -+#define BFQ_DEFAULT_GRP_WEIGHT 10 -+#define BFQ_DEFAULT_GRP_IOPRIO 0 -+#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE -+ -+struct bfq_entity; -+ -+/** -+ * struct bfq_service_tree - per ioprio_class service tree. -+ * @active: tree for active entities (i.e., those backlogged). -+ * @idle: tree for idle entities (i.e., those not backlogged, with V <= F_i). -+ * @first_idle: idle entity with minimum F_i. -+ * @last_idle: idle entity with maximum F_i. -+ * @vtime: scheduler virtual time. -+ * @wsum: scheduler weight sum; active and idle entities contribute to it. -+ * -+ * Each service tree represents a B-WF2Q+ scheduler on its own. Each -+ * ioprio_class has its own independent scheduler, and so its own -+ * bfq_service_tree. All the fields are protected by the queue lock -+ * of the containing bfqd. -+ */ -+struct bfq_service_tree { -+ struct rb_root active; -+ struct rb_root idle; -+ -+ struct bfq_entity *first_idle; -+ struct bfq_entity *last_idle; -+ -+ u64 vtime; -+ unsigned long wsum; -+}; -+ -+/** -+ * struct bfq_sched_data - multi-class scheduler. -+ * @in_service_entity: entity in service. -+ * @next_in_service: head-of-the-line entity in the scheduler. -+ * @service_tree: array of service trees, one per ioprio_class. -+ * -+ * bfq_sched_data is the basic scheduler queue. It supports three -+ * ioprio_classes, and can be used either as a toplevel queue or as -+ * an intermediate queue on a hierarchical setup. -+ * @next_in_service points to the active entity of the sched_data -+ * service trees that will be scheduled next. 
-+ * -+ * The supported ioprio_classes are the same as in CFQ, in descending -+ * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. -+ * Requests from higher priority queues are served before all the -+ * requests from lower priority queues; among requests of the same -+ * queue requests are served according to B-WF2Q+. -+ * All the fields are protected by the queue lock of the containing bfqd. -+ */ -+struct bfq_sched_data { -+ struct bfq_entity *in_service_entity; -+ struct bfq_entity *next_in_service; -+ struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES]; -+}; -+ -+/** -+ * struct bfq_weight_counter - counter of the number of all active entities -+ * with a given weight. -+ * @weight: weight of the entities that this counter refers to. -+ * @num_active: number of active entities with this weight. -+ * @weights_node: weights tree member (see bfq_data's @queue_weights_tree -+ * and @group_weights_tree). -+ */ -+struct bfq_weight_counter { -+ short int weight; -+ unsigned int num_active; -+ struct rb_node weights_node; -+}; -+ -+/** -+ * struct bfq_entity - schedulable entity. -+ * @rb_node: service_tree member. -+ * @weight_counter: pointer to the weight counter associated with this entity. -+ * @on_st: flag, true if the entity is on a tree (either the active or -+ * the idle one of its service_tree). -+ * @finish: B-WF2Q+ finish timestamp (aka F_i). -+ * @start: B-WF2Q+ start timestamp (aka S_i). -+ * @tree: tree the entity is enqueued into; %NULL if not on a tree. -+ * @min_start: minimum start time of the (active) subtree rooted at -+ * this entity; used for O(log N) lookups into active trees. -+ * @service: service received during the last round of service. -+ * @budget: budget used to calculate F_i; F_i = S_i + @budget / @weight. -+ * @weight: weight of the queue -+ * @parent: parent entity, for hierarchical scheduling. 
-+ * @my_sched_data: for non-leaf nodes in the cgroup hierarchy, the -+ * associated scheduler queue, %NULL on leaf nodes. -+ * @sched_data: the scheduler queue this entity belongs to. -+ * @ioprio: the ioprio in use. -+ * @new_weight: when a weight change is requested, the new weight value. -+ * @orig_weight: original weight, used to implement weight boosting -+ * @new_ioprio: when an ioprio change is requested, the new ioprio value. -+ * @ioprio_class: the ioprio_class in use. -+ * @new_ioprio_class: when an ioprio_class change is requested, the new -+ * ioprio_class value. -+ * @ioprio_changed: flag, true when the user requested a weight, ioprio or -+ * ioprio_class change. -+ * -+ * A bfq_entity is used to represent either a bfq_queue (leaf node in the -+ * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each -+ * entity belongs to the sched_data of the parent group in the cgroup -+ * hierarchy. Non-leaf entities have also their own sched_data, stored -+ * in @my_sched_data. -+ * -+ * Each entity stores independently its priority values; this would -+ * allow different weights on different devices, but this -+ * functionality is not exported to userspace by now. Priorities and -+ * weights are updated lazily, first storing the new values into the -+ * new_* fields, then setting the @ioprio_changed flag. As soon as -+ * there is a transition in the entity state that allows the priority -+ * update to take place the effective and the requested priority -+ * values are synchronized. -+ * -+ * Unless cgroups are used, the weight value is calculated from the -+ * ioprio to export the same interface as CFQ. When dealing with -+ * ``well-behaved'' queues (i.e., queues that do not spend too much -+ * time to consume their budget and have true sequential behavior, and -+ * when there are no external factors breaking anticipation) the -+ * relative weights at each level of the cgroups hierarchy should be -+ * guaranteed. 
All the fields are protected by the queue lock of the -+ * containing bfqd. -+ */ -+struct bfq_entity { -+ struct rb_node rb_node; -+ struct bfq_weight_counter *weight_counter; -+ -+ int on_st; -+ -+ u64 finish; -+ u64 start; -+ -+ struct rb_root *tree; -+ -+ u64 min_start; -+ -+ unsigned long service, budget; -+ unsigned short weight, new_weight; -+ unsigned short orig_weight; -+ -+ struct bfq_entity *parent; -+ -+ struct bfq_sched_data *my_sched_data; -+ struct bfq_sched_data *sched_data; -+ -+ unsigned short ioprio, new_ioprio; -+ unsigned short ioprio_class, new_ioprio_class; -+ -+ int ioprio_changed; -+}; -+ -+struct bfq_group; -+ -+/** -+ * struct bfq_queue - leaf schedulable entity. -+ * @ref: reference counter. -+ * @bfqd: parent bfq_data. -+ * @new_bfqq: shared bfq_queue if queue is cooperating with -+ * one or more other queues. -+ * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree). -+ * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree). -+ * @sort_list: sorted list of pending requests. -+ * @next_rq: if fifo isn't expired, next request to serve. -+ * @queued: nr of requests queued in @sort_list. -+ * @allocated: currently allocated requests. -+ * @meta_pending: pending metadata requests. -+ * @fifo: fifo list of requests in sort_list. -+ * @entity: entity representing this queue in the scheduler. -+ * @max_budget: maximum budget allowed from the feedback mechanism. -+ * @budget_timeout: budget expiration (in jiffies). -+ * @dispatched: number of requests on the dispatch list or inside driver. -+ * @flags: status flags. -+ * @bfqq_list: node for active/idle bfqq list inside our bfqd. 
-+ * @seek_samples: number of seeks sampled -+ * @seek_total: sum of the distances of the seeks sampled -+ * @seek_mean: mean seek distance -+ * @last_request_pos: position of the last request enqueued -+ * @requests_within_timer: number of consecutive pairs of request completion -+ * and arrival, such that the queue becomes idle -+ * after the completion, but the next request arrives -+ * within an idle time slice; used only if the queue's -+ * IO_bound has been cleared. -+ * @pid: pid of the process owning the queue, used for logging purposes. -+ * @last_wr_start_finish: start time of the current weight-raising period if -+ * the @bfq-queue is being weight-raised, otherwise -+ * finish time of the last weight-raising period -+ * @wr_cur_max_time: current max raising time for this queue -+ * @soft_rt_next_start: minimum time instant such that, only if a new -+ * request is enqueued after this time instant in an -+ * idle @bfq_queue with no outstanding requests, then -+ * the task associated with the queue it is deemed as -+ * soft real-time (see the comments to the function -+ * bfq_bfqq_softrt_next_start()). -+ * @last_idle_bklogged: time of the last transition of the @bfq_queue from -+ * idle to backlogged -+ * @service_from_backlogged: cumulative service received from the @bfq_queue -+ * since the last transition from idle to -+ * backlogged -+ * -+ * A bfq_queue is a leaf request queue; it can be associated with an io_context -+ * or more, if it is async or shared between cooperating processes. @cgroup -+ * holds a reference to the cgroup, to be sure that it does not disappear while -+ * a bfqq still references it (mostly to avoid races between request issuing and -+ * task migration followed by cgroup destruction). -+ * All the fields are protected by the queue lock of the containing bfqd. 
-+ */ -+struct bfq_queue { -+ atomic_t ref; -+ struct bfq_data *bfqd; -+ -+ /* fields for cooperating queues handling */ -+ struct bfq_queue *new_bfqq; -+ struct rb_node pos_node; -+ struct rb_root *pos_root; -+ -+ struct rb_root sort_list; -+ struct request *next_rq; -+ int queued[2]; -+ int allocated[2]; -+ int meta_pending; -+ struct list_head fifo; -+ -+ struct bfq_entity entity; -+ -+ unsigned long max_budget; -+ unsigned long budget_timeout; -+ -+ int dispatched; -+ -+ unsigned int flags; -+ -+ struct list_head bfqq_list; -+ -+ unsigned int seek_samples; -+ u64 seek_total; -+ sector_t seek_mean; -+ sector_t last_request_pos; -+ -+ unsigned int requests_within_timer; -+ -+ pid_t pid; -+ -+ /* weight-raising fields */ -+ unsigned long wr_cur_max_time; -+ unsigned long soft_rt_next_start; -+ unsigned long last_wr_start_finish; -+ unsigned int wr_coeff; -+ unsigned long last_idle_bklogged; -+ unsigned long service_from_backlogged; -+}; -+ -+/** -+ * struct bfq_ttime - per process thinktime stats. -+ * @ttime_total: total process thinktime -+ * @ttime_samples: number of thinktime samples -+ * @ttime_mean: average process thinktime -+ */ -+struct bfq_ttime { -+ unsigned long last_end_request; -+ -+ unsigned long ttime_total; -+ unsigned long ttime_samples; -+ unsigned long ttime_mean; -+}; -+ -+/** -+ * struct bfq_io_cq - per (request_queue, io_context) structure. -+ * @icq: associated io_cq structure -+ * @bfqq: array of two process queues, the sync and the async -+ * @ttime: associated @bfq_ttime struct -+ */ -+struct bfq_io_cq { -+ struct io_cq icq; /* must be the first member */ -+ struct bfq_queue *bfqq[2]; -+ struct bfq_ttime ttime; -+ int ioprio; -+}; -+ -+enum bfq_device_speed { -+ BFQ_BFQD_FAST, -+ BFQ_BFQD_SLOW, -+}; -+ -+/** -+ * struct bfq_data - per device data structure. -+ * @queue: request queue for the managed device. -+ * @root_group: root bfq_group for the device. 
-+ * @rq_pos_tree: rbtree sorted by next_request position, used when -+ * determining if two or more queues have interleaving -+ * requests (see bfq_close_cooperator()). -+ * @active_numerous_groups: number of bfq_groups containing more than one -+ * active @bfq_entity. -+ * @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by -+ * weight. Used to keep track of whether all @bfq_queues -+ * have the same weight. The tree contains one counter -+ * for each distinct weight associated to some active -+ * and not weight-raised @bfq_queue (see the comments to -+ * the functions bfq_weights_tree_[add|remove] for -+ * further details). -+ * @group_weights_tree: rbtree of non-queue @bfq_entity weight counters, sorted -+ * by weight. Used to keep track of whether all -+ * @bfq_groups have the same weight. The tree contains -+ * one counter for each distinct weight associated to -+ * some active @bfq_group (see the comments to the -+ * functions bfq_weights_tree_[add|remove] for further -+ * details). -+ * @busy_queues: number of bfq_queues containing requests (including the -+ * queue in service, even if it is idling). -+ * @busy_in_flight_queues: number of @bfq_queues containing pending or -+ * in-flight requests, plus the @bfq_queue in -+ * service, even if idle but waiting for the -+ * possible arrival of its next sync request. This -+ * field is updated only if the device is rotational, -+ * but used only if the device is also NCQ-capable. -+ * The reason why the field is updated also for non- -+ * NCQ-capable rotational devices is related to the -+ * fact that the value of @hw_tag may be set also -+ * later than when busy_in_flight_queues may need to -+ * be incremented for the first time(s). Taking also -+ * this possibility into account, to avoid unbalanced -+ * increments/decrements, would imply more overhead -+ * than just updating busy_in_flight_queues -+ * regardless of the value of @hw_tag. 
-+ * @const_seeky_busy_in_flight_queues: number of constantly-seeky @bfq_queues -+ * (that is, seeky queues that expired -+ * for budget timeout at least once) -+ * containing pending or in-flight -+ * requests, including the in-service -+ * @bfq_queue if constantly seeky. This -+ * field is updated only if the device -+ * is rotational, but used only if the -+ * device is also NCQ-capable (see the -+ * comments to @busy_in_flight_queues). -+ * @wr_busy_queues: number of weight-raised busy @bfq_queues. -+ * @queued: number of queued requests. -+ * @rq_in_driver: number of requests dispatched and waiting for completion. -+ * @sync_flight: number of sync requests in the driver. -+ * @max_rq_in_driver: max number of reqs in driver in the last -+ * @hw_tag_samples completed requests. -+ * @hw_tag_samples: nr of samples used to calculate hw_tag. -+ * @hw_tag: flag set to one if the driver is showing a queueing behavior. -+ * @budgets_assigned: number of budgets assigned. -+ * @idle_slice_timer: timer set when idling for the next sequential request -+ * from the queue in service. -+ * @unplug_work: delayed work to restart dispatching on the request queue. -+ * @in_service_queue: bfq_queue in service. -+ * @in_service_bic: bfq_io_cq (bic) associated with the @in_service_queue. -+ * @last_position: on-disk position of the last served request. -+ * @last_budget_start: beginning of the last budget. -+ * @last_idling_start: beginning of the last idle slice. -+ * @peak_rate: peak transfer rate observed for a budget. -+ * @peak_rate_samples: number of samples used to calculate @peak_rate. -+ * @bfq_max_budget: maximum budget allotted to a bfq_queue before -+ * rescheduling. -+ * @group_list: list of all the bfq_groups active on the device. -+ * @active_list: list of all the bfq_queues active on the device. -+ * @idle_list: list of all the bfq_queues idle on the device. -+ * @bfq_quantum: max number of requests dispatched per dispatch round. 
-+ * @bfq_fifo_expire: timeout for async/sync requests; when it expires -+ * requests are served in fifo order. -+ * @bfq_back_penalty: weight of backward seeks wrt forward ones. -+ * @bfq_back_max: maximum allowed backward seek. -+ * @bfq_slice_idle: maximum idling time. -+ * @bfq_user_max_budget: user-configured max budget value -+ * (0 for auto-tuning). -+ * @bfq_max_budget_async_rq: maximum budget (in nr of requests) allotted to -+ * async queues. -+ * @bfq_timeout: timeout for bfq_queues to consume their budget; used to -+ * to prevent seeky queues to impose long latencies to well -+ * behaved ones (this also implies that seeky queues cannot -+ * receive guarantees in the service domain; after a timeout -+ * they are charged for the whole allocated budget, to try -+ * to preserve a behavior reasonably fair among them, but -+ * without service-domain guarantees). -+ * @bfq_coop_thresh: number of queue merges after which a @bfq_queue is -+ * no more granted any weight-raising. -+ * @bfq_failed_cooperations: number of consecutive failed cooperation -+ * chances after which weight-raising is restored -+ * to a queue subject to more than bfq_coop_thresh -+ * queue merges. -+ * @bfq_requests_within_timer: number of consecutive requests that must be -+ * issued within the idle time slice to set -+ * again idling to a queue which was marked as -+ * non-I/O-bound (see the definition of the -+ * IO_bound flag for further details). 
-+ * @bfq_wr_coeff: Maximum factor by which the weight of a weight-raised -+ * queue is multiplied -+ * @bfq_wr_max_time: maximum duration of a weight-raising period (jiffies) -+ * @bfq_wr_rt_max_time: maximum duration for soft real-time processes -+ * @bfq_wr_min_idle_time: minimum idle period after which weight-raising -+ * may be reactivated for a queue (in jiffies) -+ * @bfq_wr_min_inter_arr_async: minimum period between request arrivals -+ * after which weight-raising may be -+ * reactivated for an already busy queue -+ * (in jiffies) -+ * @bfq_wr_max_softrt_rate: max service-rate for a soft real-time queue, -+ * sectors per seconds -+ * @RT_prod: cached value of the product R*T used for computing the maximum -+ * duration of the weight raising automatically -+ * @device_speed: device-speed class for the low-latency heuristic -+ * @oom_bfqq: fallback dummy bfqq for extreme OOM conditions -+ * -+ * All the fields are protected by the @queue lock. -+ */ -+struct bfq_data { -+ struct request_queue *queue; -+ -+ struct bfq_group *root_group; -+ struct rb_root rq_pos_tree; -+ -+#ifdef CONFIG_CGROUP_BFQIO -+ int active_numerous_groups; -+#endif -+ -+ struct rb_root queue_weights_tree; -+ struct rb_root group_weights_tree; -+ -+ int busy_queues; -+ int busy_in_flight_queues; -+ int const_seeky_busy_in_flight_queues; -+ int wr_busy_queues; -+ int queued; -+ int rq_in_driver; -+ int sync_flight; -+ -+ int max_rq_in_driver; -+ int hw_tag_samples; -+ int hw_tag; -+ -+ int budgets_assigned; -+ -+ struct timer_list idle_slice_timer; -+ struct work_struct unplug_work; -+ -+ struct bfq_queue *in_service_queue; -+ struct bfq_io_cq *in_service_bic; -+ -+ sector_t last_position; -+ -+ ktime_t last_budget_start; -+ ktime_t last_idling_start; -+ int peak_rate_samples; -+ u64 peak_rate; -+ unsigned long bfq_max_budget; -+ -+ struct hlist_head group_list; -+ struct list_head active_list; -+ struct list_head idle_list; -+ -+ unsigned int bfq_quantum; -+ unsigned int 
bfq_fifo_expire[2]; -+ unsigned int bfq_back_penalty; -+ unsigned int bfq_back_max; -+ unsigned int bfq_slice_idle; -+ u64 bfq_class_idle_last_service; -+ -+ unsigned int bfq_user_max_budget; -+ unsigned int bfq_max_budget_async_rq; -+ unsigned int bfq_timeout[2]; -+ -+ unsigned int bfq_coop_thresh; -+ unsigned int bfq_failed_cooperations; -+ unsigned int bfq_requests_within_timer; -+ -+ bool low_latency; -+ -+ /* parameters of the low_latency heuristics */ -+ unsigned int bfq_wr_coeff; -+ unsigned int bfq_wr_max_time; -+ unsigned int bfq_wr_rt_max_time; -+ unsigned int bfq_wr_min_idle_time; -+ unsigned long bfq_wr_min_inter_arr_async; -+ unsigned int bfq_wr_max_softrt_rate; -+ u64 RT_prod; -+ enum bfq_device_speed device_speed; -+ -+ struct bfq_queue oom_bfqq; -+}; -+ -+enum bfqq_state_flags { -+ BFQ_BFQQ_FLAG_busy = 0, /* has requests or is in service */ -+ BFQ_BFQQ_FLAG_wait_request, /* waiting for a request */ -+ BFQ_BFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ -+ BFQ_BFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ -+ BFQ_BFQQ_FLAG_idle_window, /* slice idling enabled */ -+ BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ -+ BFQ_BFQQ_FLAG_sync, /* synchronous queue */ -+ BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ -+ BFQ_BFQQ_FLAG_IO_bound, /* -+ * bfqq has timed-out at least once -+ * having consumed at most 2/10 of -+ * its budget -+ */ -+ BFQ_BFQQ_FLAG_constantly_seeky, /* -+ * bfqq has proved to be slow and -+ * seeky until budget timeout -+ */ -+ BFQ_BFQQ_FLAG_softrt_update, /* -+ * may need softrt-next-start -+ * update -+ */ -+ BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ -+}; -+ -+#define BFQ_BFQQ_FNS(name) \ -+static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ -+{ \ -+ (bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \ -+} \ -+static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \ -+{ \ -+ (bfqq)->flags &= ~(1 << 
BFQ_BFQQ_FLAG_##name); \ -+} \ -+static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ -+{ \ -+ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ -+} -+ -+BFQ_BFQQ_FNS(busy); -+BFQ_BFQQ_FNS(wait_request); -+BFQ_BFQQ_FNS(must_alloc); -+BFQ_BFQQ_FNS(fifo_expire); -+BFQ_BFQQ_FNS(idle_window); -+BFQ_BFQQ_FNS(prio_changed); -+BFQ_BFQQ_FNS(sync); -+BFQ_BFQQ_FNS(budget_new); -+BFQ_BFQQ_FNS(IO_bound); -+BFQ_BFQQ_FNS(constantly_seeky); -+BFQ_BFQQ_FNS(coop); -+BFQ_BFQQ_FNS(split_coop); -+BFQ_BFQQ_FNS(softrt_update); -+#undef BFQ_BFQQ_FNS -+ -+/* Logging facilities. */ -+#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq%d " fmt, (bfqq)->pid, ##args) -+ -+#define bfq_log(bfqd, fmt, args...) \ -+ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) -+ -+/* Expiration reasons. */ -+enum bfqq_expiration { -+ BFQ_BFQQ_TOO_IDLE = 0, /* -+ * queue has been idling for -+ * too long -+ */ -+ BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ -+ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ -+ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ -+}; -+ -+#ifdef CONFIG_CGROUP_BFQIO -+/** -+ * struct bfq_group - per (device, cgroup) data structure. -+ * @entity: schedulable entity to insert into the parent group sched_data. -+ * @sched_data: own sched_data, to contain child entities (they may be -+ * both bfq_queues and bfq_groups). -+ * @group_node: node to be inserted into the bfqio_cgroup->group_data -+ * list of the containing cgroup's bfqio_cgroup. -+ * @bfqd_node: node to be inserted into the @bfqd->group_list list -+ * of the groups active on the same device; used for cleanup. -+ * @bfqd: the bfq_data for the device this group acts upon. -+ * @async_bfqq: array of async queues for all the tasks belonging to -+ * the group, one queue per ioprio value per ioprio_class, -+ * except for the idle class that has only one queue. 
-+ * @async_idle_bfqq: async queue for the idle class (ioprio is ignored). -+ * @my_entity: pointer to @entity, %NULL for the toplevel group; used -+ * to avoid too many special cases during group creation/ -+ * migration. -+ * @active_entities: number of active entities belonging to the group; -+ * unused for the root group. Used to know whether there -+ * are groups with more than one active @bfq_entity -+ * (see the comments to the function -+ * bfq_bfqq_must_not_expire()). -+ * -+ * Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup -+ * there is a set of bfq_groups, each one collecting the lower-level -+ * entities belonging to the group that are acting on the same device. -+ * -+ * Locking works as follows: -+ * o @group_node is protected by the bfqio_cgroup lock, and is accessed -+ * via RCU from its readers. -+ * o @bfqd is protected by the queue lock, RCU is used to access it -+ * from the readers. -+ * o All the other fields are protected by the @bfqd queue lock. -+ */ -+struct bfq_group { -+ struct bfq_entity entity; -+ struct bfq_sched_data sched_data; -+ -+ struct hlist_node group_node; -+ struct hlist_node bfqd_node; -+ -+ void *bfqd; -+ -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; -+ struct bfq_queue *async_idle_bfqq; -+ -+ struct bfq_entity *my_entity; -+ -+ int active_entities; -+}; -+ -+/** -+ * struct bfqio_cgroup - bfq cgroup data structure. -+ * @css: subsystem state for bfq in the containing cgroup. -+ * @online: flag marked when the subsystem is inserted. -+ * @weight: cgroup weight. -+ * @ioprio: cgroup ioprio. -+ * @ioprio_class: cgroup ioprio_class. -+ * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data. -+ * @group_data: list containing the bfq_group belonging to this cgroup. -+ * -+ * @group_data is accessed using RCU, with @lock protecting the updates, -+ * @ioprio and @ioprio_class are protected by @lock. 
-+ */ -+struct bfqio_cgroup { -+ struct cgroup_subsys_state css; -+ bool online; -+ -+ unsigned short weight, ioprio, ioprio_class; -+ -+ spinlock_t lock; -+ struct hlist_head group_data; -+}; -+#else -+struct bfq_group { -+ struct bfq_sched_data sched_data; -+ -+ struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; -+ struct bfq_queue *async_idle_bfqq; -+}; -+#endif -+ -+static inline struct bfq_service_tree * -+bfq_entity_service_tree(struct bfq_entity *entity) -+{ -+ struct bfq_sched_data *sched_data = entity->sched_data; -+ unsigned int idx = entity->ioprio_class - 1; -+ -+ BUG_ON(idx >= BFQ_IOPRIO_CLASSES); -+ BUG_ON(sched_data == NULL); -+ -+ return sched_data->service_tree + idx; -+} -+ -+static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, -+ int is_sync) -+{ -+ return bic->bfqq[!!is_sync]; -+} -+ -+static inline void bic_set_bfqq(struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq, int is_sync) -+{ -+ bic->bfqq[!!is_sync] = bfqq; -+} -+ -+static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) -+{ -+ return bic->icq.q->elevator->elevator_data; -+} -+ -+/** -+ * bfq_get_bfqd_locked - get a lock to a bfqd using a RCU protected pointer. -+ * @ptr: a pointer to a bfqd. -+ * @flags: storage for the flags to be saved. -+ * -+ * This function allows bfqg->bfqd to be protected by the -+ * queue lock of the bfqd they reference; the pointer is dereferenced -+ * under RCU, so the storage for bfqd is assured to be safe as long -+ * as the RCU read side critical section does not end. After the -+ * bfqd->queue->queue_lock is taken the pointer is rechecked, to be -+ * sure that no other writer accessed it. If we raced with a writer, -+ * the function returns NULL, with the queue unlocked, otherwise it -+ * returns the dereferenced pointer, with the queue locked. 
-+ */ -+static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr, -+ unsigned long *flags) -+{ -+ struct bfq_data *bfqd; -+ -+ rcu_read_lock(); -+ bfqd = rcu_dereference(*(struct bfq_data **)ptr); -+ -+ if (bfqd != NULL) { -+ spin_lock_irqsave(bfqd->queue->queue_lock, *flags); -+ if (*ptr == bfqd) -+ goto out; -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -+ } -+ -+ bfqd = NULL; -+out: -+ rcu_read_unlock(); -+ return bfqd; -+} -+ -+static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd, -+ unsigned long *flags) -+{ -+ spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags); -+} -+ -+static void bfq_changed_ioprio(struct bfq_io_cq *bic); -+static void bfq_put_queue(struct bfq_queue *bfqq); -+static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); -+static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, -+ struct bfq_group *bfqg, int is_sync, -+ struct bfq_io_cq *bic, gfp_t gfp_mask); -+static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -+ struct bfq_group *bfqg); -+static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); -+static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq); -+ -+#endif /* _BFQ_H */ --- -1.9.3 - diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch deleted file mode 100644 index 06413755a..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r5-for-3.15.0.patch +++ /dev/null @@ -1,1189 +0,0 @@ -# Calculate format=diff os_linux_system==desktop -From e07cf5701607dd66a8e360c7037ac29b0df4e279 Mon Sep 17 00:00:00 2001 -From: Mauro Andreolini -Date: Wed, 18 Jun 2014 17:38:07 +0200 -Subject: [PATCH 3/3] block, bfq: 
add Early Queue Merge (EQM) to BFQ-v7r5 for - 3.15.0 - -A set of processes may happen to perform interleaved reads, i.e.,requests -whose union would give rise to a sequential read pattern. There are two -typical cases: in the first case, processes read fixed-size chunks of -data at a fixed distance from each other, while in the second case processes -may read variable-size chunks at variable distances. The latter case occurs -for example with QEMU, which splits the I/O generated by the guest into -multiple chunks, and lets these chunks be served by a pool of cooperating -processes, iteratively assigning the next chunk of I/O to the first -available process. CFQ uses actual queue merging for the first type of -rocesses, whereas it uses preemption to get a sequential read pattern out -of the read requests performed by the second type of processes. In the end -it uses two different mechanisms to achieve the same goal: boosting the -throughput with interleaved I/O. - -This patch introduces Early Queue Merge (EQM), a unified mechanism to get a -sequential read pattern with both types of processes. The main idea is -checking newly arrived requests against the next request of the active queue -both in case of actual request insert and in case of request merge. By doing -so, both the types of processes can be handled by just merging their queues. -EQM is then simpler and more compact than the pair of mechanisms used in -CFQ. - -Finally, EQM also preserves the typical low-latency properties of BFQ, by -properly restoring the weight-raising state of a queue when it gets back to -a non-merged state. 
- -Signed-off-by: Mauro Andreolini -Signed-off-by: Arianna Avanzini -Signed-off-by: Paolo Valente ---- - block/bfq-iosched.c | 736 ++++++++++++++++++++++++++++++++++++---------------- - block/bfq-sched.c | 28 -- - block/bfq.h | 46 +++- - 3 files changed, 556 insertions(+), 254 deletions(-) - -diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index 6cf7bca..4579c6d 100644 ---- a/block/bfq-iosched.c -+++ b/block/bfq-iosched.c -@@ -571,6 +571,57 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd) - return dur; - } - -+static inline unsigned -+bfq_bfqq_cooperations(struct bfq_queue *bfqq) -+{ -+ return bfqq->bic ? bfqq->bic->cooperations : 0; -+} -+ -+static inline void -+bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) -+{ -+ if (bic->saved_idle_window) -+ bfq_mark_bfqq_idle_window(bfqq); -+ else -+ bfq_clear_bfqq_idle_window(bfqq); -+ if (bic->saved_IO_bound) -+ bfq_mark_bfqq_IO_bound(bfqq); -+ else -+ bfq_clear_bfqq_IO_bound(bfqq); -+ if (bic->wr_time_left && bfqq->bfqd->low_latency && -+ bic->cooperations < bfqq->bfqd->bfq_coop_thresh) { -+ /* -+ * Start a weight raising period with the duration given by -+ * the raising_time_left snapshot. -+ */ -+ if (bfq_bfqq_busy(bfqq)) -+ bfqq->bfqd->wr_busy_queues++; -+ bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff; -+ bfqq->wr_cur_max_time = bic->wr_time_left; -+ bfqq->last_wr_start_finish = jiffies; -+ bfqq->entity.ioprio_changed = 1; -+ } -+ /* -+ * Clear wr_time_left to prevent bfq_bfqq_save_state() from -+ * getting confused about the queue's need of a weight-raising -+ * period. -+ */ -+ bic->wr_time_left = 0; -+} -+ -+/* -+ * Must be called with the queue_lock held. 
-+ */ -+static int bfqq_process_refs(struct bfq_queue *bfqq) -+{ -+ int process_refs, io_refs; -+ -+ io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -+ process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -+ BUG_ON(process_refs < 0); -+ return process_refs; -+} -+ - static void bfq_add_request(struct request *rq) - { - struct bfq_queue *bfqq = RQ_BFQQ(rq); -@@ -602,8 +653,11 @@ static void bfq_add_request(struct request *rq) - - if (!bfq_bfqq_busy(bfqq)) { - int soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && -+ bfq_bfqq_cooperations(bfqq) < bfqd->bfq_coop_thresh && - time_is_before_jiffies(bfqq->soft_rt_next_start); -- idle_for_long_time = time_is_before_jiffies( -+ idle_for_long_time = bfq_bfqq_cooperations(bfqq) < -+ bfqd->bfq_coop_thresh && -+ time_is_before_jiffies( - bfqq->budget_timeout + - bfqd->bfq_wr_min_idle_time); - entity->budget = max_t(unsigned long, bfqq->max_budget, -@@ -624,11 +678,20 @@ static void bfq_add_request(struct request *rq) - if (!bfqd->low_latency) - goto add_bfqq_busy; - -+ if (bfq_bfqq_just_split(bfqq)) -+ goto set_ioprio_changed; -+ - /* -- * If the queue is not being boosted and has been idle -- * for enough time, start a weight-raising period -+ * If the queue: -+ * - is not being boosted, -+ * - has been idle for enough time, -+ * - is not a sync queue or is linked to a bfq_io_cq (it is -+ * shared "for its nature" or it is not shared and its -+ * requests have not been redirected to a shared queue) -+ * start a weight-raising period. 
- */ -- if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt)) { -+ if (old_wr_coeff == 1 && (idle_for_long_time || soft_rt) && -+ (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) { - bfqq->wr_coeff = bfqd->bfq_wr_coeff; - if (idle_for_long_time) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -@@ -642,9 +705,11 @@ static void bfq_add_request(struct request *rq) - } else if (old_wr_coeff > 1) { - if (idle_for_long_time) - bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); -- else if (bfqq->wr_cur_max_time == -- bfqd->bfq_wr_rt_max_time && -- !soft_rt) { -+ else if (bfq_bfqq_cooperations(bfqq) >= -+ bfqd->bfq_coop_thresh || -+ (bfqq->wr_cur_max_time == -+ bfqd->bfq_wr_rt_max_time && -+ !soft_rt)) { - bfqq->wr_coeff = 1; - bfq_log_bfqq(bfqd, bfqq, - "wrais ending at %lu, rais_max_time %u", -@@ -660,18 +725,18 @@ static void bfq_add_request(struct request *rq) - /* - * - * The remaining weight-raising time is lower -- * than bfqd->bfq_wr_rt_max_time, which -- * means that the application is enjoying -- * weight raising either because deemed soft- -- * rt in the near past, or because deemed -- * interactive a long ago. In both cases, -- * resetting now the current remaining weight- -- * raising time for the application to the -- * weight-raising duration for soft rt -- * applications would not cause any latency -- * increase for the application (as the new -- * duration would be higher than the remaining -- * time). -+ * than bfqd->bfq_wr_rt_max_time, which means -+ * that the application is enjoying weight -+ * raising either because deemed soft-rt in -+ * the near past, or because deemed interactive -+ * a long ago. -+ * In both cases, resetting now the current -+ * remaining weight-raising time for the -+ * application to the weight-raising duration -+ * for soft rt applications would not cause any -+ * latency increase for the application (as the -+ * new duration would be higher than the -+ * remaining time). 
- * - * In addition, the application is now meeting - * the requirements for being deemed soft rt. -@@ -706,6 +771,7 @@ static void bfq_add_request(struct request *rq) - bfqd->bfq_wr_rt_max_time; - } - } -+set_ioprio_changed: - if (old_wr_coeff != bfqq->wr_coeff) - entity->ioprio_changed = 1; - add_bfqq_busy: -@@ -918,90 +984,35 @@ static void bfq_end_wr(struct bfq_data *bfqd) - spin_unlock_irq(bfqd->queue->queue_lock); - } - --static int bfq_allow_merge(struct request_queue *q, struct request *rq, -- struct bio *bio) -+static inline sector_t bfq_io_struct_pos(void *io_struct, bool request) - { -- struct bfq_data *bfqd = q->elevator->elevator_data; -- struct bfq_io_cq *bic; -- struct bfq_queue *bfqq; -- -- /* -- * Disallow merge of a sync bio into an async request. -- */ -- if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -- return 0; -- -- /* -- * Lookup the bfqq that this bio will be queued with. Allow -- * merge only if rq is queued there. -- * Queue lock is held here. -- */ -- bic = bfq_bic_lookup(bfqd, current->io_context); -- if (bic == NULL) -- return 0; -- -- bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -- return bfqq == RQ_BFQQ(rq); --} -- --static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) --{ -- if (bfqq != NULL) { -- bfq_mark_bfqq_must_alloc(bfqq); -- bfq_mark_bfqq_budget_new(bfqq); -- bfq_clear_bfqq_fifo_expire(bfqq); -- -- bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -- -- bfq_log_bfqq(bfqd, bfqq, -- "set_in_service_queue, cur-budget = %lu", -- bfqq->entity.budget); -- } -- -- bfqd->in_service_queue = bfqq; --} -- --/* -- * Get and set a new queue for service. 
-- */ --static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) --{ -- if (!bfqq) -- bfqq = bfq_get_next_queue(bfqd); -+ if (request) -+ return blk_rq_pos(io_struct); - else -- bfq_get_next_queue_forced(bfqd, bfqq); -- -- __bfq_set_in_service_queue(bfqd, bfqq); -- return bfqq; -+ return ((struct bio *)io_struct)->bi_iter.bi_sector; - } - --static inline sector_t bfq_dist_from_last(struct bfq_data *bfqd, -- struct request *rq) -+static inline sector_t bfq_dist_from(sector_t pos1, -+ sector_t pos2) - { -- if (blk_rq_pos(rq) >= bfqd->last_position) -- return blk_rq_pos(rq) - bfqd->last_position; -+ if (pos1 >= pos2) -+ return pos1 - pos2; - else -- return bfqd->last_position - blk_rq_pos(rq); -+ return pos2 - pos1; - } - --/* -- * Return true if bfqq has no request pending and rq is close enough to -- * bfqd->last_position, or if rq is closer to bfqd->last_position than -- * bfqq->next_rq -- */ --static inline int bfq_rq_close(struct bfq_data *bfqd, struct request *rq) -+static inline int bfq_rq_close_to_sector(void *io_struct, bool request, -+ sector_t sector) - { -- return bfq_dist_from_last(bfqd, rq) <= BFQQ_SEEK_THR; -+ return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <= -+ BFQQ_SEEK_THR; - } - --static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) -+static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector) - { - struct rb_root *root = &bfqd->rq_pos_tree; - struct rb_node *parent, *node; - struct bfq_queue *__bfqq; -- sector_t sector = bfqd->last_position; - - if (RB_EMPTY_ROOT(root)) - return NULL; -@@ -1020,7 +1031,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) - * next_request position). 
- */ - __bfqq = rb_entry(parent, struct bfq_queue, pos_node); -- if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) - return __bfqq; - - if (blk_rq_pos(__bfqq->next_rq) < sector) -@@ -1031,7 +1042,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) - return NULL; - - __bfqq = rb_entry(node, struct bfq_queue, pos_node); -- if (bfq_rq_close(bfqd, __bfqq->next_rq)) -+ if (bfq_rq_close_to_sector(__bfqq->next_rq, true, sector)) - return __bfqq; - - return NULL; -@@ -1040,14 +1051,12 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd) - /* - * bfqd - obvious - * cur_bfqq - passed in so that we don't decide that the current queue -- * is closely cooperating with itself. -- * -- * We are assuming that cur_bfqq has dispatched at least one request, -- * and that bfqd->last_position reflects a position on the disk associated -- * with the I/O issued by cur_bfqq. -+ * is closely cooperating with itself -+ * sector - used as a reference point to search for a close queue - */ - static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, -- struct bfq_queue *cur_bfqq) -+ struct bfq_queue *cur_bfqq, -+ sector_t sector) - { - struct bfq_queue *bfqq; - -@@ -1067,7 +1076,7 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, - * working closely on the same area of the disk. In that case, - * we can group them together and don't waste time idling. 
- */ -- bfqq = bfqq_close(bfqd); -+ bfqq = bfqq_close(bfqd, sector); - if (bfqq == NULL || bfqq == cur_bfqq) - return NULL; - -@@ -1094,6 +1103,305 @@ static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd, - return bfqq; - } - -+static struct bfq_queue * -+bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ int process_refs, new_process_refs; -+ struct bfq_queue *__bfqq; -+ -+ /* -+ * If there are no process references on the new_bfqq, then it is -+ * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -+ * may have dropped their last reference (not just their last process -+ * reference). -+ */ -+ if (!bfqq_process_refs(new_bfqq)) -+ return NULL; -+ -+ /* Avoid a circular list and skip interim queue merges. */ -+ while ((__bfqq = new_bfqq->new_bfqq)) { -+ if (__bfqq == bfqq) -+ return NULL; -+ new_bfqq = __bfqq; -+ } -+ -+ process_refs = bfqq_process_refs(bfqq); -+ new_process_refs = bfqq_process_refs(new_bfqq); -+ /* -+ * If the process for the bfqq has gone away, there is no -+ * sense in merging the queues. -+ */ -+ if (process_refs == 0 || new_process_refs == 0) -+ return NULL; -+ -+ bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -+ new_bfqq->pid); -+ -+ /* -+ * Merging is just a redirection: the requests of the process -+ * owning one of the two queues are redirected to the other queue. -+ * The latter queue, in its turn, is set as shared if this is the -+ * first time that the requests of some process are redirected to -+ * it. -+ * -+ * We redirect bfqq to new_bfqq and not the opposite, because we -+ * are in the context of the process owning bfqq, hence we have -+ * the io_cq of this process. So we can immediately configure this -+ * io_cq to redirect the requests of the process to new_bfqq. 
-+ * -+ * NOTE, even if new_bfqq coincides with the in-service queue, the -+ * io_cq of new_bfqq is not available, because, if the in-service -+ * queue is shared, bfqd->in_service_bic may not point to the -+ * io_cq of the in-service queue. -+ * Redirecting the requests of the process owning bfqq to the -+ * currently in-service queue is in any case the best option, as -+ * we feed the in-service queue with new requests close to the -+ * last request served and, by doing so, hopefully increase the -+ * throughput. -+ */ -+ bfqq->new_bfqq = new_bfqq; -+ atomic_add(process_refs, &new_bfqq->ref); -+ return new_bfqq; -+} -+ -+/* -+ * Attempt to schedule a merge of bfqq with the currently in-service queue -+ * or with a close queue among the scheduled queues. -+ * Return NULL if no merge was scheduled, a pointer to the shared bfq_queue -+ * structure otherwise. -+ */ -+static struct bfq_queue * -+bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ void *io_struct, bool request) -+{ -+ struct bfq_queue *in_service_bfqq, *new_bfqq; -+ -+ if (bfqq->new_bfqq) -+ return bfqq->new_bfqq; -+ -+ if (!io_struct) -+ return NULL; -+ -+ in_service_bfqq = bfqd->in_service_queue; -+ -+ if (in_service_bfqq == NULL || in_service_bfqq == bfqq || -+ !bfqd->in_service_bic) -+ goto check_scheduled; -+ -+ if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq)) -+ goto check_scheduled; -+ -+ if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq)) -+ goto check_scheduled; -+ -+ if (in_service_bfqq->entity.parent != bfqq->entity.parent) -+ goto check_scheduled; -+ -+ if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) && -+ bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) { -+ new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq); -+ if (new_bfqq != NULL) -+ return new_bfqq; /* Merge with in-service queue */ -+ } -+ -+ /* -+ * Check whether there is a cooperator among currently scheduled -+ * queues. 
The only thing we need is that the bio/request is not -+ * NULL, as we need it to establish whether a cooperator exists. -+ */ -+check_scheduled: -+ new_bfqq = bfq_close_cooperator(bfqd, bfqq, -+ bfq_io_struct_pos(io_struct, request)); -+ if (new_bfqq) -+ return bfq_setup_merge(bfqq, new_bfqq); -+ -+ return NULL; -+} -+ -+static inline void -+bfq_bfqq_save_state(struct bfq_queue *bfqq) -+{ -+ /* -+ * If bfqq->bic == NULL, the queue is already shared or its requests -+ * have already been redirected to a shared queue; both idle window -+ * and weight raising state have already been saved. Do nothing. -+ */ -+ if (bfqq->bic == NULL) -+ return; -+ if (bfqq->bic->wr_time_left) -+ /* -+ * This is the queue of a just-started process, and would -+ * deserve weight raising: we set wr_time_left to the full -+ * weight-raising duration to trigger weight-raising when -+ * and if the queue is split and the first request of the -+ * queue is enqueued. -+ */ -+ bfqq->bic->wr_time_left = bfq_wr_duration(bfqq->bfqd); -+ else if (bfqq->wr_coeff > 1) { -+ unsigned long wr_duration = -+ jiffies - bfqq->last_wr_start_finish; -+ /* -+ * It may happen that a queue's weight raising period lasts -+ * longer than its wr_cur_max_time, as weight raising is -+ * handled only when a request is enqueued or dispatched (it -+ * does not use any timer). If the weight raising period is -+ * about to end, don't save it. -+ */ -+ if (bfqq->wr_cur_max_time <= wr_duration) -+ bfqq->bic->wr_time_left = 0; -+ else -+ bfqq->bic->wr_time_left = -+ bfqq->wr_cur_max_time - wr_duration; -+ /* -+ * The bfq_queue is becoming shared or the requests of the -+ * process owning the queue are being redirected to a shared -+ * queue. Stop the weight raising period of the queue, as in -+ * both cases it should not be owned by an interactive or -+ * soft real-time application. 
-+ */ -+ bfq_bfqq_end_wr(bfqq); -+ } else -+ bfqq->bic->wr_time_left = 0; -+ bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); -+ bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); -+ bfqq->bic->cooperations++; -+ bfqq->bic->failed_cooperations = 0; -+} -+ -+static inline void -+bfq_get_bic_reference(struct bfq_queue *bfqq) -+{ -+ /* -+ * If bfqq->bic has a non-NULL value, the bic to which it belongs -+ * is about to begin using a shared bfq_queue. -+ */ -+ if (bfqq->bic) -+ atomic_long_inc(&bfqq->bic->icq.ioc->refcount); -+} -+ -+static void -+bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -+ struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) -+{ -+ bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -+ (long unsigned)new_bfqq->pid); -+ /* Save weight raising and idle window of the merged queues */ -+ bfq_bfqq_save_state(bfqq); -+ bfq_bfqq_save_state(new_bfqq); -+ if (bfq_bfqq_IO_bound(bfqq)) -+ bfq_mark_bfqq_IO_bound(new_bfqq); -+ bfq_clear_bfqq_IO_bound(bfqq); -+ /* -+ * Grab a reference to the bic, to prevent it from being destroyed -+ * before being possibly touched by a bfq_split_bfqq(). -+ */ -+ bfq_get_bic_reference(bfqq); -+ bfq_get_bic_reference(new_bfqq); -+ /* -+ * Merge queues (that is, let bic redirect its requests to new_bfqq) -+ */ -+ bic_set_bfqq(bic, new_bfqq, 1); -+ bfq_mark_bfqq_coop(new_bfqq); -+ /* -+ * new_bfqq now belongs to at least two bics (it is a shared queue): -+ * set new_bfqq->bic to NULL. bfqq either: -+ * - does not belong to any bic any more, and hence bfqq->bic must -+ * be set to NULL, or -+ * - is a queue whose owning bics have already been redirected to a -+ * different queue, hence the queue is destined to not belong to -+ * any bic soon and bfqq->bic is already NULL (therefore the next -+ * assignment causes no harm). 
-+ */ -+ new_bfqq->bic = NULL; -+ bfqq->bic = NULL; -+ bfq_put_queue(bfqq); -+} -+ -+static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq) -+{ -+ struct bfq_io_cq *bic = bfqq->bic; -+ struct bfq_data *bfqd = bfqq->bfqd; -+ -+ if (bic && bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh) { -+ bic->failed_cooperations++; -+ if (bic->failed_cooperations >= bfqd->bfq_failed_cooperations) -+ bic->cooperations = 0; -+ } -+} -+ -+static int bfq_allow_merge(struct request_queue *q, struct request *rq, -+ struct bio *bio) -+{ -+ struct bfq_data *bfqd = q->elevator->elevator_data; -+ struct bfq_io_cq *bic; -+ struct bfq_queue *bfqq, *new_bfqq; -+ -+ /* -+ * Disallow merge of a sync bio into an async request. -+ */ -+ if (bfq_bio_sync(bio) && !rq_is_sync(rq)) -+ return 0; -+ -+ /* -+ * Lookup the bfqq that this bio will be queued with. Allow -+ * merge only if rq is queued there. -+ * Queue lock is held here. -+ */ -+ bic = bfq_bic_lookup(bfqd, current->io_context); -+ if (bic == NULL) -+ return 0; -+ -+ bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); -+ /* -+ * We take advantage of this function to perform an early merge -+ * of the queues of possible cooperating processes. -+ */ -+ if (bfqq != NULL) { -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); -+ if (new_bfqq != NULL) { -+ bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); -+ /* -+ * If we get here, the bio will be queued in the -+ * shared queue, i.e., new_bfqq, so use new_bfqq -+ * to decide whether bio and rq can be merged. 
-+ */ -+ bfqq = new_bfqq; -+ } else -+ bfq_bfqq_increase_failed_cooperations(bfqq); -+ } -+ -+ return bfqq == RQ_BFQQ(rq); -+} -+ -+static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq) -+{ -+ if (bfqq != NULL) { -+ bfq_mark_bfqq_must_alloc(bfqq); -+ bfq_mark_bfqq_budget_new(bfqq); -+ bfq_clear_bfqq_fifo_expire(bfqq); -+ -+ bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8; -+ -+ bfq_log_bfqq(bfqd, bfqq, -+ "set_in_service_queue, cur-budget = %lu", -+ bfqq->entity.budget); -+ } -+ -+ bfqd->in_service_queue = bfqq; -+} -+ -+/* -+ * Get and set a new queue for service. -+ */ -+static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) -+{ -+ struct bfq_queue *bfqq = bfq_get_next_queue(bfqd); -+ -+ __bfq_set_in_service_queue(bfqd, bfqq); -+ return bfqq; -+} -+ - /* - * If enough samples have been computed, return the current max budget - * stored in bfqd, which is dynamically updated according to the -@@ -1237,63 +1545,6 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) - return rq; - } - --/* -- * Must be called with the queue_lock held. -- */ --static int bfqq_process_refs(struct bfq_queue *bfqq) --{ -- int process_refs, io_refs; -- -- io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; -- process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st; -- BUG_ON(process_refs < 0); -- return process_refs; --} -- --static void bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) --{ -- int process_refs, new_process_refs; -- struct bfq_queue *__bfqq; -- -- /* -- * If there are no process references on the new_bfqq, then it is -- * unsafe to follow the ->new_bfqq chain as other bfqq's in the chain -- * may have dropped their last reference (not just their last process -- * reference). -- */ -- if (!bfqq_process_refs(new_bfqq)) -- return; -- -- /* Avoid a circular list and skip interim queue merges. 
*/ -- while ((__bfqq = new_bfqq->new_bfqq)) { -- if (__bfqq == bfqq) -- return; -- new_bfqq = __bfqq; -- } -- -- process_refs = bfqq_process_refs(bfqq); -- new_process_refs = bfqq_process_refs(new_bfqq); -- /* -- * If the process for the bfqq has gone away, there is no -- * sense in merging the queues. -- */ -- if (process_refs == 0 || new_process_refs == 0) -- return; -- -- /* -- * Merge in the direction of the lesser amount of work. -- */ -- if (new_process_refs >= process_refs) { -- bfqq->new_bfqq = new_bfqq; -- atomic_add(process_refs, &new_bfqq->ref); -- } else { -- new_bfqq->new_bfqq = bfqq; -- atomic_add(new_process_refs, &bfqq->ref); -- } -- bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", -- new_bfqq->pid); --} -- - static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq) - { - struct bfq_entity *entity = &bfqq->entity; -@@ -2011,7 +2262,7 @@ static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq) - */ - static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - { -- struct bfq_queue *bfqq, *new_bfqq = NULL; -+ struct bfq_queue *bfqq; - struct request *next_rq; - enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT; - -@@ -2021,17 +2272,6 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - - bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); - -- /* -- * If another queue has a request waiting within our mean seek -- * distance, let it run. The expire code will check for close -- * cooperators and put the close queue at the front of the -- * service tree. If possible, merge the expiring queue with the -- * new bfqq. 
-- */ -- new_bfqq = bfq_close_cooperator(bfqd, bfqq); -- if (new_bfqq != NULL && bfqq->new_bfqq == NULL) -- bfq_setup_merge(bfqq, new_bfqq); -- - if (bfq_may_expire_for_budg_timeout(bfqq) && - !timer_pending(&bfqd->idle_slice_timer) && - !bfq_bfqq_must_idle(bfqq)) -@@ -2070,10 +2310,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); - } -- if (new_bfqq == NULL) -- goto keep_queue; -- else -- goto expire; -+ goto keep_queue; - } - } - -@@ -2082,40 +2319,30 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) - * in flight (possibly waiting for a completion) or is idling for a - * new request, then keep it. - */ -- if (new_bfqq == NULL && (timer_pending(&bfqd->idle_slice_timer) || -- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq)))) { -+ if (timer_pending(&bfqd->idle_slice_timer) || -+ (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) { - bfqq = NULL; - goto keep_queue; -- } else if (new_bfqq != NULL && timer_pending(&bfqd->idle_slice_timer)) { -- /* -- * Expiring the queue because there is a close cooperator, -- * cancel timer. -- */ -- bfq_clear_bfqq_wait_request(bfqq); -- del_timer(&bfqd->idle_slice_timer); - } - - reason = BFQ_BFQQ_NO_MORE_REQUESTS; - expire: - bfq_bfqq_expire(bfqd, bfqq, 0, reason); - new_queue: -- bfqq = bfq_set_in_service_queue(bfqd, new_bfqq); -+ bfqq = bfq_set_in_service_queue(bfqd); - bfq_log(bfqd, "select_queue: new queue %d returned", - bfqq != NULL ? 
bfqq->pid : 0); - keep_queue: - return bfqq; - } - --static void bfq_update_wr_data(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) -+static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) - { -- if (bfqq->wr_coeff > 1) { /* queue is being boosted */ -- struct bfq_entity *entity = &bfqq->entity; -- -+ struct bfq_entity *entity = &bfqq->entity; -+ if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ - bfq_log_bfqq(bfqd, bfqq, - "raising period dur %u/%u msec, old coeff %u, w %d(%d)", -- jiffies_to_msecs(jiffies - -- bfqq->last_wr_start_finish), -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), - jiffies_to_msecs(bfqq->wr_cur_max_time), - bfqq->wr_coeff, - bfqq->entity.weight, bfqq->entity.orig_weight); -@@ -2124,11 +2351,15 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, - entity->orig_weight * bfqq->wr_coeff); - if (entity->ioprio_changed) - bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); -+ - /* - * If too much time has elapsed from the beginning -- * of this weight-raising, stop it. -+ * of this weight-raising period, or the queue has -+ * exceeded the acceptable number of cooperations, -+ * stop it. 
- */ -- if (time_is_before_jiffies(bfqq->last_wr_start_finish + -+ if (bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || -+ time_is_before_jiffies(bfqq->last_wr_start_finish + - bfqq->wr_cur_max_time)) { - bfqq->last_wr_start_finish = jiffies; - bfq_log_bfqq(bfqd, bfqq, -@@ -2136,11 +2367,13 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, - bfqq->last_wr_start_finish, - jiffies_to_msecs(bfqq->wr_cur_max_time)); - bfq_bfqq_end_wr(bfqq); -- __bfq_entity_update_weight_prio( -- bfq_entity_service_tree(entity), -- entity); - } - } -+ /* Update weight both if it must be raised and if it must be lowered */ -+ if ((entity->weight > entity->orig_weight) != (bfqq->wr_coeff > 1)) -+ __bfq_entity_update_weight_prio( -+ bfq_entity_service_tree(entity), -+ entity); - } - - /* -@@ -2377,6 +2610,25 @@ static inline void bfq_init_icq(struct io_cq *icq) - struct bfq_io_cq *bic = icq_to_bic(icq); - - bic->ttime.last_end_request = jiffies; -+ /* -+ * A newly created bic indicates that the process has just -+ * started doing I/O, and is probably mapping into memory its -+ * executable and libraries: it definitely needs weight raising. -+ * There is however the possibility that the process performs, -+ * for a while, I/O close to some other process. EQM intercepts -+ * this behavior and may merge the queue corresponding to the -+ * process with some other queue, BEFORE the weight of the queue -+ * is raised. Merged queues are not weight-raised (they are assumed -+ * to belong to processes that benefit only from high throughput). -+ * If the merge is basically the consequence of an accident, then -+ * the queue will be split soon and will get back its old weight. -+ * It is then important to write down somewhere that this queue -+ * does need weight raising, even if it did not make it to get its -+ * weight raised before being merged. To this purpose, we overload -+ * the field raising_time_left and assign 1 to it, to mark the queue -+ * as needing weight raising. 
-+ */ -+ bic->wr_time_left = 1; - } - - static void bfq_exit_icq(struct io_cq *icq) -@@ -2390,6 +2642,13 @@ static void bfq_exit_icq(struct io_cq *icq) - } - - if (bic->bfqq[BLK_RW_SYNC]) { -+ /* -+ * If the bic is using a shared queue, put the reference -+ * taken on the io_context when the bic started using a -+ * shared bfq_queue. -+ */ -+ if (bfq_bfqq_coop(bic->bfqq[BLK_RW_SYNC])) -+ put_io_context(icq->ioc); - bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); - bic->bfqq[BLK_RW_SYNC] = NULL; - } -@@ -2678,6 +2937,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, - if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) - return; - -+ /* Idle window just restored, statistics are meaningless. */ -+ if (bfq_bfqq_just_split(bfqq)) -+ return; -+ - enable_idle = bfq_bfqq_idle_window(bfqq); - - if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -@@ -2725,6 +2988,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || - !BFQQ_SEEKY(bfqq)) - bfq_update_idle_window(bfqd, bfqq, bic); -+ bfq_clear_bfqq_just_split(bfqq); - - bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", -@@ -2785,13 +3049,49 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, - static void bfq_insert_request(struct request_queue *q, struct request *rq) - { - struct bfq_data *bfqd = q->elevator->elevator_data; -- struct bfq_queue *bfqq = RQ_BFQQ(rq); -+ struct bfq_queue *bfqq = RQ_BFQQ(rq), *new_bfqq; - - assert_spin_locked(bfqd->queue->queue_lock); -+ -+ /* -+ * An unplug may trigger a requeue of a request from the device -+ * driver: make sure we are in process context while trying to -+ * merge two bfq_queues. 
-+ */ -+ if (!in_interrupt()) { -+ new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); -+ if (new_bfqq != NULL) { -+ if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq) -+ new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1); -+ /* -+ * Release the request's reference to the old bfqq -+ * and make sure one is taken to the shared queue. -+ */ -+ new_bfqq->allocated[rq_data_dir(rq)]++; -+ bfqq->allocated[rq_data_dir(rq)]--; -+ atomic_inc(&new_bfqq->ref); -+ bfq_put_queue(bfqq); -+ if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) -+ bfq_merge_bfqqs(bfqd, RQ_BIC(rq), -+ bfqq, new_bfqq); -+ rq->elv.priv[1] = new_bfqq; -+ bfqq = new_bfqq; -+ } else -+ bfq_bfqq_increase_failed_cooperations(bfqq); -+ } -+ - bfq_init_prio_data(bfqq, RQ_BIC(rq)); - - bfq_add_request(rq); - -+ /* -+ * Here a newly-created bfq_queue has already started a weight-raising -+ * period: clear raising_time_left to prevent bfq_bfqq_save_state() -+ * from assigning it a full weight-raising period. See the detailed -+ * comments about this field in bfq_init_icq(). -+ */ -+ if (bfqq->bic != NULL) -+ bfqq->bic->wr_time_left = 0; - rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; - list_add_tail(&rq->queuelist, &bfqq->fifo); - -@@ -2956,18 +3256,6 @@ static void bfq_put_request(struct request *rq) - } - } - --static struct bfq_queue * --bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, -- struct bfq_queue *bfqq) --{ -- bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -- (long unsigned)bfqq->new_bfqq->pid); -- bic_set_bfqq(bic, bfqq->new_bfqq, 1); -- bfq_mark_bfqq_coop(bfqq->new_bfqq); -- bfq_put_queue(bfqq); -- return bic_to_bfqq(bic, 1); --} -- - /* - * Returns NULL if a new bfqq should be allocated, or the old bfqq if this - * was the last process referring to said bfqq. 
-@@ -2976,6 +3264,9 @@ static struct bfq_queue * - bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) - { - bfq_log_bfqq(bfqq->bfqd, bfqq, "splitting queue"); -+ -+ put_io_context(bic->icq.ioc); -+ - if (bfqq_process_refs(bfqq) == 1) { - bfqq->pid = current->pid; - bfq_clear_bfqq_coop(bfqq); -@@ -3004,6 +3295,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, - struct bfq_queue *bfqq; - struct bfq_group *bfqg; - unsigned long flags; -+ bool split = false; - - might_sleep_if(gfp_mask & __GFP_WAIT); - -@@ -3022,24 +3314,14 @@ new_queue: - bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask); - bic_set_bfqq(bic, bfqq, is_sync); - } else { -- /* -- * If the queue was seeky for too long, break it apart. -- */ -+ /* If the queue was seeky for too long, break it apart. */ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); - bfqq = bfq_split_bfqq(bic, bfqq); -+ split = true; - if (!bfqq) - goto new_queue; - } -- -- /* -- * Check to see if this queue is scheduled to merge with -- * another closely cooperating queue. The merging of queues -- * happens here as it must be done in process context. -- * The reference on new_bfqq was taken in merge_bfqqs. -- */ -- if (bfqq->new_bfqq != NULL) -- bfqq = bfq_merge_bfqqs(bfqd, bic, bfqq); - } - - bfqq->allocated[rw]++; -@@ -3050,6 +3332,26 @@ new_queue: - rq->elv.priv[0] = bic; - rq->elv.priv[1] = bfqq; - -+ /* -+ * If a bfq_queue has only one process reference, it is owned -+ * by only one bfq_io_cq: we can set the bic field of the -+ * bfq_queue to the address of that structure. Also, if the -+ * queue has just been split, mark a flag so that the -+ * information is available to the other scheduler hooks. 
-+ */ -+ if (bfqq_process_refs(bfqq) == 1) { -+ bfqq->bic = bic; -+ if (split) { -+ bfq_mark_bfqq_just_split(bfqq); -+ /* -+ * If the queue has just been split from a shared -+ * queue, restore the idle window and the possible -+ * weight raising period. -+ */ -+ bfq_bfqq_resume_state(bfqq, bic); -+ } -+ } -+ - spin_unlock_irqrestore(q->queue_lock, flags); - - return 0; -diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index c4831b7..546a254 100644 ---- a/block/bfq-sched.c -+++ b/block/bfq-sched.c -@@ -1084,34 +1084,6 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) - return bfqq; - } - --/* -- * Forced extraction of the given queue. -- */ --static void bfq_get_next_queue_forced(struct bfq_data *bfqd, -- struct bfq_queue *bfqq) --{ -- struct bfq_entity *entity; -- struct bfq_sched_data *sd; -- -- BUG_ON(bfqd->in_service_queue != NULL); -- -- entity = &bfqq->entity; -- /* -- * Bubble up extraction/update from the leaf to the root. -- */ -- for_each_entity(entity) { -- sd = entity->sched_data; -- bfq_update_budget(entity); -- bfq_update_vtime(bfq_entity_service_tree(entity)); -- bfq_active_extract(bfq_entity_service_tree(entity), entity); -- sd->in_service_entity = entity; -- sd->next_in_service = NULL; -- entity->service = 0; -- } -- -- return; --} -- - static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) - { - if (bfqd->in_service_bic != NULL) { -diff --git a/block/bfq.h b/block/bfq.h -index aeca08e..4834b70 100644 ---- a/block/bfq.h -+++ b/block/bfq.h -@@ -215,18 +215,21 @@ struct bfq_group; - * idle @bfq_queue with no outstanding requests, then - * the task associated with the queue it is deemed as - * soft real-time (see the comments to the function -- * bfq_bfqq_softrt_next_start()). 
-+ * bfq_bfqq_softrt_next_start()) - * @last_idle_bklogged: time of the last transition of the @bfq_queue from - * idle to backlogged - * @service_from_backlogged: cumulative service received from the @bfq_queue - * since the last transition from idle to - * backlogged -+ * @bic: pointer to the bfq_io_cq owning the bfq_queue, set to %NULL if the -+ * queue is shared - * -- * A bfq_queue is a leaf request queue; it can be associated with an io_context -- * or more, if it is async or shared between cooperating processes. @cgroup -- * holds a reference to the cgroup, to be sure that it does not disappear while -- * a bfqq still references it (mostly to avoid races between request issuing and -- * task migration followed by cgroup destruction). -+ * A bfq_queue is a leaf request queue; it can be associated with an -+ * io_context or more, if it is async or shared between cooperating -+ * processes. @cgroup holds a reference to the cgroup, to be sure that it -+ * does not disappear while a bfqq still references it (mostly to avoid -+ * races between request issuing and task migration followed by cgroup -+ * destruction). - * All the fields are protected by the queue lock of the containing bfqd. 
- */ - struct bfq_queue { -@@ -264,6 +267,7 @@ struct bfq_queue { - unsigned int requests_within_timer; - - pid_t pid; -+ struct bfq_io_cq *bic; - - /* weight-raising fields */ - unsigned long wr_cur_max_time; -@@ -293,12 +297,34 @@ struct bfq_ttime { - * @icq: associated io_cq structure - * @bfqq: array of two process queues, the sync and the async - * @ttime: associated @bfq_ttime struct -+ * @wr_time_left: snapshot of the time left before weight raising ends -+ * for the sync queue associated to this process; this -+ * snapshot is taken to remember this value while the weight -+ * raising is suspended because the queue is merged with a -+ * shared queue, and is used to set @raising_cur_max_time -+ * when the queue is split from the shared queue and its -+ * weight is raised again -+ * @saved_idle_window: same purpose as the previous field for the idle -+ * window -+ * @saved_IO_bound: same purpose as the previous two fields for the I/O -+ * bound classification of a queue -+ * @cooperations: counter of consecutive successful queue merges underwent -+ * by any of the process' @bfq_queues -+ * @failed_cooperations: counter of consecutive failed queue merges of any -+ * of the process' @bfq_queues - */ - struct bfq_io_cq { - struct io_cq icq; /* must be the first member */ - struct bfq_queue *bfqq[2]; - struct bfq_ttime ttime; - int ioprio; -+ -+ unsigned int wr_time_left; -+ unsigned int saved_idle_window; -+ unsigned int saved_IO_bound; -+ -+ unsigned int cooperations; -+ unsigned int failed_cooperations; - }; - - enum bfq_device_speed { -@@ -511,7 +537,7 @@ enum bfqq_state_flags { - BFQ_BFQQ_FLAG_prio_changed, /* task priority has changed */ - BFQ_BFQQ_FLAG_sync, /* synchronous queue */ - BFQ_BFQQ_FLAG_budget_new, /* no completion with this budget */ -- BFQ_BFQQ_FLAG_IO_bound, /* -+ BFQ_BFQQ_FLAG_IO_bound, /* - * bfqq has timed-out at least once - * having consumed at most 2/10 of - * its budget -@@ -520,12 +546,13 @@ enum bfqq_state_flags { - * bfqq has proved 
to be slow and - * seeky until budget timeout - */ -- BFQ_BFQQ_FLAG_softrt_update, /* -+ BFQ_BFQQ_FLAG_softrt_update, /* - * may need softrt-next-start - * update - */ - BFQ_BFQQ_FLAG_coop, /* bfqq is shared */ -- BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be splitted */ -+ BFQ_BFQQ_FLAG_split_coop, /* shared bfqq will be split */ -+ BFQ_BFQQ_FLAG_just_split, /* queue has just been split */ - }; - - #define BFQ_BFQQ_FNS(name) \ -@@ -554,6 +581,7 @@ BFQ_BFQQ_FNS(IO_bound); - BFQ_BFQQ_FNS(constantly_seeky); - BFQ_BFQQ_FNS(coop); - BFQ_BFQQ_FNS(split_coop); -+BFQ_BFQQ_FNS(just_split); - BFQ_BFQQ_FNS(softrt_update); - #undef BFQ_BFQQ_FNS - --- -1.9.3 - diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0004-tuxonice-for-linux.patch b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0004-tuxonice-for-linux.patch deleted file mode 100644 index 706f498ab..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0004-tuxonice-for-linux.patch +++ /dev/null @@ -1,22156 +0,0 @@ -# Calculate format=diff os_linux_system==desktop -diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 30a8ad0d..06d87ee 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -3428,6 +3428,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. - HIGHMEM regardless of setting - of CONFIG_HIGHPTE. - -+ uuid_debug= (Boolean) whether to enable debugging of TuxOnIce's -+ uuid support. -+ - vdso= [X86,SH] - On X86_32, this is an alias for vdso32=. Otherwise: - -diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt -new file mode 100644 -index 0000000..7a96186 ---- /dev/null -+++ b/Documentation/power/tuxonice-internals.txt -@@ -0,0 +1,477 @@ -+ TuxOnIce 3.0 Internal Documentation. -+ Updated to 26 March 2009 -+ -+1. Introduction. 
-+ -+ TuxOnIce 3.0 is an addition to the Linux Kernel, designed to -+ allow the user to quickly shutdown and quickly boot a computer, without -+ needing to close documents or programs. It is equivalent to the -+ hibernate facility in some laptops. This implementation, however, -+ requires no special BIOS or hardware support. -+ -+ The code in these files is based upon the original implementation -+ prepared by Gabor Kuti and additional work by Pavel Machek and a -+ host of others. This code has been substantially reworked by Nigel -+ Cunningham, again with the help and testing of many others, not the -+ least of whom is Michael Frank. At its heart, however, the operation is -+ essentially the same as Gabor's version. -+ -+2. Overview of operation. -+ -+ The basic sequence of operations is as follows: -+ -+ a. Quiesce all other activity. -+ b. Ensure enough memory and storage space are available, and attempt -+ to free memory/storage if necessary. -+ c. Allocate the required memory and storage space. -+ d. Write the image. -+ e. Power down. -+ -+ There are a number of complicating factors which mean that things are -+ not as simple as the above would imply, however... -+ -+ o The activity of each process must be stopped at a point where it will -+ not be holding locks necessary for saving the image, or unexpectedly -+ restart operations due to something like a timeout and thereby make -+ our image inconsistent. -+ -+ o It is desirous that we sync outstanding I/O to disk before calculating -+ image statistics. This reduces corruption if one should suspend but -+ then not resume, and also makes later parts of the operation safer (see -+ below). -+ -+ o We need to get as close as we can to an atomic copy of the data. -+ Inconsistencies in the image will result in inconsistent memory contents at -+ resume time, and thus in instability of the system and/or file system -+ corruption. 
This would appear to imply a maximum image size of one half of -+ the amount of RAM, but we have a solution... (again, below). -+ -+ o In 2.6, we choose to play nicely with the other suspend-to-disk -+ implementations. -+ -+3. Detailed description of internals. -+ -+ a. Quiescing activity. -+ -+ Safely quiescing the system is achieved using three separate but related -+ aspects. -+ -+ First, we note that the vast majority of processes don't need to run during -+ suspend. They can be 'frozen'. We therefore implement a refrigerator -+ routine, which processes enter and in which they remain until the cycle is -+ complete. Processes enter the refrigerator via try_to_freeze() invocations -+ at appropriate places. A process cannot be frozen in any old place. It -+ must not be holding locks that will be needed for writing the image or -+ freezing other processes. For this reason, userspace processes generally -+ enter the refrigerator via the signal handling code, and kernel threads at -+ the place in their event loops where they drop locks and yield to other -+ processes or sleep. -+ -+ The task of freezing processes is complicated by the fact that there can be -+ interdependencies between processes. Freezing process A before process B may -+ mean that process B cannot be frozen, because it stops at waiting for -+ process A rather than in the refrigerator. This issue is seen where -+ userspace waits on freezeable kernel threads or fuse filesystem threads. To -+ address this issue, we implement the following algorithm for quiescing -+ activity: -+ -+ - Freeze filesystems (including fuse - userspace programs starting -+ new requests are immediately frozen; programs already running -+ requests complete their work before being frozen in the next -+ step) -+ - Freeze userspace -+ - Thaw filesystems (this is safe now that userspace is frozen and no -+ fuse requests are outstanding). -+ - Invoke sys_sync (noop on fuse). 
-+ - Freeze filesystems -+ - Freeze kernel threads -+ -+ If we need to free memory, we thaw kernel threads and filesystems, but not -+ userspace. We can then free caches without worrying about deadlocks due to -+ swap files being on frozen filesystems or such like. -+ -+ b. Ensure enough memory & storage are available. -+ -+ We have a number of constraints to meet in order to be able to successfully -+ suspend and resume. -+ -+ First, the image will be written in two parts, described below. One of these -+ parts needs to have an atomic copy made, which of course implies a maximum -+ size of one half of the amount of system memory. The other part ('pageset') -+ is not atomically copied, and can therefore be as large or small as desired. -+ -+ Second, we have constraints on the amount of storage available. In these -+ calculations, we may also consider any compression that will be done. The -+ cryptoapi module allows the user to configure an expected compression ratio. -+ -+ Third, the user can specify an arbitrary limit on the image size, in -+ megabytes. This limit is treated as a soft limit, so that we don't fail the -+ attempt to suspend if we cannot meet this constraint. -+ -+ c. Allocate the required memory and storage space. -+ -+ Having done the initial freeze, we determine whether the above constraints -+ are met, and seek to allocate the metadata for the image. If the constraints -+ are not met, or we fail to allocate the required space for the metadata, we -+ seek to free the amount of memory that we calculate is needed and try again. -+ We allow up to four iterations of this loop before aborting the cycle. If we -+ do fail, it should only be because of a bug in TuxOnIce's calculations. -+ -+ These steps are merged together in the prepare_image function, found in -+ prepare_image.c. The functions are merged because of the cyclical nature -+ of the problem of calculating how much memory and storage is needed. 
Since -+ the data structures containing the information about the image must -+ themselves take memory and use storage, the amount of memory and storage -+ required changes as we prepare the image. Since the changes are not large, -+ only one or two iterations will be required to achieve a solution. -+ -+ The recursive nature of the algorithm is minimised by keeping user space -+ frozen while preparing the image, and by the fact that our records of which -+ pages are to be saved and which pageset they are saved in use bitmaps (so -+ that changes in number or fragmentation of the pages to be saved don't -+ feed back via changes in the amount of memory needed for metadata). The -+ recursiveness is thus limited to any extra slab pages allocated to store the -+ extents that record storage used, and the effects of seeking to free memory. -+ -+ d. Write the image. -+ -+ We previously mentioned the need to create an atomic copy of the data, and -+ the half-of-memory limitation that is implied in this. This limitation is -+ circumvented by dividing the memory to be saved into two parts, called -+ pagesets. -+ -+ Pageset2 contains most of the page cache - the pages on the active and -+ inactive LRU lists that aren't needed or modified while TuxOnIce is -+ running, so they can be safely written without an atomic copy. They are -+ therefore saved first and reloaded last. While saving these pages, -+ TuxOnIce carefully ensures that the work of writing the pages doesn't make -+ the image inconsistent. With the support for Kernel (Video) Mode Setting -+ going into the kernel at the time of writing, we need to check for pages -+ on the LRU that are used by KMS, and exclude them from pageset2. They are -+ atomically copied as part of pageset 1. -+ -+ Once pageset2 has been saved, we prepare to do the atomic copy of remaining -+ memory. As part of the preparation, we power down drivers, thereby providing -+ them with the opportunity to have their state recorded in the image.
The -+ amount of memory allocated by drivers for this is usually negligible, but if -+ DRI is in use, video drivers may require significant amounts. Ideally we -+ would be able to query drivers while preparing the image as to the amount of -+ memory they will need. Unfortunately no such mechanism exists at the time of -+ writing. For this reason, TuxOnIce allows the user to set an -+ 'extra_pages_allowance', which is used to seek to ensure sufficient memory -+ is available for drivers at this point. TuxOnIce also lets the user set this -+ value to 0. In this case, a test driver suspend is done while preparing the -+ image, and the difference (plus a margin) used instead. TuxOnIce will also -+ automatically restart the hibernation process (twice at most) if it finds -+ that the extra pages allowance is not sufficient. It will then use what was -+ actually needed (plus a margin, again). Failure to hibernate should thus -+ be an extremely rare occurrence. -+ -+ Having suspended the drivers, we save the CPU context before making an -+ atomic copy of pageset1, resuming the drivers and saving the atomic copy. -+ After saving the two pagesets, we just need to save our metadata before -+ powering down. -+ -+ As we mentioned earlier, the contents of pageset2 pages aren't needed once -+ they've been saved. We therefore use them as the destination of our atomic -+ copy. In the unlikely event that pageset1 is larger, extra pages are -+ allocated while the image is being prepared. This is normally only a real -+ possibility when the system has just been booted and the page cache is -+ small. -+ -+ This is where we need to be careful about syncing, however. Pageset2 will -+ probably contain filesystem meta data. If this is overwritten with pageset1 -+ and then a sync occurs, the filesystem will be corrupted - at least until -+ resume time and another sync of the restored data. Since there is a -+ possibility that the user might not resume or (may it never be!)
that -+ TuxOnIce might oops, we do our utmost to avoid syncing filesystems after -+ copying pageset1. -+ -+ e. Power down. -+ -+ Powering down uses standard kernel routines. TuxOnIce supports powering down -+ using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off. -+ Supporting suspend to ram (S3) as a power off option might sound strange, -+ but it allows the user to quickly get their system up and running again if -+ the battery doesn't run out (we just need to re-read the overwritten pages) -+ and if the battery does run out (or the user removes power), they can still -+ resume. -+ -+4. Data Structures. -+ -+ TuxOnIce uses three main structures to store its metadata and configuration -+ information: -+ -+ a) Pageflags bitmaps. -+ -+ TuxOnIce records which pages will be in pageset1, pageset2, the destination -+ of the atomic copy and the source of the atomically restored image using -+ bitmaps. The code used is that written for swsusp, with small improvements -+ to match TuxOnIce's requirements. -+ -+ The pageset1 bitmap is thus easily stored in the image header for use at -+ resume time. -+ -+ As mentioned above, using bitmaps also means that the amount of memory and -+ storage required for recording the above information is constant. This -+ greatly simplifies the work of preparing the image. In earlier versions of -+ TuxOnIce, extents were used to record which pages would be stored. In that -+ case, however, eating memory could result in greater fragmentation of the -+ lists of pages, which in turn required more memory to store the extents and -+ more storage in the image header. These could in turn require further -+ freeing of memory, and another iteration. All of this complexity is removed -+ by having bitmaps. -+ -+ Bitmaps also make a lot of sense because TuxOnIce only ever iterates -+ through the lists. There is therefore no cost to not being able to find the -+ nth page in order 0 time. 
We only need to worry about the cost of finding -+ the n+1th page, given the location of the nth page. Bitwise optimisations -+ help here. -+ -+ b) Extents for block data. -+ -+ TuxOnIce supports writing the image to multiple block devices. In the case -+ of swap, multiple partitions and/or files may be in use, and we happily use -+ them all (with the exception of compcache pages, which we allocate but do -+ not use). This use of multiple block devices is accomplished as follows: -+ -+ Whatever the actual source of the allocated storage, the destination of the -+ image can be viewed in terms of one or more block devices, and on each -+ device, a list of sectors. To simplify matters, we only use contiguous, -+ PAGE_SIZE aligned sectors, like the swap code does. -+ -+ Since sector numbers on each bdev may well not start at 0, it makes much -+ more sense to use extents here. Contiguous ranges of pages can thus be -+ represented in the extents by contiguous values. -+ -+ Variations in block size are taken account of in transforming this data -+ into the parameters for bio submission. -+ -+ We can thus implement a layer of abstraction wherein the core of TuxOnIce -+ doesn't have to worry about which device we're currently writing to or -+ where in the device we are. It simply requests that the next page in the -+ pageset or header be written, leaving the details to this lower layer. -+ The lower layer remembers where in the sequence of devices and blocks each -+ pageset starts. The header always starts at the beginning of the allocated -+ storage. 
-+ -+ So extents are: -+ -+ struct extent { -+ unsigned long minimum, maximum; -+ struct extent *next; -+ } -+ -+ These are combined into chains of extents for a device: -+ -+ struct extent_chain { -+ int size; /* size of the extent ie sum (max-min+1) */ -+ int allocs, frees; -+ char *name; -+ struct extent *first, *last_touched; -+ }; -+ -+ For each bdev, we need to store a little more info: -+ -+ struct suspend_bdev_info { -+ struct block_device *bdev; -+ dev_t dev_t; -+ int bmap_shift; -+ int blocks_per_page; -+ }; -+ -+ The dev_t is used to identify the device in the stored image. As a result, -+ we expect devices at resume time to have the same major and minor numbers -+ as they had while suspending. This is primarily a concern where the user -+ utilises LVM for storage, as they will need to dmsetup their partitions in -+ such a way as to maintain this consistency at resume time. -+ -+ bmap_shift and blocks_per_page apply the effects of variations in blocks -+ per page settings for the filesystem and underlying bdev. For most -+ filesystems, these are the same, but for xfs, they can have independent -+ values. -+ -+ Combining these two structures together, we have everything we need to -+ record what devices and what blocks on each device are being used to -+ store the image, and to submit i/o using bio_submit. -+ -+ The last elements in the picture are a means of recording how the storage -+ is being used. -+ -+ We do this first and foremost by implementing a layer of abstraction on -+ top of the devices and extent chains which allows us to view however many -+ devices there might be as one long storage tape, with a single 'head' that -+ tracks a 'current position' on the tape: -+ -+ struct extent_iterate_state { -+ struct extent_chain *chains; -+ int num_chains; -+ int current_chain; -+ struct extent *current_extent; -+ unsigned long current_offset; -+ }; -+ -+ That is, *chains points to an array of size num_chains of extent chains.
-+ For the filewriter, this is always a single chain. For the swapwriter, the -+ array is of size MAX_SWAPFILES. -+ -+ current_chain, current_extent and current_offset thus point to the current -+ index in the chains array (and into a matching array of struct -+ suspend_bdev_info), the current extent in that chain (to optimise access), -+ and the current value in the offset. -+ -+ The image is divided into three parts: -+ - The header -+ - Pageset 1 -+ - Pageset 2 -+ -+ The header always starts at the first device and first block. We know its -+ size before we begin to save the image because we carefully account for -+ everything that will be stored in it. -+ -+ The second pageset (LRU) is stored first. It begins on the next page after -+ the end of the header. -+ -+ The first pageset is stored second. Its start location is only known once -+ pageset2 has been saved, since pageset2 may be compressed as it is written. -+ This location is thus recorded at the end of saving pageset2. It is page -+ aligned also. -+ -+ Since this information is needed at resume time, and the location of extents -+ in memory will differ at resume time, this needs to be stored in a portable -+ way: -+ -+ struct extent_iterate_saved_state { -+ int chain_num; -+ int extent_num; -+ unsigned long offset; -+ }; -+ -+ We can thus implement a layer of abstraction wherein the core of TuxOnIce -+ doesn't have to worry about which device we're currently writing to or -+ where in the device we are. It simply requests that the next page in the -+ pageset or header be written, leaving the details to this layer, and -+ invokes the routines to remember and restore the position, without having -+ to worry about the details of how the data is arranged on disk or such like. -+ -+ c) Modules -+ -+ One aim in designing TuxOnIce was to make it flexible.
We wanted to allow -+ for the implementation of different methods of transforming a page to be -+ written to disk and different methods of getting the pages stored. -+ -+ In early versions (the betas and perhaps Suspend1), compression support was -+ inlined in the image writing code, and the data structures and code for -+ managing swap were intertwined with the rest of the code. A number of people -+ had expressed interest in implementing image encryption, and alternative -+ methods of storing the image. -+ -+ In order to achieve this, TuxOnIce was given a modular design. -+ -+ A module is a single file which encapsulates the functionality needed -+ to transform a pageset of data (encryption or compression, for example), -+ or to write the pageset to a device. The former type of module is called -+ a 'page-transformer', the latter a 'writer'. -+ -+ Modules are linked together in pipeline fashion. There may be zero or more -+ page transformers in a pipeline, and there is always exactly one writer. -+ The pipeline follows this pattern: -+ -+ --------------------------------- -+ | TuxOnIce Core | -+ --------------------------------- -+ | -+ | -+ --------------------------------- -+ | Page transformer 1 | -+ --------------------------------- -+ | -+ | -+ --------------------------------- -+ | Page transformer 2 | -+ --------------------------------- -+ | -+ | -+ --------------------------------- -+ | Writer | -+ --------------------------------- -+ -+ During the writing of an image, the core code feeds pages one at a time -+ to the first module. This module performs whatever transformations it -+ implements on the incoming data, completely consuming the incoming data and -+ feeding output in a similar manner to the next module. -+ -+ All routines are SMP safe, and the final result of the transformations is -+ written with an index (provided by the core) and size of the output by the -+ writer.
As a result, we can have multithreaded I/O without needing to -+ worry about the sequence in which pages are written (or read). -+ -+ During reading, the pipeline works in the reverse direction. The core code -+ calls the first module with the address of a buffer which should be filled. -+ (Note that the buffer size is always PAGE_SIZE at this time). This module -+ will in turn request data from the next module and so on down until the -+ writer is made to read from the stored image. -+ -+ Part of definition of the structure of a module thus looks like this: -+ -+ int (*rw_init) (int rw, int stream_number); -+ int (*rw_cleanup) (int rw); -+ int (*write_chunk) (struct page *buffer_page); -+ int (*read_chunk) (struct page *buffer_page, int sync); -+ -+ It should be noted that the _cleanup routine may be called before the -+ full stream of data has been read or written. While writing the image, -+ the user may (depending upon settings) choose to abort suspending, and -+ if we are in the midst of writing the last portion of the image, a portion -+ of the second pageset may be reread. This may also happen if an error -+ occurs and we seek to abort the process of writing the image. -+ -+ The modular design is also useful in a number of other ways. 
It provides -+ a means whereby we can add support for: -+ -+ - providing overall initialisation and cleanup routines; -+ - serialising configuration information in the image header; -+ - providing debugging information to the user; -+ - determining memory and image storage requirements; -+ - dis/enabling components at run-time; -+ - configuring the module (see below); -+ -+ ...and routines for writers specific to their work: -+ - Parsing a resume= location; -+ - Determining whether an image exists; -+ - Marking a resume as having been attempted; -+ - Invalidating an image; -+ -+ Since some parts of the core - the user interface and storage manager -+ support - have use for some of these functions, they are registered as -+ 'miscellaneous' modules as well. -+ -+ d) Sysfs data structures. -+ -+ This brings us naturally to support for configuring TuxOnIce. We desired to -+ provide a way to make TuxOnIce as flexible and configurable as possible. -+ The user shouldn't have to reboot just because they want to now hibernate to -+ a file instead of a partition, for example. -+ -+ To accomplish this, TuxOnIce implements a very generic means whereby the -+ core and modules can register new sysfs entries. All TuxOnIce entries use -+ a single _store and _show routine, both of which are found in -+ tuxonice_sysfs.c in the kernel/power directory. These routines handle the -+ most common operations - getting and setting the values of bits, integers, -+ longs, unsigned longs and strings in one place, and allow overrides for -+ customised get and set options as well as side-effect routines for all -+ reads and writes.
-+ -+ When combined with some simple macros, a new sysfs entry can then be defined -+ in just a couple of lines: -+ -+ SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, -+ 2048, 0, NULL), -+ -+ This defines a sysfs entry named "progress_granularity" which is rw and -+ allows the user to access an integer stored at &progress_granularity, giving -+ it a value between 1 and 2048 inclusive. -+ -+ Sysfs entries are registered under /sys/power/tuxonice, and entries for -+ modules are located in a subdirectory named after the module. -+ -diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt -new file mode 100644 -index 0000000..3bf0575 ---- /dev/null -+++ b/Documentation/power/tuxonice.txt -@@ -0,0 +1,948 @@ -+ --- TuxOnIce, version 3.0 --- -+ -+1. What is it? -+2. Why would you want it? -+3. What do you need to use it? -+4. Why not just use the version already in the kernel? -+5. How do you use it? -+6. What do all those entries in /sys/power/tuxonice do? -+7. How do you get support? -+8. I think I've found a bug. What should I do? -+9. When will XXX be supported? -+10 How does it work? -+11. Who wrote TuxOnIce? -+ -+1. What is it? -+ -+ Imagine you're sitting at your computer, working away. For some reason, you -+ need to turn off your computer for a while - perhaps it's time to go home -+ for the day. When you come back to your computer next, you're going to want -+ to carry on where you left off. Now imagine that you could push a button and -+ have your computer store the contents of its memory to disk and power down. -+ Then, when you next start up your computer, it loads that image back into -+ memory and you can carry on from where you were, just as if you'd never -+ turned the computer off. You have far less time to start up, no reopening of -+ applications or finding what directory you put that file in yesterday. -+ That's what TuxOnIce does. -+ -+ TuxOnIce has a long heritage. 
It began life as work by Gabor Kuti, who, -+ with some help from Pavel Machek, got an early version going in 1999. The -+ project was then taken over by Florent Chabaud while still in alpha version -+ numbers. Nigel Cunningham came on the scene when Florent was unable to -+ continue, moving the project into betas, then 1.0, 2.0 and so on up to -+ the present series. During the 2.0 series, the name was contracted to -+ Suspend2 and the website suspend2.net created. Beginning around July 2007, -+ a transition to calling the software TuxOnIce was made, to seek to help -+ make it clear that TuxOnIce is more concerned with hibernation than suspend -+ to ram. -+ -+ Pavel Machek's swsusp code, which was merged around 2.5.17 retains the -+ original name, and was essentially a fork of the beta code until Rafael -+ Wysocki came on the scene in 2005 and began to improve it further. -+ -+2. Why would you want it? -+ -+ Why wouldn't you want it? -+ -+ Being able to save the state of your system and quickly restore it improves -+ your productivity - you get a useful system in far less time than through -+ the normal boot process. You also get to be completely 'green', using zero -+ power, or as close to that as possible (the computer may still provide -+ minimal power to some devices, so they can initiate a power on, but that -+ will be the same amount of power as would be used if you told the computer -+ to shutdown. -+ -+3. What do you need to use it? -+ -+ a. Kernel Support. -+ -+ i) The TuxOnIce patch. -+ -+ TuxOnIce is part of the Linux Kernel. This version is not part of Linus's -+ 2.6 tree at the moment, so you will need to download the kernel source and -+ apply the latest patch. Having done that, enable the appropriate options in -+ make [menu|x]config (under Power Management Options - look for "Enhanced -+ Hibernation"), compile and install your kernel. TuxOnIce works with SMP, -+ Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64. 
-+ -+ TuxOnIce patches are available from http://tuxonice.net. -+ -+ ii) Compression support. -+ -+ Compression support is implemented via the cryptoapi. You will therefore want -+ to select any Cryptoapi transforms that you want to use on your image from -+ the Cryptoapi menu while configuring your kernel. We recommend the use of the -+ LZO compression method - it is very fast and still achieves good compression. -+ -+ You can also tell TuxOnIce to write its image to an encrypted and/or -+ compressed filesystem/swap partition. In that case, you don't need to do -+ anything special for TuxOnIce when it comes to kernel configuration. -+ -+ iii) Configuring other options. -+ -+ While you're configuring your kernel, try to configure as much as possible -+ to build as modules. We recommend this because there are a number of drivers -+ that are still in the process of implementing proper power management -+ support. In those cases, the best way to work around their current lack is -+ to build them as modules and remove the modules while hibernating. You might -+ also bug the driver authors to get their support up to speed, or even help! -+ -+ b. Storage. -+ -+ i) Swap. -+ -+ TuxOnIce can store the hibernation image in your swap partition, a swap file or -+ a combination thereof. Whichever combination you choose, you will probably -+ want to create enough swap space to store the largest image you could have, -+ plus the space you'd normally use for swap. A good rule of thumb would be -+ to calculate the amount of swap you'd want without using TuxOnIce, and then -+ add the amount of memory you have. This swapspace can be arranged in any way -+ you'd like. It can be in one partition or file, or spread over a number. The -+ only requirement is that they be active when you start a hibernation cycle. -+ -+ There is one exception to this requirement. 
TuxOnIce has the ability to turn -+ on one swap file or partition at the start of hibernating and turn it back off -+ at the end. If you want to ensure you have enough memory to store a image -+ when your memory is fully used, you might want to make one swap partition or -+ file for 'normal' use, and another for TuxOnIce to activate & deactivate -+ automatically. (Further details below). -+ -+ ii) Normal files. -+ -+ TuxOnIce includes a 'file allocator'. The file allocator can store your -+ image in a simple file. Since Linux has the concept of everything being a -+ file, this is more powerful than it initially sounds. If, for example, you -+ were to set up a network block device file, you could hibernate to a network -+ server. This has been tested and works to a point, but nbd itself isn't -+ stateless enough for our purposes. -+ -+ Take extra care when setting up the file allocator. If you just type -+ commands without thinking and then try to hibernate, you could cause -+ irreversible corruption on your filesystems! Make sure you have backups. -+ -+ Most people will only want to hibernate to a local file. To achieve that, do -+ something along the lines of: -+ -+ echo "TuxOnIce" > /hibernation-file -+ dd if=/dev/zero bs=1M count=512 >> /hibernation-file -+ -+ This will create a 512MB file called /hibernation-file. To get TuxOnIce to use -+ it: -+ -+ echo /hibernation-file > /sys/power/tuxonice/file/target -+ -+ Then -+ -+ cat /sys/power/tuxonice/resume -+ -+ Put the results of this into your bootloader's configuration (see also step -+ C, below): -+ -+ ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- -+ # cat /sys/power/tuxonice/resume -+ file:/dev/hda2:0x1e001 -+ -+ In this example, we would edit the append= line of our lilo.conf|menu.lst -+ so that it included: -+ -+ resume=file:/dev/hda2:0x1e001 -+ ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE--- -+ -+ For those who are thinking 'Could I make the file sparse?', the answer is -+ 'No!'. 
At the moment, there is no way for TuxOnIce to fill in the holes in -+ a sparse file while hibernating. In the longer term (post merge!), I'd like -+ to change things so that the file could be dynamically resized and have -+ holes filled as needed. Right now, however, that's not possible and not a -+ priority. -+ -+ c. Bootloader configuration. -+ -+ Using TuxOnIce also requires that you add an extra parameter to -+ your lilo.conf or equivalent. Here's an example for a swap partition: -+ -+ append="resume=swap:/dev/hda1" -+ -+ This would tell TuxOnIce that /dev/hda1 is a swap partition you -+ have. TuxOnIce will use the swap signature of this partition as a -+ pointer to your data when you hibernate. This means that (in this example) -+ /dev/hda1 doesn't need to be _the_ swap partition where all of your data -+ is actually stored. It just needs to be a swap partition that has a -+ valid signature. -+ -+ You don't need to have a swap partition for this purpose. TuxOnIce -+ can also use a swap file, but usage is a little more complex. Having made -+ your swap file, turn it on and do -+ -+ cat /sys/power/tuxonice/swap/headerlocations -+ -+ (this assumes you've already compiled your kernel with TuxOnIce -+ support and booted it). The results of the cat command will tell you -+ what you need to put in lilo.conf: -+ -+ For swap partitions like /dev/hda1, simply use resume=/dev/hda1. -+ For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d. -+ -+ If the swapfile changes for any reason (it is moved to a different -+ location, it is deleted and recreated, or the filesystem is -+ defragmented) then you will have to check -+ /sys/power/tuxonice/swap/headerlocations for a new resume_block value. -+ -+ Once you've compiled and installed the kernel and adjusted your bootloader -+ configuration, you should only need to reboot for the most basic part -+ of TuxOnIce to be ready. 
-+ -+ If you only compile in the swap allocator, or only compile in the file -+ allocator, you don't need to add the "swap:" part of the resume= -+ parameters above. resume=/dev/hda2:0x242d will work just as well. If you -+ have compiled both and your storage is on swap, you can also use this -+ format (the swap allocator is the default allocator). -+ -+ When compiling your kernel, one of the options in the 'Power Management -+ Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is -+ called 'Default resume partition'. This can be used to set a default value -+ for the resume= parameter. -+ -+ d. The hibernate script. -+ -+ Since the driver model in 2.6 kernels is still being developed, you may need -+ to do more than just configure TuxOnIce. Users of TuxOnIce usually start the -+ process via a script which prepares for the hibernation cycle, tells the -+ kernel to do its stuff and then restore things afterwards. This script might -+ involve: -+ -+ - Switching to a text console and back if X doesn't like the video card -+ status on resume. -+ - Un/reloading drivers that don't play well with hibernation. -+ -+ Note that you might not be able to unload some drivers if there are -+ processes using them. You might have to kill off processes that hold -+ devices open. Hint: if your X server accesses an USB mouse, doing a -+ 'chvt' to a text console releases the device and you can unload the -+ module. -+ -+ Check out the latest script (available on tuxonice.net). -+ -+ e. The userspace user interface. -+ -+ TuxOnIce has very limited support for displaying status if you only apply -+ the kernel patch - it can printk messages, but that is all. In addition, -+ some of the functions mentioned in this document (such as cancelling a cycle -+ or performing interactive debugging) are unavailable. To utilise these -+ functions, or simply get a nice display, you need the 'userui' component. -+ Userui comes in three flavours, usplash, fbsplash and text. 
Text should -+ work on any console. Usplash and fbsplash require the appropriate -+ (distro specific?) support. -+ -+ To utilise a userui, TuxOnIce just needs to be told where to find the -+ userspace binary: -+ -+ echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program -+ -+ The hibernate script can do this for you, and a default value for this -+ setting can be configured when compiling the kernel. This path is also -+ stored in the image header, so if you have an initrd or initramfs, you can -+ use the userui during the first part of resuming (prior to the atomic -+ restore) by putting the binary in the same path in your initrd/ramfs. -+ Alternatively, you can put it in a different location and do an echo -+ similar to the above prior to the echo > do_resume. The value saved in the -+ image header will then be ignored. -+ -+4. Why not just use the version already in the kernel? -+ -+ The version in the vanilla kernel has a number of drawbacks. The most -+ serious of these are: -+ - it has a maximum image size of 1/2 total memory; -+ - it doesn't allocate storage until after it has snapshotted memory. -+ This means that you can't be sure hibernating will work until you -+ see it start to write the image; -+ - it does not allow you to press escape to cancel a cycle; -+ - it does not allow you to press escape to cancel resuming; -+ - it does not allow you to automatically swapon a file when -+ starting a cycle; -+ - it does not allow you to use multiple swap partitions or files; -+ - it does not allow you to use ordinary files; -+ - it just invalidates an image and continues to boot if you -+ accidentally boot the wrong kernel after hibernating; -+ - it doesn't support any sort of nice display while hibernating; -+ - it is moving toward requiring that you have an initrd/initramfs -+ to ever have a hope of resuming (uswsusp). 
While uswsusp will -+ address some of the concerns above, it won't address all of them, -+ and will be more complicated to get set up; -+ - it doesn't have support for suspend-to-both (write a hibernation -+ image, then suspend to ram; I think this is known as ReadySafe -+ under M$). -+ -+5. How do you use it? -+ -+ A hibernation cycle can be started directly by doing: -+ -+ echo > /sys/power/tuxonice/do_hibernate -+ -+ In practice, though, you'll probably want to use the hibernate script -+ to unload modules, configure the kernel the way you like it and so on. -+ In that case, you'd do (as root): -+ -+ hibernate -+ -+ See the hibernate script's man page for more details on the options it -+ takes. -+ -+ If you're using the text or splash user interface modules, one feature of -+ TuxOnIce that you might find useful is that you can press Escape at any time -+ during hibernating, and the process will be aborted. -+ -+ Due to the way hibernation works, this means you'll have your system back and -+ perfectly usable almost instantly. The only exception is when it's at the -+ very end of writing the image. Then it will need to reload a small (usually -+ 4-50MBs, depending upon the image characteristics) portion first. -+ -+ Likewise, when resuming, you can press escape and resuming will be aborted. -+ The computer will then powerdown again according to settings at that time for -+ the powerdown method or rebooting. -+ -+ You can change the settings for powering down while the image is being -+ written by pressing 'R' to toggle rebooting and 'O' to toggle between -+ suspending to ram and powering down completely). -+ -+ If you run into problems with resuming, adding the "noresume" option to -+ the kernel command line will let you skip the resume step and recover your -+ system. 
This option shouldn't normally be needed, because TuxOnIce modifies -+ the image header prior to the atomic restore, and will thus prompt you -+ if it detects that you've tried to resume an image before (this flag is -+ removed if you press Escape to cancel a resume, so you won't be prompted -+ then). -+ -+ Recent kernels (2.6.24 onwards) add support for resuming from a different -+ kernel to the one that was hibernated (thanks to Rafael for his work on -+ this - I've just embraced and enhanced the support for TuxOnIce). This -+ should further reduce the need for you to use the noresume option. -+ -+6. What do all those entries in /sys/power/tuxonice do? -+ -+ /sys/power/tuxonice is the directory which contains files you can use to -+ tune and configure TuxOnIce to your liking. The exact contents of -+ the directory will depend upon the version of TuxOnIce you're -+ running and the options you selected at compile time. In the following -+ descriptions, names in brackets refer to compile time options. -+ (Note that they're all dependant upon you having selected CONFIG_TUXONICE -+ in the first place!). -+ -+ Since the values of these settings can open potential security risks, the -+ writeable ones are accessible only to the root user. You may want to -+ configure sudo to allow you to invoke your hibernate script as an ordinary -+ user. -+ -+ - alloc/failure_test -+ -+ This debugging option provides a way of testing TuxOnIce's handling of -+ memory allocation failures. Each allocation type that TuxOnIce makes has -+ been given a unique number (see the source code). Echo the appropriate -+ number into this entry, and when TuxOnIce attempts to do that allocation, -+ it will pretend there was a failure and act accordingly. -+ -+ - alloc/find_max_mem_allocated -+ -+ This debugging option will cause TuxOnIce to find the maximum amount of -+ memory it used during a cycle, and report that information in debugging -+ information at the end of the cycle. 
-+ -+ - alt_resume_param -+ -+ Instead of powering down after writing a hibernation image, TuxOnIce -+ supports resuming from a different image. This entry lets you set the -+ location of the signature for that image (the resume= value you'd use -+ for it). Using an alternate image and keep_image mode, you can do things -+ like using an alternate image to power down an uninterruptible power -+ supply. -+ -+ - block_io/target_outstanding_io -+ -+ This value controls the amount of memory that the block I/O code says it -+ needs when the core code is calculating how much memory is needed for -+ hibernating and for resuming. It doesn't directly control the amount of -+ I/O that is submitted at any one time - that depends on the amount of -+ available memory (we may have more available than we asked for), the -+ throughput that is being achieved and the ability of the CPU to keep up -+ with disk throughput (particularly where we're compressing pages). -+ -+ - checksum/enabled -+ -+ Use cryptoapi hashing routines to verify that Pageset2 pages don't change -+ while we're saving the first part of the image, and to get any pages that -+ do change resaved in the atomic copy. This should normally not be needed, -+ but if you're seeing issues, please enable this. If your issues stop you -+ being able to resume, enable this option, hibernate and cancel the cycle -+ after the atomic copy is done. If the debugging info shows a non-zero -+ number of pages resaved, please report this to Nigel. -+ -+ - compression/algorithm -+ -+ Set the cryptoapi algorithm used for compressing the image. -+ -+ - compression/expected_compression -+ -+ These values allow you to set an expected compression ratio, which TuxOnice -+ will use in calculating whether it meets constraints on the image size. If -+ this expected compression ratio is not attained, the hibernation cycle will -+ abort, so it is wise to allow some spare. 
You can see what compression -+ ratio is achieved in the logs after hibernating. -+ -+ - debug_info: -+ -+ This file returns information about your configuration that may be helpful -+ in diagnosing problems with hibernating. -+ -+ - did_suspend_to_both: -+ -+ This file can be used when you hibernate with powerdown method 3 (ie suspend -+ to ram after writing the image). There can be two outcomes in this case. We -+ can resume from the suspend-to-ram before the battery runs out, or we can run -+ out of juice and end up resuming like normal. This entry lets you find out, -+ post resume, which way we went. If the value is 1, we resumed from suspend -+ to ram. This can be useful when actions need to be run post suspend-to-ram -+ that don't need to be run if we did the normal resume from power off. -+ -+ - do_hibernate: -+ -+ When anything is written to this file, the kernel side of TuxOnIce will -+ begin to attempt to write an image to disk and power down. You'll normally -+ want to run the hibernate script instead, to get modules unloaded first. -+ -+ - do_resume: -+ -+ When anything is written to this file TuxOnIce will attempt to read and -+ restore an image. If there is no image, it will return almost immediately. -+ If an image exists, the echo > will never return. Instead, the original -+ kernel context will be restored and the original echo > do_hibernate will -+ return. -+ -+ - */enabled -+ -+ These options can be used to temporarily disable various parts of TuxOnIce. -+ -+ - extra_pages_allowance -+ -+ When TuxOnIce does its atomic copy, it calls the driver model suspend -+ and resume methods. If you have DRI enabled with a driver such as fglrx, -+ this can result in the driver allocating a substantial amount of memory -+ for storing its state. Extra_pages_allowance tells TuxOnIce how much -+ extra memory it should ensure is available for those allocations. 
If -+ your attempts at hibernating end with a message in dmesg indicating that -+ insufficient extra pages were allowed, you need to increase this value. -+ -+ - file/target: -+ -+ Read this value to get the current setting. Write to it to point TuxOnice -+ at a new storage location for the file allocator. See section 3.b.ii above -+ for details of how to set up the file allocator. -+ -+ - freezer_test -+ -+ This entry can be used to get TuxOnIce to just test the freezer and prepare -+ an image without actually doing a hibernation cycle. It is useful for -+ diagnosing freezing and image preparation issues. -+ -+ - full_pageset2 -+ -+ TuxOnIce divides the pages that are stored in an image into two sets. The -+ difference between the two sets is that pages in pageset 1 are atomically -+ copied, and pages in pageset 2 are written to disk without being copied -+ first. A page CAN be written to disk without being copied first if and only -+ if its contents will not be modified or used at any time after userspace -+ processes are frozen. A page MUST be in pageset 1 if its contents are -+ modified or used at any time after userspace processes have been frozen. -+ -+ Normally (ie if this option is enabled), TuxOnIce will put all pages on the -+ per-zone LRUs in pageset2, then remove those pages used by any userspace -+ user interface helper and TuxOnIce storage manager that are running, -+ together with pages used by the GEM memory manager introduced around 2.6.28 -+ kernels. -+ -+ If this option is disabled, a much more conservative approach will be taken. -+ The only pages in pageset2 will be those belonging to userspace processes, -+ with the exclusion of those belonging to the TuxOnIce userspace helpers -+ mentioned above. This will result in a much smaller pageset2, and will -+ therefore result in smaller images than are possible with this option -+ enabled. 
-+ -+ - ignore_rootfs -+ -+ TuxOnIce records which device is mounted as the root filesystem when -+ writing the hibernation image. It will normally check at resume time that -+ this device isn't already mounted - that would be a cause of filesystem -+ corruption. In some particular cases (RAM based root filesystems), you -+ might want to disable this check. This option allows you to do that. -+ -+ - image_exists: -+ -+ Can be used in a script to determine whether a valid image exists at the -+ location currently pointed to by resume=. Returns up to three lines. -+ The first is whether an image exists (-1 for unsure, otherwise 0 or 1). -+ If an image exists, additional lines will return the machine and version. -+ Echoing anything to this entry removes any current image. -+ -+ - image_size_limit: -+ -+ The maximum size of hibernation image written to disk, measured in megabytes -+ (1024*1024). -+ -+ - last_result: -+ -+ The result of the last hibernation cycle, as defined in -+ include/linux/suspend-debug.h with the values SUSPEND_ABORTED to -+ SUSPEND_KEPT_IMAGE. This is a bitmask. -+ -+ - late_cpu_hotplug: -+ -+ This sysfs entry controls whether cpu hotplugging is done - as normal - just -+ before (unplug) and after (replug) the atomic copy/restore (so that all -+ CPUs/cores are available for multithreaded I/O). The alternative is to -+ unplug all secondary CPUs/cores at the start of hibernating/resuming, and -+ replug them at the end of resuming. No multithreaded I/O will be possible in -+ this configuration, but the odd machine has been reported to require it. -+ -+ - lid_file: -+ -+ This determines which ACPI button file we look in to determine whether the -+ lid is open or closed after resuming from suspend to disk or power off. -+ If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state -+ and check its contents at the appropriate moment. See post_wake_state below -+ for more details on how this entry is used. 
-+ -+ - log_everything (CONFIG_PM_DEBUG): -+ -+ Setting this option results in all messages printed being logged. Normally, -+ only a subset are logged, so as to not slow the process and not clutter the -+ logs. Useful for debugging. It can be toggled during a cycle by pressing -+ 'L'. -+ -+ - no_load_direct: -+ -+ This is a debugging option. If, when loading the atomically copied pages of -+ an image, TuxOnIce finds that the destination address for a page is free, -+ it will normally allocate the image, load the data directly into that -+ address and skip it in the atomic restore. If this option is disabled, the -+ page will be loaded somewhere else and atomically restored like other pages. -+ -+ - no_flusher_thread: -+ -+ When doing multithreaded I/O (see below), the first online CPU can be used -+ to _just_ submit compressed pages when writing the image, rather than -+ compressing and submitting data. This option is normally disabled, but has -+ been included because Nigel would like to see whether it will be more useful -+ as the number of cores/cpus in computers increases. -+ -+ - no_multithreaded_io: -+ -+ TuxOnIce will normally create one thread per cpu/core on your computer, -+ each of which will then perform I/O. This will generally result in -+ throughput that's the maximum the storage medium can handle. There -+ shouldn't be any reason to disable multithreaded I/O now, but this option -+ has been retained for debugging purposes. -+ -+ - no_pageset2 -+ -+ See the entry for full_pageset2 above for an explanation of pagesets. -+ Enabling this option causes TuxOnIce to do an atomic copy of all pages, -+ thereby limiting the maximum image size to 1/2 of memory, as swsusp does. -+ -+ - no_pageset2_if_unneeded -+ -+ See the entry for full_pageset2 above for an explanation of pagesets. -+ Enabling this option causes TuxOnIce to act like no_pageset2 was enabled -+ if and only it isn't needed anyway. 
This option may still make TuxOnIce -+ less reliable because pageset2 pages are normally used to store the -+ atomic copy - drivers that want to do allocations of larger amounts of -+ memory in one shot will be more likely to find that those amounts aren't -+ available if this option is enabled. -+ -+ - pause_between_steps (CONFIG_PM_DEBUG): -+ -+ This option is used during debugging, to make TuxOnIce pause between -+ each step of the process. It is ignored when the nice display is on. -+ -+ - post_wake_state: -+ -+ TuxOnIce provides support for automatically waking after a user-selected -+ delay, and using a different powerdown method if the lid is still closed. -+ (Yes, we're assuming a laptop). This entry lets you choose what state -+ should be entered next. The values are those described under -+ powerdown_method, below. It can be used to suspend to RAM after hibernating, -+ then powerdown properly (say) 20 minutes later. It can also be used to power down -+ properly, then wake at (say) 6.30am and suspend to RAM until you're ready -+ to use the machine. -+ -+ - powerdown_method: -+ -+ Used to select a method by which TuxOnIce should powerdown after writing the -+ image. Currently: -+ -+ 0: Don't use ACPI to power off. -+ 3: Attempt to enter Suspend-to-ram. -+ 4: Attempt to enter ACPI S4 mode. -+ 5: Attempt to power down via ACPI S5 mode. -+ -+ Note that these options are highly dependent upon your hardware & software: -+ -+ 3: When successful, your machine suspends to ram instead of powering off. -+ The advantage of using this mode is that it doesn't matter whether your -+ battery has enough charge to make it through to your next resume. If it -+ lasts, you will simply resume from suspend to ram (and the image on disk -+ will be discarded). If the battery runs out, you will resume from disk -+ instead. The disadvantage is that it takes longer than a normal -+ suspend-to-ram to enter the state, since the suspend-to-disk image needs -+ to be written first. 
-+ 4/5: When successful, your machine will be off and consume (almost) no power. -+ But it might still react to some external events like opening the lid or -+ traffic on a network or usb device. For the bios, resume is then the same -+ as warm boot, similar to a situation where you used the command `reboot' -+ to reboot your machine. If your machine has problems on warm boot or if -+ you want to protect your machine with the bios password, this is probably -+ not the right choice. Mode 4 may be necessary on some machines where ACPI -+ wake up methods need to be run to properly reinitialise hardware after a -+ hibernation cycle. -+ 0: Switch the machine completely off. The only possible wakeup is the power -+ button. For the bios, resume is then the same as a cold boot, in -+ particular you would have to provide your bios boot password if your -+ machine uses that feature for booting. -+ -+ - progressbar_granularity_limit: -+ -+ This option can be used to limit the granularity of the progress bar -+ displayed with a bootsplash screen. The value is the maximum number of -+ steps. That is, 10 will make the progress bar jump in 10% increments. -+ -+ - reboot: -+ -+ This option causes TuxOnIce to reboot rather than powering down -+ at the end of saving an image. It can be toggled during a cycle by pressing -+ 'R'. -+ -+ - resume: -+ -+ This sysfs entry can be used to read and set the location in which TuxOnIce -+ will look for the signature of an image - the value set using resume= at -+ boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By -+ writing to this file as well as modifying your bootloader's configuration -+ file (eg menu.lst), you can set or reset the location of your image or the -+ method of storing the image without rebooting. -+ -+ - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP): -+ -+ This option makes -+ -+ echo disk > /sys/power/state -+ -+ activate TuxOnIce instead of swsusp. 
Regardless of whether this option is -+ enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce -+ to check for an image too. This is due to the fact that at resume time, we -+ can't know whether this option was enabled until we see if an image is there -+ for us to resume from. (And when an image exists, we don't care whether we -+ did replace swsusp anyway - we just want to resume). -+ -+ - resume_commandline: -+ -+ This entry can be read after resuming to see the commandline that was used -+ when resuming began. You might use this to set up two bootloader entries -+ that are the same apart from the fact that one includes an extra append= -+ argument "at_work=1". You could then grep resume_commandline in your -+ post-resume scripts and configure networking (for example) differently -+ depending upon whether you're at home or work. resume_commandline can be -+ set to arbitrary text if you wish to remove sensitive contents. -+ -+ - swap/swapfilename: -+ -+ This entry is used to specify the swapfile or partition that -+ TuxOnIce will attempt to swapon/swapoff automatically. Thus, if -+ I normally use /dev/hda1 for swap, and want to use /dev/hda2 specifically -+ for my hibernation image, I would -+ -+ echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile -+ -+ /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the -+ swapon and swapoff occur while other processes are frozen (including kswapd) -+ so this swap file will not be used up when attempting to free memory. The -+ partition/file is also given the highest priority, so other swapfiles/partitions -+ will only be used to save the image when this one is filled. -+ -+ The value of this file is used by headerlocations along with any currently -+ activated swapfiles/partitions. -+ -+ - swap/headerlocations: -+ -+ This option tells you the resume= options to use for swap devices you -+ currently have activated. 
It is particularly useful when you only want to -+ use a swap file to store your image. See above for further details. -+ -+ - test_bio -+ -+ This is a debugging option. When enabled, TuxOnIce will not hibernate. -+ Instead, when asked to write an image, it will skip the atomic copy, -+ just doing the writing of the image and then returning control to the -+ user at the point where it would have powered off. This is useful for -+ testing throughput in different configurations. -+ -+ - test_filter_speed -+ -+ This is a debugging option. When enabled, TuxOnIce will not hibernate. -+ Instead, when asked to write an image, it will not write anything or do -+ an atomic copy, but will only run any enabled compression algorithm on the -+ data that would have been written (the source pages of the atomic copy in -+ the case of pageset 1). This is useful for comparing the performance of -+ compression algorithms and for determining the extent to which an upgrade -+ to your storage method would improve hibernation speed. -+ -+ - user_interface/debug_sections (CONFIG_PM_DEBUG): -+ -+ This value, together with the console log level, controls what debugging -+ information is displayed. The console log level determines the level of -+ detail, and this value determines what detail is displayed. This value is -+ a bit vector, and the meaning of the bits can be found in the kernel tree -+ in include/linux/tuxonice.h. It can be overridden using the kernel's -+ command line option suspend_dbg. -+ -+ - user_interface/default_console_level (CONFIG_PM_DEBUG): -+ -+ This determines the value of the console log level at the start of a -+ hibernation cycle. If debugging is compiled in, the console log level can be -+ changed during a cycle by pressing the digit keys. Meanings are: -+ -+ 0: Nice display. -+ 1: Nice display plus numerical progress. -+ 2: Errors only. -+ 3: Low level debugging info. -+ 4: Medium level debugging info. -+ 5: High level debugging info. 
-+ 6: Verbose debugging info. -+ -+ - user_interface/enable_escape: -+ -+ Setting this to "1" will enable you to abort a hibernation cycle or resuming by -+ pressing escape, "0" (default) disables this feature. Note that enabling -+ this option means that you cannot initiate a hibernation cycle and then walk -+ away from your computer, expecting it to be secure. With this feature disabled, -+ you can validly have this expectation once TuxOnIce begins to write the -+ image to disk. (Prior to this point, it is possible that TuxOnIce might -+ abort because of failure to freeze all processes or because constraints -+ on its ability to save the image are not met). -+ -+ - user_interface/program -+ -+ This entry is used to tell TuxOnIce what userspace program to use for -+ providing a user interface while hibernating. The program uses a netlink -+ socket to pass messages back and forward to the kernel, allowing all of the -+ functions formerly implemented in the kernel user interface components. -+ -+ - version: -+ -+ The version of TuxOnIce you have compiled into the currently running kernel. -+ -+ - wake_alarm_dir: -+ -+ As mentioned above (post_wake_state), TuxOnIce supports automatically waking -+ after some delay. This entry allows you to select which wake alarm to use. -+ It should contain the value "rtc0" if you're wanting to use -+ /sys/class/rtc/rtc0. -+ -+ - wake_delay: -+ -+ This value determines the delay from the end of writing the image until the -+ wake alarm is triggered. You can set an absolute time by writing the desired -+ time into /sys/class/rtc//wakealarm and leaving these values -+ empty. -+ -+ Note that for the wakeup to actually occur, you may need to modify entries -+ in /proc/acpi/wakeup. This is done by echoing the name of the button in the -+ first column (eg PBTN) into the file. -+ -+7. How do you get support? -+ -+ Glad you asked. 
TuxOnIce is being actively maintained and supported -+ by Nigel (the guy doing most of the kernel coding at the moment), Bernard -+ (who maintains the hibernate script and userspace user interface components) -+ and its users. -+ -+ Resources available include HowTos, FAQs and a Wiki, all available via -+ tuxonice.net. You can find the mailing lists there. -+ -+8. I think I've found a bug. What should I do? -+ -+ By far and away, the most common problems people have with TuxOnIce -+ relate to drivers not having adequate power management support. In this -+ case, it is not a bug with TuxOnIce, but we can still help you. As we -+ mentioned above, such issues can usually be worked around by building the -+ functionality as modules and unloading them while hibernating. Please visit -+ the Wiki for up-to-date lists of known issues and work arounds. -+ -+ If this information doesn't help, try running: -+ -+ hibernate --bug-report -+ -+ ..and sending the output to the users mailing list. -+ -+ Good information on how to provide us with useful information from an -+ oops is found in the file REPORTING-BUGS, in the top level directory -+ of the kernel tree. If you get an oops, please especially note the -+ information about running what is printed on the screen through ksymoops. -+ The raw information is useless. -+ -+9. When will XXX be supported? -+ -+ If there's a feature missing from TuxOnIce that you'd like, feel free to -+ ask. We try to be obliging, within reason. -+ -+ Patches are welcome. Please send to the list. -+ -+10. How does it work? -+ -+ TuxOnIce does its work in a number of steps. -+ -+ a. Freezing system activity. -+ -+ The first main stage in hibernating is to stop all other activity. This is -+ achieved in stages. 
Processes are considered in four groups, which we will -+ describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE -+ flag, kernel threads without this flag, userspace processes with the -+ PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are -+ untouched by the refrigerator code. They are allowed to run during hibernating -+ and resuming, and are used to support user interaction, storage access or the -+ like. Other kernel threads (those unneeded while hibernating) are frozen last. -+ This leaves us with userspace processes that need to be frozen. When a -+ process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on -+ that process for the duration of that call. Processes that have this flag are -+ frozen after processes without it, so that we can seek to ensure that dirty -+ data is synced to disk as quickly as possible in a situation where other -+ processes may be submitting writes at the same time. Freezing the processes -+ that are submitting data stops new I/O from being submitted. Syncthreads can -+ then cleanly finish their work. So the order is: -+ -+ - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE; -+ - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE); -+ - Kernel processes without PF_NOFREEZE. -+ -+ b. Eating memory. -+ -+ For a successful hibernation cycle, you need to have enough disk space to store the -+ image and enough memory for the various limitations of TuxOnIce's -+ algorithm. You can also specify a maximum image size. In order to attain -+ to those constraints, TuxOnIce may 'eat' memory. If, after freezing -+ processes, the constraints aren't met, TuxOnIce will thaw all the -+ other processes and begin to eat memory until its calculations indicate -+ the constraints are met. It will then freeze processes again and recheck -+ its calculations. -+ -+ c. Allocation of storage. -+ -+ Next, TuxOnIce allocates the storage that will be used to save -+ the image. 
-+ -+ The core of TuxOnIce knows nothing about how or where pages are stored. We -+ therefore request the active allocator (remember you might have compiled in -+ more than one!) to allocate enough storage for our expected image size. If -+ this request cannot be fulfilled, we eat more memory and try again. If it -+ is fulfilled, we seek to allocate additional storage, just in case our -+ expected compression ratio (if any) isn't achieved. This time, however, we -+ just continue if we can't allocate enough storage. -+ -+ If these calls to our allocator change the characteristics of the image -+ such that we haven't allocated enough memory, we also loop. (The allocator -+ may well need to allocate space for its storage information). -+ -+ d. Write the first part of the image. -+ -+ TuxOnIce stores the image in two sets of pages called 'pagesets'. -+ Pageset 2 contains pages on the active and inactive lists; essentially -+ the page cache. Pageset 1 contains all other pages, including the kernel. -+ We use two pagesets for one important reason: We need to make an atomic copy -+ of the kernel to ensure consistency of the image. Without a second pageset, -+ that would limit us to an image that was at most half the amount of memory -+ available. Using two pagesets allows us to store a full image. Since pageset -+ 2 pages won't be needed in saving pageset 1, we first save pageset 2 pages. -+ We can then make our atomic copy of the remaining pages using both pageset 2 -+ pages and any other pages that are free. While saving both pagesets, we are -+ careful not to corrupt the image. Among other things, we use lowlevel block -+ I/O routines that don't change the pagecache contents. -+ -+ The next step, then, is writing pageset 2. -+ -+ e. Suspending drivers and storing processor context. 
-+ -+ Having written pageset2, TuxOnIce calls the power management functions to -+ notify drivers of the hibernation, and saves the processor state in preparation -+ for the atomic copy of memory we are about to make. -+ -+ f. Atomic copy. -+ -+ At this stage, everything else but the TuxOnIce code is halted. Processes -+ are frozen or idling, drivers are quiesced and have stored (ideally and where -+ necessary) their configuration in memory we are about to atomically copy. -+ In our lowlevel architecture specific code, we have saved the CPU state. -+ We can therefore now do our atomic copy before resuming drivers etc. -+ -+ g. Save the atomic copy (pageset 1). -+ -+ TuxOnIce can then write the atomic copy of the remaining pages. Since we -+ have copied the pages into other locations, we can continue to use the -+ normal block I/O routines without fear of corrupting our image. -+ -+ h. Save the image header. -+ -+ Nearly there! We save our settings and other parameters needed for -+ reloading pageset 1 in an 'image header'. We also tell our allocator to -+ serialise its data at this stage, so that it can reread the image at resume -+ time. -+ -+ i. Set the image header. -+ -+ Finally, we edit the header at our resume= location. The signature is -+ changed by the allocator to reflect the fact that an image exists, and to -+ point to the start of that data if necessary (swap allocator). -+ -+ j. Power down. -+ -+ Or reboot if we're debugging and the appropriate option is selected. -+ -+ Whew! -+ -+ Reloading the image. -+ -------------------- -+ -+ Reloading the image is essentially the reverse of all the above. We load -+ our copy of pageset 1, being careful to choose locations that aren't going -+ to be overwritten as we copy it back (We start very early in the boot -+ process, so there are no other processes to quiesce here). We then copy -+ pageset 1 back to its original location in memory and restore the process -+ context. 
We are now running with the original kernel. Next, we reload the -+ pageset 2 pages, free the memory and swap used by TuxOnIce, restore -+ the pageset header and restart processes. Sounds easy in comparison to -+ hibernating, doesn't it! -+ -+ There is of course more to TuxOnIce than this, but this explanation -+ should be a good start. If there's interest, I'll write further -+ documentation on range pages and the low level I/O. -+ -+11. Who wrote TuxOnIce? -+ -+ (Answer based on the writings of Florent Chabaud, credits in files and -+ Nigel's limited knowledge; apologies to anyone missed out!) -+ -+ The main developers of TuxOnIce have been... -+ -+ Gabor Kuti -+ Pavel Machek -+ Florent Chabaud -+ Bernard Blackham -+ Nigel Cunningham -+ -+ Significant portions of swsusp, the code in the vanilla kernel which -+ TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should -+ also be expressed to him. -+ -+ The above mentioned developers have been aided in their efforts by a host -+ of hundreds, if not thousands of testers and people who have submitted bug -+ fixes & suggestions. Of special note are the efforts of Michael Frank, who -+ had his computers repetitively hibernate and resume for literally tens of -+ thousands of cycles and developed scripts to stress the system and test -+ TuxOnIce far beyond the point most of us (Nigel included!) would consider -+ testing. His efforts have contributed as much to TuxOnIce as any of the -+ names above. 
-diff --git a/MAINTAINERS b/MAINTAINERS -index 6c484ac..cafc523 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -9124,6 +9124,13 @@ S: Maintained - F: drivers/tc/ - F: include/linux/tc.h - -+TUXONICE (ENHANCED HIBERNATION) -+P: Nigel Cunningham -+M: nigel@tuxonice.net -+L: tuxonice-devel@tuxonice.net -+W: http://tuxonice.net -+S: Maintained -+ - U14-34F SCSI DRIVER - M: Dario Ballabio - L: linux-scsi@vger.kernel.org -diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c -index 343a87f..2df8093 100644 ---- a/arch/powerpc/mm/pgtable_32.c -+++ b/arch/powerpc/mm/pgtable_32.c -@@ -437,6 +437,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) - - change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); - } -+EXPORT_SYMBOL_GPL(kernel_map_pages); - #endif /* CONFIG_DEBUG_PAGEALLOC */ - - static int fixmaps; -diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c -index 4b4c081..5667da2 100644 ---- a/arch/powerpc/platforms/83xx/suspend.c -+++ b/arch/powerpc/platforms/83xx/suspend.c -@@ -264,6 +264,8 @@ static int mpc83xx_suspend_begin(suspend_state_t state) - - static int agent_thread_fn(void *data) - { -+ set_freezable(); -+ - while (1) { - wait_event_interruptible(agent_wq, pci_pm_state >= 2); - try_to_freeze(); -diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c -index 3f175e8..b5d59c6 100644 ---- a/arch/powerpc/platforms/ps3/device-init.c -+++ b/arch/powerpc/platforms/ps3/device-init.c -@@ -841,6 +841,8 @@ static int ps3_probe_thread(void *data) - if (res) - goto fail_free_irq; - -+ set_freezable(); -+ - /* Loop here processing the requested notification events. 
*/ - do { - try_to_freeze(); -diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c -index ae242a7..1f6f9b7 100644 ---- a/arch/x86/mm/pageattr.c -+++ b/arch/x86/mm/pageattr.c -@@ -1829,6 +1829,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) - arch_flush_lazy_mmu_mode(); - } - -+EXPORT_SYMBOL_GPL(kernel_map_pages); -+ - #ifdef CONFIG_HIBERNATION - - bool kernel_page_present(struct page *page) -@@ -1842,7 +1844,7 @@ bool kernel_page_present(struct page *page) - pte = lookup_address((unsigned long)page_address(page), &level); - return (pte_val(*pte) & _PAGE_PRESENT); - } -- -+EXPORT_SYMBOL_GPL(kernel_page_present); - #endif /* CONFIG_HIBERNATION */ - - #endif /* CONFIG_DEBUG_PAGEALLOC */ -diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c -index 424f4c9..41f9004 100644 ---- a/arch/x86/power/cpu.c -+++ b/arch/x86/power/cpu.c -@@ -122,9 +122,7 @@ void save_processor_state(void) - __save_processor_state(&saved_context); - x86_platform.save_sched_clock_state(); - } --#ifdef CONFIG_X86_32 - EXPORT_SYMBOL(save_processor_state); --#endif - - static void do_fpu_end(void) - { -diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c -index 7d28c88..4f1dd95 100644 ---- a/arch/x86/power/hibernate_32.c -+++ b/arch/x86/power/hibernate_32.c -@@ -9,6 +9,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -161,6 +162,7 @@ int swsusp_arch_resume(void) - restore_image(); - return 0; - } -+EXPORT_SYMBOL_GPL(swsusp_arch_resume); - - /* - * pfn_is_nosave - check if given pfn is in the 'nosave' section -diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c -index 35e2bb6..9f2545e 100644 ---- a/arch/x86/power/hibernate_64.c -+++ b/arch/x86/power/hibernate_64.c -@@ -11,8 +11,7 @@ - #include - #include - #include -- --#include -+#include - #include - #include - #include -@@ -41,21 +40,41 @@ pgd_t *temp_level4_pgt __visible; - - void *relocated_restore_code __visible; - --static void 
*alloc_pgt_page(void *context) -+static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) - { -- return (void *)get_safe_page(GFP_ATOMIC); -+ long i, j; -+ -+ i = pud_index(address); -+ pud = pud + i; -+ for (; i < PTRS_PER_PUD; pud++, i++) { -+ unsigned long paddr; -+ pmd_t *pmd; -+ -+ paddr = address + i*PUD_SIZE; -+ if (paddr >= end) -+ break; -+ -+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); -+ if (!pmd) -+ return -ENOMEM; -+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); -+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { -+ unsigned long pe; -+ -+ if (paddr >= end) -+ break; -+ pe = __PAGE_KERNEL_LARGE_EXEC | paddr; -+ pe &= __supported_pte_mask; -+ set_pmd(pmd, __pmd(pe)); -+ } -+ } -+ return 0; - } - - static int set_up_temporary_mappings(void) - { -- struct x86_mapping_info info = { -- .alloc_pgt_page = alloc_pgt_page, -- .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, -- .kernel_mapping = true, -- }; -- unsigned long mstart, mend; -- int result; -- int i; -+ unsigned long start, end, next; -+ int error; - - temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); - if (!temp_level4_pgt) -@@ -66,17 +85,21 @@ static int set_up_temporary_mappings(void) - init_level4_pgt[pgd_index(__START_KERNEL_map)]); - - /* Set up the direct mapping from scratch */ -- for (i = 0; i < nr_pfn_mapped; i++) { -- mstart = pfn_mapped[i].start << PAGE_SHIFT; -- mend = pfn_mapped[i].end << PAGE_SHIFT; -- -- result = kernel_ident_mapping_init(&info, temp_level4_pgt, -- mstart, mend); -- -- if (result) -- return result; -+ start = (unsigned long)pfn_to_kaddr(0); -+ end = (unsigned long)pfn_to_kaddr(max_pfn); -+ -+ for (; start < end; start = next) { -+ pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); -+ if (!pud) -+ return -ENOMEM; -+ next = start + PGDIR_SIZE; -+ if (next > end) -+ next = end; -+ if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) -+ return error; -+ set_pgd(temp_level4_pgt + pgd_index(start), -+ mk_kernel_pgd(__pa(pud))); - } 
-- - return 0; - } - -@@ -97,6 +120,7 @@ int swsusp_arch_resume(void) - restore_image(); - return 0; - } -+EXPORT_SYMBOL_GPL(swsusp_arch_resume); - - /* - * pfn_is_nosave - check if given pfn is in the 'nosave' section -@@ -147,3 +171,4 @@ int arch_hibernation_header_restore(void *addr) - restore_cr3 = rdr->cr3; - return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; - } -+EXPORT_SYMBOL_GPL(arch_hibernation_header_restore); -diff --git a/block/Makefile b/block/Makefile -index 20645e8..f2c091d 100644 ---- a/block/Makefile -+++ b/block/Makefile -@@ -7,7 +7,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ - blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ - blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ -- genhd.o scsi_ioctl.o partition-generic.o partitions/ -+ uuid.o genhd.o scsi_ioctl.o partition-generic.o partitions/ - - obj-$(CONFIG_BLK_DEV_BSG) += bsg.o - obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o -diff --git a/block/blk-core.c b/block/blk-core.c -index a0e3096..f29de0f 100644 ---- a/block/blk-core.c -+++ b/block/blk-core.c -@@ -47,6 +47,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); - - DEFINE_IDA(blk_queue_ida); - -+int trap_non_toi_io; -+EXPORT_SYMBOL_GPL(trap_non_toi_io); -+ - /* - * For the allocated request tables - */ -@@ -1878,6 +1881,9 @@ void submit_bio(int rw, struct bio *bio) - { - bio->bi_rw |= rw; - -+ if (unlikely(trap_non_toi_io)) -+ BUG_ON(!(bio->bi_flags & BIO_TOI)); -+ - /* - * If it's a regular read/write or a barrier with data attached, - * go through the normal accounting stuff before submission. 
-diff --git a/block/genhd.c b/block/genhd.c -index 791f419..97985a4 100644 ---- a/block/genhd.c -+++ b/block/genhd.c -@@ -17,6 +17,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - -@@ -1375,6 +1377,87 @@ int invalidate_partition(struct gendisk *disk, int partno) - - EXPORT_SYMBOL(invalidate_partition); - -+dev_t blk_lookup_fs_info(struct fs_info *seek) -+{ -+ dev_t devt = MKDEV(0, 0); -+ struct class_dev_iter iter; -+ struct device *dev; -+ int best_score = 0; -+ -+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type); -+ while (best_score < 3 && (dev = class_dev_iter_next(&iter))) { -+ struct gendisk *disk = dev_to_disk(dev); -+ struct disk_part_iter piter; -+ struct hd_struct *part; -+ -+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); -+ -+ while (best_score < 3 && (part = disk_part_iter_next(&piter))) { -+ int score = part_matches_fs_info(part, seek); -+ if (score > best_score) { -+ devt = part_devt(part); -+ best_score = score; -+ } -+ } -+ disk_part_iter_exit(&piter); -+ } -+ class_dev_iter_exit(&iter); -+ return devt; -+} -+EXPORT_SYMBOL_GPL(blk_lookup_fs_info); -+ -+/* Caller uses NULL, key to start. For each match found, we return a bdev on -+ * which we have done blkdev_get, and we do the blkdev_put on block devices -+ * that are passed to us. When no more matches are found, we return NULL. 
-+ */ -+struct block_device *next_bdev_of_type(struct block_device *last, -+ const char *key) -+{ -+ dev_t devt = MKDEV(0, 0); -+ struct class_dev_iter iter; -+ struct device *dev; -+ struct block_device *next = NULL, *bdev; -+ int got_last = 0; -+ -+ if (!key) -+ goto out; -+ -+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type); -+ while (!devt && (dev = class_dev_iter_next(&iter))) { -+ struct gendisk *disk = dev_to_disk(dev); -+ struct disk_part_iter piter; -+ struct hd_struct *part; -+ -+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); -+ -+ while ((part = disk_part_iter_next(&piter))) { -+ bdev = bdget(part_devt(part)); -+ if (last && !got_last) { -+ if (last == bdev) -+ got_last = 1; -+ continue; -+ } -+ -+ if (blkdev_get(bdev, FMODE_READ, 0)) -+ continue; -+ -+ if (bdev_matches_key(bdev, key)) { -+ next = bdev; -+ break; -+ } -+ -+ blkdev_put(bdev, FMODE_READ); -+ } -+ disk_part_iter_exit(&piter); -+ } -+ class_dev_iter_exit(&iter); -+out: -+ if (last) -+ blkdev_put(last, FMODE_READ); -+ return next; -+} -+EXPORT_SYMBOL_GPL(next_bdev_of_type); -+ - /* - * Disk events - monitor disk events like media change and eject request. - */ -diff --git a/block/uuid.c b/block/uuid.c -new file mode 100644 -index 0000000..6ab3f05 ---- /dev/null -+++ b/block/uuid.c -@@ -0,0 +1,511 @@ -+#include -+#include -+#include -+#include -+#include -+ -+static int debug_enabled; -+ -+#define PRINTK(fmt, args...) do { \ -+ if (debug_enabled) \ -+ printk(KERN_DEBUG fmt, ## args); \ -+ } while(0) -+ -+#define PRINT_HEX_DUMP(v1, v2, v3, v4, v5, v6, v7, v8) \ -+ do { \ -+ if (debug_enabled) \ -+ print_hex_dump(v1, v2, v3, v4, v5, v6, v7, v8); \ -+ } while(0) -+ -+/* -+ * Simple UUID translation -+ */ -+ -+struct uuid_info { -+ const char *key; -+ const char *name; -+ long bkoff; -+ unsigned sboff; -+ unsigned sig_len; -+ const char *magic; -+ int uuid_offset; -+ int last_mount_offset; -+ int last_mount_size; -+}; -+ -+/* -+ * Based on libuuid's blkid_magic array. 
Note that I don't -+ * have uuid offsets for all of these yet - mssing ones are 0x0. -+ * Further information welcome. -+ * -+ * Rearranged by page of fs signature for optimisation. -+ */ -+static struct uuid_info uuid_list[] = { -+ { NULL, "oracleasm", 0, 32, 8, "ORCLDISK", 0x0, 0, 0 }, -+ { "ntfs", "ntfs", 0, 3, 8, "NTFS ", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0x52, 5, "MSWIN", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0x52, 8, "FAT32 ", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0x36, 5, "MSDOS", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0x36, 8, "FAT16 ", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0x36, 8, "FAT12 ", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0, 1, "\353", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0, 1, "\351", 0x0, 0, 0 }, -+ { "vfat", "vfat", 0, 0x1fe, 2, "\125\252", 0x0, 0, 0 }, -+ { "xfs", "xfs", 0, 0, 4, "XFSB", 0x20, 0, 0 }, -+ { "romfs", "romfs", 0, 0, 8, "-rom1fs-", 0x0, 0, 0 }, -+ { "bfs", "bfs", 0, 0, 4, "\316\372\173\033", 0, 0, 0 }, -+ { "cramfs", "cramfs", 0, 0, 4, "E=\315\050", 0x0, 0, 0 }, -+ { "qnx4", "qnx4", 0, 4, 6, "QNX4FS", 0, 0, 0 }, -+ { NULL, "crypt_LUKS", 0, 0, 6, "LUKS\xba\xbe", 0x0, 0, 0 }, -+ { "squashfs", "squashfs", 0, 0, 4, "sqsh", 0, 0, 0 }, -+ { "squashfs", "squashfs", 0, 0, 4, "hsqs", 0, 0, 0 }, -+ { "ocfs", "ocfs", 0, 8, 9, "OracleCFS", 0x0, 0, 0 }, -+ { "lvm2pv", "lvm2pv", 0, 0x018, 8, "LVM2 001", 0x0, 0, 0 }, -+ { "sysv", "sysv", 0, 0x3f8, 4, "\020~\030\375", 0, 0, 0 }, -+ { "ext", "ext", 1, 0x38, 2, "\123\357", 0x468, 0x42c, 4 }, -+ { "minix", "minix", 1, 0x10, 2, "\177\023", 0, 0, 0 }, -+ { "minix", "minix", 1, 0x10, 2, "\217\023", 0, 0, 0 }, -+ { "minix", "minix", 1, 0x10, 2, "\150\044", 0, 0, 0 }, -+ { "minix", "minix", 1, 0x10, 2, "\170\044", 0, 0, 0 }, -+ { "lvm2pv", "lvm2pv", 1, 0x018, 8, "LVM2 001", 0x0, 0, 0 }, -+ { "vxfs", "vxfs", 1, 0, 4, "\365\374\001\245", 0, 0, 0 }, -+ { "hfsplus", "hfsplus", 1, 0, 2, "BD", 0x0, 0, 0 }, -+ { "hfsplus", "hfsplus", 1, 0, 2, "H+", 0x0, 0, 0 }, -+ { "hfsplus", "hfsplus", 1, 0, 2, "HX", 0x0, 0, 0 }, 
-+ { "hfs", "hfs", 1, 0, 2, "BD", 0x0, 0, 0 }, -+ { "ocfs2", "ocfs2", 1, 0, 6, "OCFSV2", 0x0, 0, 0 }, -+ { "lvm2pv", "lvm2pv", 0, 0x218, 8, "LVM2 001", 0x0, 0, 0 }, -+ { "lvm2pv", "lvm2pv", 1, 0x218, 8, "LVM2 001", 0x0, 0, 0 }, -+ { "ocfs2", "ocfs2", 2, 0, 6, "OCFSV2", 0x0, 0, 0 }, -+ { "swap", "swap", 0, 0xff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, -+ { "swap", "swap", 0, 0xff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0xff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0xff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0xff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, -+ { "ocfs2", "ocfs2", 4, 0, 6, "OCFSV2", 0x0, 0, 0 }, -+ { "ocfs2", "ocfs2", 8, 0, 6, "OCFSV2", 0x0, 0, 0 }, -+ { "hpfs", "hpfs", 8, 0, 4, "I\350\225\371", 0, 0, 0 }, -+ { "reiserfs", "reiserfs", 8, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 }, -+ { "reiserfs", "reiserfs", 8, 20, 8, "ReIsErFs", 0x10054, 0, 0 }, -+ { "zfs", "zfs", 8, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 }, -+ { "zfs", "zfs", 8, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 }, -+ { "ufs", "ufs", 8, 0x55c, 4, "T\031\001\000", 0, 0, 0 }, -+ { "swap", "swap", 0, 0x1ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, -+ { "swap", "swap", 0, 0x1ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x1ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x1ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x1ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, -+ { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr2Fs", 0x10054, 0, 0 }, -+ { "reiserfs", "reiserfs", 64, 0x34, 9, "ReIsEr3Fs", 0x10054, 0, 0 }, -+ { "reiserfs", "reiserfs", 64, 0x34, 8, "ReIsErFs", 0x10054, 0, 0 }, -+ { "reiser4", "reiser4", 64, 0, 7, "ReIsEr4", 0x100544, 0, 0 }, -+ { "gfs2", "gfs2", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 }, -+ { "gfs", "gfs", 64, 0, 4, "\x01\x16\x19\x70", 0x0, 0, 0 }, -+ { "btrfs", "btrfs", 64, 0x40, 8, "_BHRfS_M", 0x0, 0, 0 }, -+ { "swap", "swap", 0, 0x3ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, -+ { 
"swap", "swap", 0, 0x3ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x3ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x3ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x3ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "BEA01", 0x0, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "BOOT2", 0x0, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "CD001", 0x0, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "CDW02", 0x0, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "NSR02", 0x0, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "NSR03", 0x0, 0, 0 }, -+ { "udf", "udf", 32, 1, 5, "TEA01", 0x0, 0, 0 }, -+ { "iso9660", "iso9660", 32, 1, 5, "CD001", 0x0, 0, 0 }, -+ { "iso9660", "iso9660", 32, 9, 5, "CDROM", 0x0, 0, 0 }, -+ { "jfs", "jfs", 32, 0, 4, "JFS1", 0x88, 0, 0 }, -+ { "swap", "swap", 0, 0x7ff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, -+ { "swap", "swap", 0, 0x7ff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x7ff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x7ff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0x7ff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swap", 0, 0xfff6, 10, "SWAP-SPACE", 0x40c, 0, 0 }, -+ { "swap", "swap", 0, 0xfff6, 10, "SWAPSPACE2", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0xfff6, 9, "S1SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0xfff6, 9, "S2SUSPEND", 0x40c, 0, 0 }, -+ { "swap", "swsuspend", 0, 0xfff6, 9, "ULSUSPEND", 0x40c, 0, 0 }, -+ { "zfs", "zfs", 264, 0, 8, "\0\0\x02\xf5\xb0\x07\xb1\x0c", 0x0, 0, 0 }, -+ { "zfs", "zfs", 264, 0, 8, "\x0c\xb1\x07\xb0\xf5\x02\0\0", 0x0, 0, 0 }, -+ { NULL, NULL, 0, 0, 0, NULL, 0x0, 0, 0 } -+}; -+ -+static int null_uuid(const char *uuid) -+{ -+ int i; -+ -+ for (i = 0; i < 16 && !uuid[i]; i++); -+ -+ return (i == 16); -+} -+ -+ -+static void uuid_end_bio(struct bio *bio, int err) -+{ -+ struct page *page = bio->bi_io_vec[0].bv_page; -+ -+ if(!test_bit(BIO_UPTODATE, &bio->bi_flags)) -+ SetPageError(page); -+ -+ unlock_page(page); 
-+ bio_put(bio); -+} -+ -+ -+/** -+ * submit - submit BIO request -+ * @dev: The block device we're using. -+ * @page_num: The page we're reading. -+ * -+ * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the -+ * textbook - allocate and initialize the bio. If we're writing, make sure -+ * the page is marked as dirty. Then submit it and carry on." -+ **/ -+static struct page *read_bdev_page(struct block_device *dev, int page_num) -+{ -+ struct bio *bio = NULL; -+ struct page *page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); -+ -+ if (!page) { -+ printk(KERN_ERR "Failed to allocate a page for reading data " -+ "in UUID checks."); -+ return NULL; -+ } -+ -+ bio = bio_alloc(GFP_NOFS, 1); -+ bio->bi_bdev = dev; -+ bio->bi_iter.bi_sector = page_num << 3; -+ bio->bi_end_io = uuid_end_bio; -+ bio->bi_flags |= (1 << BIO_TOI); -+ -+ PRINTK("Submitting bio on device %lx, page %d using bio %p and page %p.\n", -+ (unsigned long) dev->bd_dev, page_num, bio, page); -+ -+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { -+ printk(KERN_DEBUG "ERROR: adding page to bio at %d\n", -+ page_num); -+ bio_put(bio); -+ __free_page(page); -+ printk(KERN_DEBUG "read_bdev_page freed page %p (in error " -+ "path).\n", page); -+ return NULL; -+ } -+ -+ lock_page(page); -+ submit_bio(READ | REQ_SYNC, bio); -+ -+ wait_on_page_locked(page); -+ if (PageError(page)) { -+ __free_page(page); -+ page = NULL; -+ } -+ return page; -+} -+ -+int bdev_matches_key(struct block_device *bdev, const char *key) -+{ -+ unsigned char *data = NULL; -+ struct page *data_page = NULL; -+ -+ int dev_offset, pg_num, pg_off, i; -+ int last_pg_num = -1; -+ int result = 0; -+ char buf[50]; -+ -+ if (null_uuid(key)) { -+ PRINTK("Refusing to find a NULL key.\n"); -+ return 0; -+ } -+ -+ if (!bdev->bd_disk) { -+ bdevname(bdev, buf); -+ PRINTK("bdev %s has no bd_disk.\n", buf); -+ return 0; -+ } -+ -+ if (!bdev->bd_disk->queue) { -+ bdevname(bdev, buf); -+ PRINTK("bdev %s has no queue.\n", buf); -+ 
return 0; -+ } -+ -+ for (i = 0; uuid_list[i].name; i++) { -+ struct uuid_info *dat = &uuid_list[i]; -+ -+ if (!dat->key || strcmp(dat->key, key)) -+ continue; -+ -+ dev_offset = (dat->bkoff << 10) + dat->sboff; -+ pg_num = dev_offset >> 12; -+ pg_off = dev_offset & 0xfff; -+ -+ if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1) -+ continue; -+ -+ if (pg_num != last_pg_num) { -+ if (data_page) { -+ kunmap(data_page); -+ __free_page(data_page); -+ } -+ data_page = read_bdev_page(bdev, pg_num); -+ if (!data_page) -+ continue; -+ data = kmap(data_page); -+ } -+ -+ last_pg_num = pg_num; -+ -+ if (strncmp(&data[pg_off], dat->magic, dat->sig_len)) -+ continue; -+ -+ result = 1; -+ break; -+ } -+ -+ if (data_page) { -+ kunmap(data_page); -+ __free_page(data_page); -+ } -+ -+ return result; -+} -+ -+/* -+ * part_matches_fs_info - Does the given partition match the details given? -+ * -+ * Returns a score saying how good the match is. -+ * 0 = no UUID match. -+ * 1 = UUID but last mount time differs. -+ * 2 = UUID, last mount time but not dev_t -+ * 3 = perfect match -+ * -+ * This lets us cope elegantly with probing resulting in dev_ts changing -+ * from boot to boot, and with the case where a user copies a partition -+ * (UUID is non unique), and we need to check the last mount time of the -+ * correct partition. 
-+ */ -+int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek) -+{ -+ struct block_device *bdev; -+ struct fs_info *got; -+ int result = 0; -+ char buf[50]; -+ -+ if (null_uuid((char *) &seek->uuid)) { -+ PRINTK("Refusing to find a NULL uuid.\n"); -+ return 0; -+ } -+ -+ bdev = bdget(part_devt(part)); -+ -+ PRINTK("part_matches fs info considering %x.\n", part_devt(part)); -+ -+ if (blkdev_get(bdev, FMODE_READ, 0)) { -+ PRINTK("blkdev_get failed.\n"); -+ return 0; -+ } -+ -+ if (!bdev->bd_disk) { -+ bdevname(bdev, buf); -+ PRINTK("bdev %s has no bd_disk.\n", buf); -+ goto out; -+ } -+ -+ if (!bdev->bd_disk->queue) { -+ bdevname(bdev, buf); -+ PRINTK("bdev %s has no queue.\n", buf); -+ goto out; -+ } -+ -+ got = fs_info_from_block_dev(bdev); -+ -+ if (got && !memcmp(got->uuid, seek->uuid, 16)) { -+ PRINTK(" Have matching UUID.\n"); -+ PRINTK(" Got: LMS %d, LM %p.\n", got->last_mount_size, got->last_mount); -+ PRINTK(" Seek: LMS %d, LM %p.\n", seek->last_mount_size, seek->last_mount); -+ result = 1; -+ -+ if (got->last_mount_size == seek->last_mount_size && -+ got->last_mount && seek->last_mount && -+ !memcmp(got->last_mount, seek->last_mount, -+ got->last_mount_size)) { -+ result = 2; -+ -+ PRINTK(" Matching last mount time.\n"); -+ -+ if (part_devt(part) == seek->dev_t) { -+ result = 3; -+ PRINTK(" Matching dev_t.\n"); -+ } else -+ PRINTK("Dev_ts differ (%x vs %x).\n", part_devt(part), seek->dev_t); -+ } -+ } -+ -+ PRINTK(" Score for %x is %d.\n", part_devt(part), result); -+ free_fs_info(got); -+out: -+ blkdev_put(bdev, FMODE_READ); -+ return result; -+} -+ -+void free_fs_info(struct fs_info *fs_info) -+{ -+ if (!fs_info || IS_ERR(fs_info)) -+ return; -+ -+ if (fs_info->last_mount) -+ kfree(fs_info->last_mount); -+ -+ kfree(fs_info); -+} -+EXPORT_SYMBOL_GPL(free_fs_info); -+ -+struct fs_info *fs_info_from_block_dev(struct block_device *bdev) -+{ -+ unsigned char *data = NULL; -+ struct page *data_page = NULL; -+ -+ int dev_offset, pg_num, pg_off; 
-+ int uuid_pg_num, uuid_pg_off, i; -+ unsigned char *uuid_data = NULL; -+ struct page *uuid_data_page = NULL; -+ -+ int last_pg_num = -1, last_uuid_pg_num = 0; -+ char buf[50]; -+ struct fs_info *fs_info = NULL; -+ -+ bdevname(bdev, buf); -+ -+ PRINTK("uuid_from_block_dev looking for partition type of %s.\n", buf); -+ -+ for (i = 0; uuid_list[i].name; i++) { -+ struct uuid_info *dat = &uuid_list[i]; -+ dev_offset = (dat->bkoff << 10) + dat->sboff; -+ pg_num = dev_offset >> 12; -+ pg_off = dev_offset & 0xfff; -+ uuid_pg_num = dat->uuid_offset >> 12; -+ uuid_pg_off = dat->uuid_offset & 0xfff; -+ -+ if ((((pg_num + 1) << 3) - 1) > bdev->bd_part->nr_sects >> 1) -+ continue; -+ -+ /* Ignore partition types with no UUID offset */ -+ if (!dat->uuid_offset) -+ continue; -+ -+ if (pg_num != last_pg_num) { -+ if (data_page) { -+ kunmap(data_page); -+ __free_page(data_page); -+ } -+ data_page = read_bdev_page(bdev, pg_num); -+ if (!data_page) -+ continue; -+ data = kmap(data_page); -+ } -+ -+ last_pg_num = pg_num; -+ -+ if (strncmp(&data[pg_off], dat->magic, dat->sig_len)) -+ continue; -+ -+ PRINTK("This partition looks like %s.\n", dat->name); -+ -+ fs_info = kzalloc(sizeof(struct fs_info), GFP_KERNEL); -+ -+ if (!fs_info) { -+ PRINTK("Failed to allocate fs_info struct."); -+ fs_info = ERR_PTR(-ENOMEM); -+ break; -+ } -+ -+ /* UUID can't be off the end of the disk */ -+ if ((uuid_pg_num > bdev->bd_part->nr_sects >> 3) || -+ !dat->uuid_offset) -+ goto no_uuid; -+ -+ if (!uuid_data || uuid_pg_num != last_uuid_pg_num) { -+ /* No need to reread the page from above */ -+ if (uuid_pg_num == pg_num && uuid_data) -+ memcpy(uuid_data, data, PAGE_SIZE); -+ else { -+ if (uuid_data_page) { -+ kunmap(uuid_data_page); -+ __free_page(uuid_data_page); -+ } -+ uuid_data_page = read_bdev_page(bdev, uuid_pg_num); -+ if (!uuid_data_page) -+ continue; -+ uuid_data = kmap(uuid_data_page); -+ } -+ } -+ -+ last_uuid_pg_num = uuid_pg_num; -+ memcpy(&fs_info->uuid, &uuid_data[uuid_pg_off], 16); -+ 
fs_info->dev_t = bdev->bd_dev; -+ -+no_uuid: -+ PRINT_HEX_DUMP(KERN_EMERG, "fs_info_from_block_dev " -+ "returning uuid ", DUMP_PREFIX_NONE, 16, 1, -+ fs_info->uuid, 16, 0); -+ -+ if (dat->last_mount_size) { -+ int pg = dat->last_mount_offset >> 12, sz; -+ int off = dat->last_mount_offset & 0xfff; -+ struct page *last_mount = read_bdev_page(bdev, pg); -+ unsigned char *last_mount_data; -+ char *ptr; -+ -+ if (!last_mount) { -+ fs_info = ERR_PTR(-ENOMEM); -+ break; -+ } -+ last_mount_data = kmap(last_mount); -+ sz = dat->last_mount_size; -+ ptr = kmalloc(sz, GFP_KERNEL); -+ -+ if (!ptr) { -+ printk(KERN_EMERG "fs_info_from_block_dev " -+ "failed to get memory for last mount " -+ "timestamp."); -+ free_fs_info(fs_info); -+ fs_info = ERR_PTR(-ENOMEM); -+ } else { -+ fs_info->last_mount = ptr; -+ fs_info->last_mount_size = sz; -+ memcpy(ptr, &last_mount_data[off], sz); -+ } -+ -+ kunmap(last_mount); -+ __free_page(last_mount); -+ } -+ break; -+ } -+ -+ if (data_page) { -+ kunmap(data_page); -+ __free_page(data_page); -+ } -+ -+ if (uuid_data_page) { -+ kunmap(uuid_data_page); -+ __free_page(uuid_data_page); -+ } -+ -+ return fs_info; -+} -+EXPORT_SYMBOL_GPL(fs_info_from_block_dev); -+ -+static int __init uuid_debug_setup(char *str) -+{ -+ int value; -+ -+ if (sscanf(str, "=%d", &value)) -+ debug_enabled = value; -+ -+ return 1; -+} -+ -+__setup("uuid_debug", uuid_debug_setup); -diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c -index 37d7302..49ba4a3 100644 ---- a/drivers/acpi/acpi_pad.c -+++ b/drivers/acpi/acpi_pad.c -@@ -153,6 +153,7 @@ static int power_saving_thread(void *data) - u64 last_jiffies = 0; - - sched_setscheduler(current, SCHED_RR, ¶m); -+ set_freezable(); - - while (!kthread_should_stop()) { - int cpu; -diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c -index 86d5e4f..af5d673 100644 ---- a/drivers/base/power/main.c -+++ b/drivers/base/power/main.c -@@ -870,6 +870,7 @@ void dpm_resume(pm_message_t state) - - cpufreq_resume(); 
- } -+EXPORT_SYMBOL_GPL(dpm_resume); - - /** - * device_complete - Complete a PM transition for given device. -@@ -946,6 +947,7 @@ void dpm_complete(pm_message_t state) - list_splice(&list, &dpm_list); - mutex_unlock(&dpm_list_mtx); - } -+EXPORT_SYMBOL_GPL(dpm_complete); - - /** - * dpm_resume_end - Execute "resume" callbacks and complete system transition. -@@ -1474,6 +1476,7 @@ int dpm_suspend(pm_message_t state) - dpm_show_time(starttime, state, NULL); - return error; - } -+EXPORT_SYMBOL_GPL(dpm_suspend); - - /** - * device_prepare - Prepare a device for system power transition. -@@ -1578,6 +1581,7 @@ int dpm_prepare(pm_message_t state) - mutex_unlock(&dpm_list_mtx); - return error; - } -+EXPORT_SYMBOL_GPL(dpm_prepare); - - /** - * dpm_suspend_start - Prepare devices for PM transition and suspend them. -diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c -index 2d56f41..2f530c8 100644 ---- a/drivers/base/power/wakeup.c -+++ b/drivers/base/power/wakeup.c -@@ -23,6 +23,7 @@ - * if wakeup events are registered during or immediately before the transition. - */ - bool events_check_enabled __read_mostly; -+EXPORT_SYMBOL_GPL(events_check_enabled); - - /* - * Combined counters of registered wakeup events and wakeup events in progress. -@@ -715,6 +716,7 @@ bool pm_wakeup_pending(void) - - return ret; - } -+EXPORT_SYMBOL_GPL(pm_wakeup_pending); - - /** - * pm_get_wakeup_count - Read the number of registered wakeup events. 
-diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c -index 64c60ed..3ca69f3 100644 ---- a/drivers/block/xen-blkback/blkback.c -+++ b/drivers/block/xen-blkback/blkback.c -@@ -577,6 +577,7 @@ int xen_blkif_schedule(void *arg) - int ret; - - xen_blkif_get(blkif); -+ set_freezable(); - - while (!kthread_should_stop()) { - if (try_to_freeze()) -diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c -index 9909bef..b37783f 100644 ---- a/drivers/gpu/drm/drm_gem.c -+++ b/drivers/gpu/drm/drm_gem.c -@@ -135,7 +135,7 @@ int drm_gem_object_init(struct drm_device *dev, - - drm_gem_private_object_init(dev, obj, size); - -- filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); -+ filp = shmem_file_setup("drm mm object", size, VM_NORESERVE, 1); - if (IS_ERR(filp)) - return PTR_ERR(filp); - -diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c -index 75f3190..8f9b4c1 100644 ---- a/drivers/gpu/drm/ttm/ttm_tt.c -+++ b/drivers/gpu/drm/ttm/ttm_tt.c -@@ -336,7 +336,7 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage) - if (!persistent_swap_storage) { - swap_storage = shmem_file_setup("ttm swap", - ttm->num_pages << PAGE_SHIFT, -- 0); -+ 0, 0); - if (unlikely(IS_ERR(swap_storage))) { - pr_err("Failed allocating swap storage\n"); - return PTR_ERR(swap_storage); -diff --git a/drivers/md/md.c b/drivers/md/md.c -index 2382cfc..85c2a98 100644 ---- a/drivers/md/md.c -+++ b/drivers/md/md.c -@@ -33,6 +33,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -7418,6 +7419,8 @@ void md_do_sync(struct md_thread *thread) - * - */ - -+ set_freezable(); -+ - do { - mddev->curr_resync = 2; - -@@ -7441,6 +7444,9 @@ void md_do_sync(struct md_thread *thread) - * time 'round when curr_resync == 2 - */ - continue; -+ -+ try_to_freeze(); -+ - /* We need to wait 'interruptible' so as not to - * contribute to the load average, and not to - * be caught by 'softlockup' -@@ -7453,6 +7459,7 @@ 
void md_do_sync(struct md_thread *thread) - " share one or more physical units)\n", - desc, mdname(mddev), mdname(mddev2)); - mddev_put(mddev2); -+ try_to_freeze(); - if (signal_pending(current)) - flush_signals(current); - schedule(); -@@ -7784,8 +7791,10 @@ no_add: - */ - void md_check_recovery(struct mddev *mddev) - { -- if (mddev->suspended) -+#ifdef CONFIG_FREEZER -+ if (mddev->suspended || unlikely(atomic_read(&system_freezing_cnt))) - return; -+#endif - - if (mddev->bitmap) - bitmap_daemon_work(mddev); -diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c -index dd1bd10..9eb8719 100644 ---- a/drivers/net/irda/stir4200.c -+++ b/drivers/net/irda/stir4200.c -@@ -738,7 +738,9 @@ static int stir_transmit_thread(void *arg) - struct net_device *dev = stir->netdev; - struct sk_buff *skb; - -- while (!kthread_should_stop()) { -+ set_freezable(); -+ -+ while (!kthread_freezable_should_stop(NULL)) { - #ifdef CONFIG_PM - /* if suspending, then power off and wait */ - if (unlikely(freezing(current))) { -diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c -index 713a972..f877e48 100644 ---- a/drivers/staging/android/ashmem.c -+++ b/drivers/staging/android/ashmem.c -@@ -387,7 +387,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) - name = asma->name; - - /* ... 
and allocate the backing shmem file */ -- vmfile = shmem_file_setup(name, asma->size, vma->vm_flags); -+ vmfile = shmem_file_setup(name, asma->size, vma->vm_flags, 0); - if (unlikely(IS_ERR(vmfile))) { - ret = PTR_ERR(vmfile); - goto out; -diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c -index 3ad0b61..ca51d69 100644 ---- a/drivers/tty/vt/vt.c -+++ b/drivers/tty/vt/vt.c -@@ -2437,6 +2437,7 @@ int vt_kmsg_redirect(int new) - else - return kmsg_con; - } -+EXPORT_SYMBOL_GPL(vt_kmsg_redirect); - - /* - * Console on virtual terminal -diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c -index bdcb13c..ce8fc9c 100644 ---- a/drivers/uwb/uwbd.c -+++ b/drivers/uwb/uwbd.c -@@ -271,6 +271,7 @@ static int uwbd(void *param) - struct uwb_event *evt; - int should_stop = 0; - -+ set_freezable(); - while (1) { - wait_event_interruptible_timeout( - rc->uwbd.wq, -diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c -index 9833149..c64eba5 100644 ---- a/fs/btrfs/disk-io.c -+++ b/fs/btrfs/disk-io.c -@@ -1743,6 +1743,8 @@ static int cleaner_kthread(void *arg) - struct btrfs_root *root = arg; - int again; - -+ set_freezable(); -+ - do { - again = 0; - -@@ -1774,11 +1776,11 @@ static int cleaner_kthread(void *arg) - sleep: - if (!try_to_freeze() && !again) { - set_current_state(TASK_INTERRUPTIBLE); -- if (!kthread_should_stop()) -+ if (!kthread_freezable_should_stop(NULL)) - schedule(); - __set_current_state(TASK_RUNNING); - } -- } while (!kthread_should_stop()); -+ } while (!kthread_freezable_should_stop(NULL)); - return 0; - } - -@@ -1792,6 +1794,8 @@ static int transaction_kthread(void *arg) - unsigned long delay; - bool cannot_commit; - -+ set_freezable(); -+ - do { - cannot_commit = false; - delay = HZ * root->fs_info->commit_interval; -@@ -1836,13 +1840,13 @@ sleep: - btrfs_cleanup_transaction(root); - if (!try_to_freeze()) { - set_current_state(TASK_INTERRUPTIBLE); -- if (!kthread_should_stop() && -+ if (!kthread_freezable_should_stop(NULL) && - 
(!btrfs_transaction_blocked(root->fs_info) || - cannot_commit)) - schedule_timeout(delay); - __set_current_state(TASK_RUNNING); - } -- } while (!kthread_should_stop()); -+ } while (!kthread_freezable_should_stop(NULL)); - return 0; - } - -diff --git a/fs/drop_caches.c b/fs/drop_caches.c -index 9280202..ae20186 100644 ---- a/fs/drop_caches.c -+++ b/fs/drop_caches.c -@@ -8,6 +8,7 @@ - #include - #include - #include -+#include - #include "internal.h" - - /* A global variable is a bit ugly, but it keeps the code simple */ -@@ -50,6 +51,13 @@ static void drop_slab(void) - } while (nr_objects > 10); - } - -+/* For TuxOnIce */ -+void drop_pagecache(void) -+{ -+ iterate_supers(drop_pagecache_sb, NULL); -+} -+EXPORT_SYMBOL_GPL(drop_pagecache); -+ - int drop_caches_sysctl_handler(ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) - { -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 6f9e6fa..8e528d0 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -2926,6 +2926,7 @@ static int ext4_lazyinit_thread(void *arg) - unsigned long next_wakeup, cur; - - BUG_ON(NULL == eli); -+ set_freezable(); - - cont_thread: - while (true) { -@@ -2965,7 +2966,7 @@ cont_thread: - - schedule_timeout_interruptible(next_wakeup - cur); - -- if (kthread_should_stop()) { -+ if (kthread_freezable_should_stop(NULL)) { - ext4_clear_request_list(); - goto exit_thread; - } -diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c -index 4a14d50..7da4bda 100644 ---- a/fs/gfs2/log.c -+++ b/fs/gfs2/log.c -@@ -878,7 +878,9 @@ int gfs2_logd(void *data) - unsigned long t = 1; - DEFINE_WAIT(wait); - -- while (!kthread_should_stop()) { -+ set_freezable(); -+ -+ while (!kthread_freezable_should_stop(NULL)) { - - if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { - gfs2_ail1_empty(sdp); -@@ -904,11 +906,11 @@ int gfs2_logd(void *data) - TASK_INTERRUPTIBLE); - if (!gfs2_ail_flush_reqd(sdp) && - !gfs2_jrnl_flush_reqd(sdp) && -- !kthread_should_stop()) -+ !kthread_freezable_should_stop(NULL)) - t = 
schedule_timeout(t); - } while(t && !gfs2_ail_flush_reqd(sdp) && - !gfs2_jrnl_flush_reqd(sdp) && -- !kthread_should_stop()); -+ !kthread_freezable_should_stop(NULL)); - finish_wait(&sdp->sd_logd_waitq, &wait); - } - -diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c -index c4effff..c66cad1 100644 ---- a/fs/gfs2/quota.c -+++ b/fs/gfs2/quota.c -@@ -1433,7 +1433,9 @@ int gfs2_quotad(void *data) - DEFINE_WAIT(wait); - int empty; - -- while (!kthread_should_stop()) { -+ set_freezable(); -+ -+ while (!kthread_freezable_should_stop(NULL)) { - - /* Update the master statfs file */ - if (sdp->sd_statfs_force_sync) { -diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c -index 8d811e0..12510c4 100644 ---- a/fs/jfs/jfs_logmgr.c -+++ b/fs/jfs/jfs_logmgr.c -@@ -2342,6 +2342,8 @@ int jfsIOWait(void *arg) - { - struct lbuf *bp; - -+ set_freezable(); -+ - do { - spin_lock_irq(&log_redrive_lock); - while ((bp = log_redrive_list)) { -@@ -2361,7 +2363,7 @@ int jfsIOWait(void *arg) - schedule(); - __set_current_state(TASK_RUNNING); - } -- } while (!kthread_should_stop()); -+ } while (!kthread_freezable_should_stop(NULL)); - - jfs_info("jfsIOWait being killed!"); - return 0; -diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c -index 564c4f2..0a622bc 100644 ---- a/fs/jfs/jfs_txnmgr.c -+++ b/fs/jfs/jfs_txnmgr.c -@@ -2752,6 +2752,8 @@ int jfs_lazycommit(void *arg) - unsigned long flags; - struct jfs_sb_info *sbi; - -+ set_freezable(); -+ - do { - LAZY_LOCK(flags); - jfs_commit_thread_waking = 0; /* OK to wake another thread */ -@@ -2811,7 +2813,7 @@ int jfs_lazycommit(void *arg) - __set_current_state(TASK_RUNNING); - remove_wait_queue(&jfs_commit_thread_wait, &wq); - } -- } while (!kthread_should_stop()); -+ } while (!kthread_freezable_should_stop(NULL)); - - if (!list_empty(&TxAnchor.unlock_queue)) - jfs_err("jfs_lazycommit being killed w/pending transactions!"); -@@ -2936,6 +2938,8 @@ int jfs_sync(void *arg) - struct jfs_inode_info *jfs_ip; - tid_t tid; - -+ set_freezable(); -+ - do 
{ - /* - * write each inode on the anonymous inode list -@@ -2998,7 +3002,7 @@ int jfs_sync(void *arg) - schedule(); - __set_current_state(TASK_RUNNING); - } -- } while (!kthread_should_stop()); -+ } while (!kthread_freezable_should_stop(NULL)); - - jfs_info("jfs_sync being killed"); - return 0; -diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c -index a1a1916..7bb1322 100644 ---- a/fs/nilfs2/segment.c -+++ b/fs/nilfs2/segment.c -@@ -2449,6 +2449,8 @@ static int nilfs_segctor_thread(void *arg) - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; - -+ set_freezable(); -+ - sci->sc_timer.data = (unsigned long)current; - sci->sc_timer.function = nilfs_construction_timeout; - -diff --git a/fs/super.c b/fs/super.c -index 48377f7..8cdbfa3 100644 ---- a/fs/super.c -+++ b/fs/super.c -@@ -38,6 +38,8 @@ - - - LIST_HEAD(super_blocks); -+EXPORT_SYMBOL_GPL(super_blocks); -+ - DEFINE_SPINLOCK(sb_lock); - - static char *sb_writers_name[SB_FREEZE_LEVELS] = { -diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c -index a728735..a5d9536 100644 ---- a/fs/xfs/xfs_trans_ail.c -+++ b/fs/xfs/xfs_trans_ail.c -@@ -498,9 +498,10 @@ xfsaild( - struct xfs_ail *ailp = data; - long tout = 0; /* milliseconds */ - -+ set_freezable(); - current->flags |= PF_MEMALLOC; - -- while (!kthread_should_stop()) { -+ while (!kthread_freezable_should_stop(NULL)) { - if (tout && tout <= 20) - __set_current_state(TASK_KILLABLE); - else -@@ -522,6 +523,7 @@ xfsaild( - ailp->xa_target == ailp->xa_target_prev) { - spin_unlock(&ailp->xa_lock); - schedule(); -+ try_to_freeze(); - tout = 0; - continue; - } -diff --git a/include/linux/bio.h b/include/linux/bio.h -index bba5508..d341bdf 100644 ---- a/include/linux/bio.h -+++ b/include/linux/bio.h -@@ -32,6 +32,8 @@ - /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ - #include - -+extern int trap_non_toi_io; -+ - #define BIO_DEBUG - - #ifdef BIO_DEBUG -diff --git a/include/linux/blk_types.h 
b/include/linux/blk_types.h -index aa0eaa2..a15689c 100644 ---- a/include/linux/blk_types.h -+++ b/include/linux/blk_types.h -@@ -122,13 +122,14 @@ struct bio { - #define BIO_QUIET 10 /* Make BIO Quiet */ - #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ - #define BIO_SNAP_STABLE 12 /* bio data must be snapshotted during write */ -+#define BIO_TOI 13 /* bio is TuxOnIce submitted */ - - /* - * Flags starting here get preserved by bio_reset() - this includes - * BIO_POOL_IDX() - */ --#define BIO_RESET_BITS 13 --#define BIO_OWNS_VEC 13 /* bio_free() should free bvec */ -+#define BIO_RESET_BITS 14 -+#define BIO_OWNS_VEC 14 /* bio_free() should free bvec */ - - #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) - -diff --git a/include/linux/fs.h b/include/linux/fs.h -index 8780312..94610ae 100644 ---- a/include/linux/fs.h -+++ b/include/linux/fs.h -@@ -1572,6 +1572,8 @@ struct super_operations { - #define S_IMA 1024 /* Inode has an associated IMA struct */ - #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ - #define S_NOSEC 4096 /* no suid or xattr security attributes */ -+#define S_ATOMIC_COPY 8192 /* Pages mapped with this inode need to be -+ atomically copied (gem) */ - - /* - * Note that nosuid etc flags are inode-specific: setting some file-system -@@ -2069,6 +2071,13 @@ extern struct super_block *freeze_bdev(struct block_device *); - extern void emergency_thaw_all(void); - extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); - extern int fsync_bdev(struct block_device *); -+extern int fsync_super(struct super_block *); -+extern int fsync_no_super(struct block_device *); -+#define FS_FREEZER_FUSE 1 -+#define FS_FREEZER_NORMAL 2 -+#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL) -+void freeze_filesystems(int which); -+void thaw_filesystems(int which); - extern int sb_is_blkdev_sb(struct super_block *sb); - #else - static inline void bd_forget(struct inode *inode) {} -diff --git 
a/include/linux/fs_uuid.h b/include/linux/fs_uuid.h -new file mode 100644 -index 0000000..3234135 ---- /dev/null -+++ b/include/linux/fs_uuid.h -@@ -0,0 +1,19 @@ -+#include -+ -+struct hd_struct; -+struct block_device; -+ -+struct fs_info { -+ char uuid[16]; -+ dev_t dev_t; -+ char *last_mount; -+ int last_mount_size; -+}; -+ -+int part_matches_fs_info(struct hd_struct *part, struct fs_info *seek); -+dev_t blk_lookup_fs_info(struct fs_info *seek); -+struct fs_info *fs_info_from_block_dev(struct block_device *bdev); -+void free_fs_info(struct fs_info *fs_info); -+int bdev_matches_key(struct block_device *bdev, const char *key); -+struct block_device *next_bdev_of_type(struct block_device *last, -+ const char *key); -diff --git a/include/linux/mm.h b/include/linux/mm.h -index d677706..03f2b4f 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -2019,6 +2019,7 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, - unsigned long shrink_slab(struct shrink_control *shrink, - unsigned long nr_pages_scanned, - unsigned long lru_pages); -+void drop_pagecache(void); - - #ifndef CONFIG_MMU - #define randomize_va_space 0 -diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h -index d1fe1a7..7cef674 100644 ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -109,6 +109,12 @@ enum pageflags { - #ifdef CONFIG_TRANSPARENT_HUGEPAGE - PG_compound_lock, - #endif -+#ifdef CONFIG_TOI_INCREMENTAL -+ PG_toi_ignore, /* Ignore this page */ -+ PG_toi_ro, /* Page was made RO by TOI */ -+ PG_toi_cbw, /* Copy the page before it is written to */ -+ PG_toi_dirty, /* Page has been modified */ -+#endif - __NR_PAGEFLAGS, - - /* Filesystems */ -@@ -274,6 +280,12 @@ TESTSCFLAG(HWPoison, hwpoison) - PAGEFLAG_FALSE(HWPoison) - #define __PG_HWPOISON 0 - #endif -+#ifdef CONFIG_TOI_INCREMENTAL -+PAGEFLAG(TOI_RO, toi_ro) -+PAGEFLAG(TOI_Dirty, toi_dirty) -+PAGEFLAG(TOI_Ignore, toi_ignore) -+PAGEFLAG(TOI_CBW, toi_cbw) -+#endif - - u64 
stable_page_flags(struct page *page); - -diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h -index 4d1771c..20ab982 100644 ---- a/include/linux/shmem_fs.h -+++ b/include/linux/shmem_fs.h -@@ -46,9 +46,10 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) - extern int shmem_init(void); - extern int shmem_fill_super(struct super_block *sb, void *data, int silent); - extern struct file *shmem_file_setup(const char *name, -- loff_t size, unsigned long flags); -+ loff_t size, unsigned long flags, -+ int atomic_copy); - extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, -- unsigned long flags); -+ unsigned long flags, int atomic_copy); - extern int shmem_zero_setup(struct vm_area_struct *); - extern int shmem_lock(struct file *file, int lock, struct user_struct *user); - extern bool shmem_mapping(struct address_space *mapping); -diff --git a/include/linux/suspend.h b/include/linux/suspend.h -index f73cabf..5fde316 100644 ---- a/include/linux/suspend.h -+++ b/include/linux/suspend.h -@@ -419,6 +419,74 @@ extern bool pm_print_times_enabled; - #define pm_print_times_enabled (false) - #endif - -+enum { -+ TOI_CAN_HIBERNATE, -+ TOI_CAN_RESUME, -+ TOI_RESUME_DEVICE_OK, -+ TOI_NORESUME_SPECIFIED, -+ TOI_SANITY_CHECK_PROMPT, -+ TOI_CONTINUE_REQ, -+ TOI_RESUMED_BEFORE, -+ TOI_BOOT_TIME, -+ TOI_NOW_RESUMING, -+ TOI_IGNORE_LOGLEVEL, -+ TOI_TRYING_TO_RESUME, -+ TOI_LOADING_ALT_IMAGE, -+ TOI_STOP_RESUME, -+ TOI_IO_STOPPED, -+ TOI_NOTIFIERS_PREPARE, -+ TOI_CLUSTER_MODE, -+ TOI_BOOT_KERNEL, -+ TOI_DEVICE_HOTPLUG_LOCKED, -+}; -+ -+#ifdef CONFIG_TOI -+ -+/* Used in init dir files */ -+extern unsigned long toi_state; -+#define set_toi_state(bit) (set_bit(bit, &toi_state)) -+#define clear_toi_state(bit) (clear_bit(bit, &toi_state)) -+#define test_toi_state(bit) (test_bit(bit, &toi_state)) -+extern int toi_running; -+ -+#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action)) -+extern int try_tuxonice_hibernate(void); -+ 
-+#else /* !CONFIG_TOI */ -+ -+#define toi_state (0) -+#define set_toi_state(bit) do { } while (0) -+#define clear_toi_state(bit) do { } while (0) -+#define test_toi_state(bit) (0) -+#define toi_running (0) -+ -+static inline int try_tuxonice_hibernate(void) { return 0; } -+#define test_action_state(bit) (0) -+ -+#endif /* CONFIG_TOI */ -+ -+#ifdef CONFIG_HIBERNATION -+#ifdef CONFIG_TOI -+extern void try_tuxonice_resume(void); -+#else -+#define try_tuxonice_resume() do { } while (0) -+#endif -+ -+extern int resume_attempted; -+extern int software_resume(void); -+ -+static inline void check_resume_attempted(void) -+{ -+ if (resume_attempted) -+ return; -+ -+ software_resume(); -+} -+#else -+#define check_resume_attempted() do { } while (0) -+#define resume_attempted (0) -+#endif -+ - #ifdef CONFIG_PM_AUTOSLEEP - - /* kernel/power/autosleep.c */ -diff --git a/include/linux/swap.h b/include/linux/swap.h -index 3507115..c133781 100644 ---- a/include/linux/swap.h -+++ b/include/linux/swap.h -@@ -301,6 +301,7 @@ extern unsigned long totalram_pages; - extern unsigned long totalreserve_pages; - extern unsigned long dirty_balance_reserve; - extern unsigned long nr_free_buffer_pages(void); -+extern unsigned long nr_unallocated_buffer_pages(void); - extern unsigned long nr_free_pagecache_pages(void); - - /* Definition of global_page_state not available yet */ -@@ -350,6 +351,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, - struct zone *zone, - unsigned long *nr_scanned); - extern unsigned long shrink_all_memory(unsigned long nr_pages); -+extern unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, -+ gfp_t mask); - extern int vm_swappiness; - extern int remove_mapping(struct address_space *mapping, struct page *page); - extern unsigned long vm_total_pages; -@@ -463,13 +466,17 @@ extern void swapcache_free(swp_entry_t, struct page *page); - extern int free_swap_and_cache(swp_entry_t); - extern int swap_type_of(dev_t, sector_t, struct 
block_device **); - extern unsigned int count_swap_pages(int, int); -+extern sector_t map_swap_entry(swp_entry_t entry, struct block_device **); - extern sector_t map_swap_page(struct page *, struct block_device **); - extern sector_t swapdev_block(int, pgoff_t); -+extern struct swap_info_struct *get_swap_info_struct(unsigned); - extern int page_swapcount(struct page *); - extern struct swap_info_struct *page_swap_info(struct page *); - extern int reuse_swap_page(struct page *); - extern int try_to_free_swap(struct page *); - struct backing_dev_info; -+extern void get_swap_range_of_type(int type, swp_entry_t *start, -+ swp_entry_t *end, unsigned int limit); - - #ifdef CONFIG_MEMCG - extern void -diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h -index 1a85940..3025606 100644 ---- a/include/uapi/linux/netlink.h -+++ b/include/uapi/linux/netlink.h -@@ -27,6 +27,8 @@ - #define NETLINK_ECRYPTFS 19 - #define NETLINK_RDMA 20 - #define NETLINK_CRYPTO 21 /* Crypto layer */ -+#define NETLINK_TOI_USERUI 22 /* TuxOnIce's userui */ -+#define NETLINK_TOI_USM 23 /* Userspace storage manager */ - - #define NETLINK_INET_DIAG NETLINK_SOCK_DIAG - -diff --git a/init/do_mounts.c b/init/do_mounts.c -index 82f2288..e35fb52 100644 ---- a/init/do_mounts.c -+++ b/init/do_mounts.c -@@ -285,6 +285,7 @@ fail: - done: - return res; - } -+EXPORT_SYMBOL_GPL(name_to_dev_t); - - static int __init root_dev_setup(char *line) - { -@@ -586,6 +587,8 @@ void __init prepare_namespace(void) - if (is_floppy && rd_doload && rd_load_disk(0)) - ROOT_DEV = Root_RAM0; - -+ check_resume_attempted(); -+ - mount_root(); - out: - devtmpfs_mount("dev"); -diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c -index 3e0878e..a49c596 100644 ---- a/init/do_mounts_initrd.c -+++ b/init/do_mounts_initrd.c -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -79,6 +80,11 @@ static void __init handle_initrd(void) - - current->flags &= ~PF_FREEZER_SKIP; - 
-+ if (!resume_attempted) -+ printk(KERN_ERR "TuxOnIce: No attempt was made to resume from " -+ "any image that might exist.\n"); -+ clear_toi_state(TOI_BOOT_TIME); -+ - /* move initrd to rootfs' /old */ - sys_mount("..", ".", NULL, MS_MOVE, NULL); - /* switch root and cwd back to / of rootfs */ -diff --git a/init/main.c b/init/main.c -index 48655ce..d08511c 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -123,6 +123,7 @@ void (*__initdata late_time_init)(void); - char __initdata boot_command_line[COMMAND_LINE_SIZE]; - /* Untouched saved command line (eg. for /proc) */ - char *saved_command_line; -+EXPORT_SYMBOL_GPL(saved_command_line); - /* Command line for parameter parsing */ - static char *static_command_line; - /* Command line for per-initcall parameter parsing */ -diff --git a/ipc/shm.c b/ipc/shm.c -index 7645961..c1b7257 100644 ---- a/ipc/shm.c -+++ b/ipc/shm.c -@@ -537,7 +537,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) - if ((shmflg & SHM_NORESERVE) && - sysctl_overcommit_memory != OVERCOMMIT_NEVER) - acctflag = VM_NORESERVE; -- file = shmem_file_setup(name, size, acctflag); -+ file = shmem_file_setup(name, size, acctflag, 0); - } - error = PTR_ERR(file); - if (IS_ERR(file)) -diff --git a/kernel/cpu.c b/kernel/cpu.c -index 247979a..ac131de 100644 ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -542,6 +542,7 @@ int disable_nonboot_cpus(void) - cpu_maps_update_done(); - return error; - } -+EXPORT_SYMBOL_GPL(disable_nonboot_cpus); - - void __weak arch_enable_nonboot_cpus_begin(void) - { -@@ -580,6 +581,7 @@ void __ref enable_nonboot_cpus(void) - out: - cpu_maps_update_done(); - } -+EXPORT_SYMBOL_GPL(enable_nonboot_cpus); - - static int __init alloc_frozen_cpus(void) - { -diff --git a/kernel/kmod.c b/kernel/kmod.c -index 6b375af..e426523 100644 ---- a/kernel/kmod.c -+++ b/kernel/kmod.c -@@ -461,6 +461,7 @@ void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) - wake_up(&usermodehelper_disabled_waitq); - 
up_write(&umhelper_sem); - } -+EXPORT_SYMBOL_GPL(__usermodehelper_set_disable_depth); - - /** - * __usermodehelper_disable - Prevent new helpers from being started. -@@ -494,6 +495,7 @@ int __usermodehelper_disable(enum umh_disable_depth depth) - __usermodehelper_set_disable_depth(UMH_ENABLED); - return -EAGAIN; - } -+EXPORT_SYMBOL_GPL(__usermodehelper_disable); - - static void helper_lock(void) - { -diff --git a/kernel/kthread.c b/kernel/kthread.c -index 9a130ec..47605e1 100644 ---- a/kernel/kthread.c -+++ b/kernel/kthread.c -@@ -550,6 +550,8 @@ int kthread_worker_fn(void *worker_ptr) - - WARN_ON(worker->task); - worker->task = current; -+ set_freezable(); -+ - repeat: - set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ - -diff --git a/kernel/pid.c b/kernel/pid.c -index 9b9a266..ad91ea4 100644 ---- a/kernel/pid.c -+++ b/kernel/pid.c -@@ -450,6 +450,7 @@ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) - " protection"); - return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); - } -+EXPORT_SYMBOL_GPL(find_task_by_pid_ns); - - struct task_struct *find_task_by_vpid(pid_t vnr) - { -diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig -index 2fac9cc..bd51a10 100644 ---- a/kernel/power/Kconfig -+++ b/kernel/power/Kconfig -@@ -91,6 +91,291 @@ config PM_STD_PARTITION - suspended image to. It will simply pick the first available swap - device. - -+menuconfig TOI_CORE -+ tristate "Enhanced Hibernation (TuxOnIce)" -+ depends on HIBERNATION -+ default y -+ ---help--- -+ TuxOnIce is the 'new and improved' suspend support. -+ -+ See the TuxOnIce home page (tuxonice.net) -+ for FAQs, HOWTOs and other documentation. -+ -+ comment "Image Storage (you need at least one allocator)" -+ depends on TOI_CORE -+ -+ config TOI_FILE -+ tristate "File Allocator" -+ depends on TOI_CORE -+ default y -+ ---help--- -+ This option enables support for storing an image in a -+ simple file. 
You might want this if your swap is -+ sometimes full enough that you don't have enough spare -+ space to store an image. -+ -+ config TOI_SWAP -+ tristate "Swap Allocator" -+ depends on TOI_CORE && SWAP -+ default y -+ ---help--- -+ This option enables support for storing an image in your -+ swap space. -+ -+ comment "General Options" -+ depends on TOI_CORE -+ -+ config TOI_PRUNE -+ tristate "Image pruning support" -+ depends on TOI_CORE && CRYPTO && BROKEN -+ default y -+ ---help--- -+ This option adds support for using cryptoapi hashing -+ algorithms to identify pages with the same content. We -+ then write a much smaller pointer to the first copy of -+ the data instead of a complete (perhaps compressed) -+ additional copy. -+ -+ You probably want this, so say Y here. -+ -+ comment "No image pruning support available without Cryptoapi support." -+ depends on TOI_CORE && !CRYPTO -+ -+ config TOI_CRYPTO -+ tristate "Compression support" -+ depends on TOI_CORE && CRYPTO -+ default y -+ ---help--- -+ This option adds support for using cryptoapi compression -+ algorithms. Compression is particularly useful as it can -+ more than double your suspend and resume speed (depending -+ upon how well your image compresses). -+ -+ You probably want this, so say Y here. -+ -+ comment "No compression support available without Cryptoapi support." -+ depends on TOI_CORE && !CRYPTO -+ -+ config TOI_USERUI -+ tristate "Userspace User Interface support" -+ depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE) -+ default y -+ ---help--- -+ This option enables support for a userspace based user interface -+ to TuxOnIce, which allows you to have a nice display while suspending -+ and resuming, and also enables features such as pressing escape to -+ cancel a cycle or interactive debugging.
-+ -+ config TOI_USERUI_DEFAULT_PATH -+ string "Default userui program location" -+ default "/usr/local/sbin/tuxoniceui_text" -+ depends on TOI_USERUI -+ ---help--- -+ This entry allows you to specify a default path to the userui binary. -+ -+ config TOI_DEFAULT_IMAGE_SIZE_LIMIT -+ int "Default image size limit" -+ range -2 65536 -+ default "-2" -+ depends on TOI_CORE -+ ---help--- -+ This entry allows you to specify a default image size limit. It can -+ be overridden at run-time using /sys/power/tuxonice/image_size_limit. -+ -+ config TOI_KEEP_IMAGE -+ bool "Allow Keep Image Mode" -+ depends on TOI_CORE -+ ---help--- -+ This option allows you to keep an image and reuse it. It is intended -+ __ONLY__ for use with systems where all filesystems are mounted read- -+ only (kiosks, for example). To use it, compile this option in and boot -+ normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend. -+ When you resume, the image will not be removed. You will be unable to turn -+ off swap partitions (assuming you are using the swap allocator), but future -+ suspends simply do a power-down. The image can be updated using the -+ kernel command line parameter suspend_act= to turn off the keep image -+ bit. Keep image mode is a little less user friendly on purpose - it -+ should not be used without thought! -+ -+ config TOI_INCREMENTAL -+ bool "Incremental Image Support" -+ depends on TOI_CORE && 64BIT -+ default n -+ ---help--- -+ This option enables the work in progress toward using the dirty page -+ tracking to record changes to pages. It is hoped that -+ this will be an initial step toward implementing storing just -+ the differences between consecutive images, which will -+ increase the amount of storage needed for the image, but also -+ increase the speed at which writing an image occurs and -+ reduce the wear and tear on drives.
-+ -+ At the moment, all that is implemented is the first step of keeping -+ an existing image and then comparing it to the contents in memory -+ (by setting /sys/power/tuxonice/verify_image to 1 and triggering a -+ (fake) resume) to see what the page change tracking should find to be -+ different. If you have verify_image set to 1, TuxOnIce will automatically -+ invalidate the old image when you next try to hibernate, so there's no -+ greater chance of disk corruption than normal. -+ -+ comment "No incremental image support available without Keep Image support." -+ depends on TOI_CORE && !TOI_KEEP_IMAGE -+ -+ config TOI_REPLACE_SWSUSP -+ bool "Replace swsusp by default" -+ default y -+ depends on TOI_CORE -+ ---help--- -+ TuxOnIce can replace swsusp. This option makes that the default state, -+ requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want -+ to use the vanilla kernel functionality. Note that your initrd/ramfs will -+ need to do this before trying to resume, too. -+ With overriding swsusp enabled, echoing disk to /sys/power/state will -+ start a TuxOnIce cycle. If resume= doesn't specify an allocator and both -+ the swap and file allocators are compiled in, the swap allocator will be -+ used by default. -+ -+ config TOI_IGNORE_LATE_INITCALL -+ bool "Wait for initrd/ramfs to run, by default" -+ default n -+ depends on TOI_CORE -+ ---help--- -+ When booting, TuxOnIce can check for an image and start to resume prior -+ to any initrd/ramfs running (via a late initcall). -+ -+ If you don't have an initrd/ramfs, this is what you want to happen - -+ otherwise you won't be able to safely resume. You should set this option -+ to 'No'. -+ -+ If, however, you want your initrd/ramfs to run anyway before resuming, -+ you need to tell TuxOnIce to ignore that earlier opportunity to resume. -+ This can be done either by using this compile time option, or by -+ overriding this option with the boot-time parameter toi_initramfs_resume_only=1. 
-+ -+ Note that if TuxOnIce can't resume at the earlier opportunity, the -+ value of this option won't matter - the initramfs/initrd (if any) will -+ run anyway. -+ -+ menuconfig TOI_CLUSTER -+ tristate "Cluster support" -+ default n -+ depends on TOI_CORE && NET && BROKEN -+ ---help--- -+ Support for linking multiple machines in a cluster so that they suspend -+ and resume together. -+ -+ config TOI_DEFAULT_CLUSTER_INTERFACE -+ string "Default cluster interface" -+ depends on TOI_CLUSTER -+ ---help--- -+ The default interface on which to communicate with other nodes in -+ the cluster. -+ -+ If no value is set here, cluster support will be disabled by default. -+ -+ config TOI_DEFAULT_CLUSTER_KEY -+ string "Default cluster key" -+ default "Default" -+ depends on TOI_CLUSTER -+ ---help--- -+ The default key used by this node. All nodes in the same cluster -+ have the same key. Multiple clusters may coexist on the same lan -+ by using different values for this key. -+ -+ config TOI_CLUSTER_IMAGE_TIMEOUT -+ int "Timeout when checking for image" -+ default 15 -+ depends on TOI_CLUSTER -+ ---help--- -+ Timeout (seconds) before continuing to boot when waiting to see -+ whether other nodes might have an image. Set to -1 to wait -+ indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue -+ booting sooner than this timeout. -+ -+ config TOI_CLUSTER_WAIT_UNTIL_NODES -+ int "Nodes without image before continuing" -+ default 0 -+ depends on TOI_CLUSTER -+ ---help--- -+ When booting and no image is found, we wait to see if other nodes -+ have an image before continuing to boot. This value lets us -+ continue after seeing a certain number of nodes without an image, -+ instead of continuing to wait for the timeout. Set to 0 to only -+ use the timeout. -+ -+ config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE -+ string "Default pre-hibernate script" -+ depends on TOI_CLUSTER -+ ---help--- -+ The default script to be called when starting to hibernate.
-+ -+ config TOI_DEFAULT_CLUSTER_POST_HIBERNATE -+ string "Default post-hibernate script" -+ depends on TOI_CLUSTER -+ ---help--- -+ The default script to be called after resuming from hibernation. -+ -+ config TOI_DEFAULT_WAIT -+ int "Default waiting time for emergency boot messages" -+ default "25" -+ range -1 32768 -+ depends on TOI_CORE -+ help -+ TuxOnIce can display warnings very early in the process of resuming, -+ if (for example) it appears that you have booted a kernel that doesn't -+ match an image on disk. It can then give you the opportunity to either -+ continue booting that kernel, or reboot the machine. This option can be -+ used to control how long to wait in such circumstances. -1 means wait -+ forever. 0 means don't wait at all (do the default action, which will -+ generally be to continue booting and remove the image). Values of 1 or -+ more indicate a number of seconds (up to 255) to wait before doing the -+ default. -+ -+ config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE -+ int "Default extra pages allowance" -+ default "2000" -+ range 500 32768 -+ depends on TOI_CORE -+ help -+ This value controls the default for the allowance TuxOnIce makes for -+ drivers to allocate extra memory during the atomic copy. The default -+ value of 2000 will be okay in most cases. If you are using -+ DRI, the easiest way to find what value to use is to try to hibernate -+ and look at how many pages were actually needed in the sysfs entry -+ /sys/power/tuxonice/debug_info (first number on the last line), adding -+ a little extra because the value is not always the same. -+ -+ config TOI_CHECKSUM -+ bool "Checksum pageset2" -+ default n -+ depends on TOI_CORE -+ select CRYPTO -+ select CRYPTO_ALGAPI -+ select CRYPTO_MD4 -+ ---help--- -+ Adds support for checksumming pageset2 pages, to ensure you really get an -+ atomic copy. 
Since some filesystems (XFS especially) change metadata even -+ when there's no other activity, we need this to check for pages that have -+ been changed while we were saving the page cache. If your debugging output -+ always says no pages were resaved, you may be able to safely disable this -+ option. -+ -+config TOI -+ bool -+ depends on TOI_CORE!=n -+ default y -+ -+config TOI_EXPORTS -+ bool -+ depends on TOI_SWAP=m || TOI_FILE=m || \ -+ TOI_CRYPTO=m || TOI_CLUSTER=m || \ -+ TOI_USERUI=m || TOI_CORE=m -+ default y -+ -+config TOI_ZRAM_SUPPORT -+ def_bool y -+ depends on TOI && ZRAM!=n -+ - config PM_SLEEP - def_bool y - depends on SUSPEND || HIBERNATE_CALLBACKS -diff --git a/kernel/power/Makefile b/kernel/power/Makefile -index 29472bf..dd5d4f2 100644 ---- a/kernel/power/Makefile -+++ b/kernel/power/Makefile -@@ -1,6 +1,37 @@ - - ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG - -+tuxonice_core-y := tuxonice_modules.o -+ -+obj-$(CONFIG_TOI) += tuxonice_builtin.o -+ -+tuxonice_core-$(CONFIG_PM_DEBUG) += tuxonice_alloc.o -+ -+# Compile these in after allocation debugging, if used. 
-+ -+tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \ -+ tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \ -+ tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \ -+ tuxonice_power_off.o tuxonice_atomic_copy.o -+ -+tuxonice_core-$(CONFIG_TOI_CHECKSUM) += tuxonice_checksum.o -+ -+tuxonice_core-$(CONFIG_NET) += tuxonice_storage.o tuxonice_netlink.o -+ -+obj-$(CONFIG_TOI_CORE) += tuxonice_core.o -+obj-$(CONFIG_TOI_PRUNE) += tuxonice_prune.o -+obj-$(CONFIG_TOI_INCREMENTAL) += tuxonice_incremental.o -+obj-$(CONFIG_TOI_CRYPTO) += tuxonice_compress.o -+ -+tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \ -+ tuxonice_bio_signature.o -+ -+obj-$(CONFIG_TOI_SWAP) += tuxonice_bio.o tuxonice_swap.o -+obj-$(CONFIG_TOI_FILE) += tuxonice_bio.o tuxonice_file.o -+obj-$(CONFIG_TOI_CLUSTER) += tuxonice_cluster.o -+ -+obj-$(CONFIG_TOI_USERUI) += tuxonice_userui.o -+ - obj-y += qos.o - obj-$(CONFIG_PM) += main.o - obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o -diff --git a/kernel/power/console.c b/kernel/power/console.c -index aba9c54..856fe7f 100644 ---- a/kernel/power/console.c -+++ b/kernel/power/console.c -@@ -138,6 +138,7 @@ int pm_prepare_console(void) - orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); - return 0; - } -+EXPORT_SYMBOL_GPL(pm_prepare_console); - - void pm_restore_console(void) - { -@@ -149,3 +150,4 @@ void pm_restore_console(void) - vt_kmsg_redirect(orig_kmsg); - } - } -+EXPORT_SYMBOL_GPL(pm_restore_console); -diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c -index f4f2073..72760fa 100644 ---- a/kernel/power/hibernate.c -+++ b/kernel/power/hibernate.c -@@ -29,14 +29,15 @@ - #include - #include - --#include "power.h" -+#include "tuxonice.h" - - - static int nocompress; - static int noresume; - static int resume_wait; - static int resume_delay; --static char resume_file[256] = CONFIG_PM_STD_PARTITION; -+char resume_file[256] = CONFIG_PM_STD_PARTITION; -+EXPORT_SYMBOL_GPL(resume_file); - dev_t swsusp_resume_device; - 
sector_t swsusp_resume_block; - __visible int in_suspend __nosavedata; -@@ -115,21 +116,23 @@ static int hibernation_test(int level) { return 0; } - * platform_begin - Call platform to start hibernation. - * @platform_mode: Whether or not to use the platform driver. - */ --static int platform_begin(int platform_mode) -+int platform_begin(int platform_mode) - { - return (platform_mode && hibernation_ops) ? - hibernation_ops->begin() : 0; - } -+EXPORT_SYMBOL_GPL(platform_begin); - - /** - * platform_end - Call platform to finish transition to the working state. - * @platform_mode: Whether or not to use the platform driver. - */ --static void platform_end(int platform_mode) -+void platform_end(int platform_mode) - { - if (platform_mode && hibernation_ops) - hibernation_ops->end(); - } -+EXPORT_SYMBOL_GPL(platform_end); - - /** - * platform_pre_snapshot - Call platform to prepare the machine for hibernation. -@@ -139,11 +142,12 @@ static void platform_end(int platform_mode) - * if so configured, and return an error code if that fails. - */ - --static int platform_pre_snapshot(int platform_mode) -+int platform_pre_snapshot(int platform_mode) - { - return (platform_mode && hibernation_ops) ? - hibernation_ops->pre_snapshot() : 0; - } -+EXPORT_SYMBOL_GPL(platform_pre_snapshot); - - /** - * platform_leave - Call platform to prepare a transition to the working state. -@@ -154,11 +158,12 @@ static int platform_pre_snapshot(int platform_mode) - * - * This routine is called on one CPU with interrupts disabled. - */ --static void platform_leave(int platform_mode) -+void platform_leave(int platform_mode) - { - if (platform_mode && hibernation_ops) - hibernation_ops->leave(); - } -+EXPORT_SYMBOL_GPL(platform_leave); - - /** - * platform_finish - Call platform to switch the system to the working state. -@@ -169,11 +174,12 @@ static void platform_leave(int platform_mode) - * - * This routine must be called after platform_prepare(). 
- */ --static void platform_finish(int platform_mode) -+void platform_finish(int platform_mode) - { - if (platform_mode && hibernation_ops) - hibernation_ops->finish(); - } -+EXPORT_SYMBOL_GPL(platform_finish); - - /** - * platform_pre_restore - Prepare for hibernate image restoration. -@@ -185,11 +191,12 @@ static void platform_finish(int platform_mode) - * If the restore fails after this function has been called, - * platform_restore_cleanup() must be called. - */ --static int platform_pre_restore(int platform_mode) -+int platform_pre_restore(int platform_mode) - { - return (platform_mode && hibernation_ops) ? - hibernation_ops->pre_restore() : 0; - } -+EXPORT_SYMBOL_GPL(platform_pre_restore); - - /** - * platform_restore_cleanup - Switch to the working state after failing restore. -@@ -202,21 +209,23 @@ static int platform_pre_restore(int platform_mode) - * function must be called too, regardless of the result of - * platform_pre_restore(). - */ --static void platform_restore_cleanup(int platform_mode) -+void platform_restore_cleanup(int platform_mode) - { - if (platform_mode && hibernation_ops) - hibernation_ops->restore_cleanup(); - } -+EXPORT_SYMBOL_GPL(platform_restore_cleanup); - - /** - * platform_recover - Recover from a failure to suspend devices. - * @platform_mode: Whether or not to use the platform driver. - */ --static void platform_recover(int platform_mode) -+void platform_recover(int platform_mode) - { - if (platform_mode && hibernation_ops && hibernation_ops->recover) - hibernation_ops->recover(); - } -+EXPORT_SYMBOL_GPL(platform_recover); - - /** - * swsusp_show_speed - Print time elapsed between two events during hibernation. -@@ -574,6 +583,7 @@ int hibernation_platform_enter(void) - - return error; - } -+EXPORT_SYMBOL_GPL(hibernation_platform_enter); - - /** - * power_down - Shut the machine down for hibernation. 
-@@ -633,6 +643,9 @@ int hibernate(void) - { - int error; - -+ if (test_action_state(TOI_REPLACE_SWSUSP)) -+ return try_tuxonice_hibernate(); -+ - lock_system_sleep(); - /* The snapshot device should not be opened while we're running */ - if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { -@@ -717,11 +730,19 @@ int hibernate(void) - * attempts to recover gracefully and make the kernel return to the normal mode - * of operation. - */ --static int software_resume(void) -+int software_resume(void) - { - int error; - unsigned int flags; - -+ resume_attempted = 1; -+ -+ /* -+ * We can't know (until an image header - if any - is loaded), whether -+ * we did override swsusp. We therefore ensure that both are tried. -+ */ -+ try_tuxonice_resume(); -+ - /* - * If the user said "noresume".. bail out early. - */ -@@ -1098,6 +1119,7 @@ static int __init hibernate_setup(char *str) - static int __init noresume_setup(char *str) - { - noresume = 1; -+ set_toi_state(TOI_NORESUME_SPECIFIED); - return 1; - } - -diff --git a/kernel/power/main.c b/kernel/power/main.c -index 6271bc4..bcf87ed 100644 ---- a/kernel/power/main.c -+++ b/kernel/power/main.c -@@ -19,12 +19,14 @@ - #include "power.h" - - DEFINE_MUTEX(pm_mutex); -+EXPORT_SYMBOL_GPL(pm_mutex); - - #ifdef CONFIG_PM_SLEEP - - /* Routines for PM-transition notifications */ - --static BLOCKING_NOTIFIER_HEAD(pm_chain_head); -+BLOCKING_NOTIFIER_HEAD(pm_chain_head); -+EXPORT_SYMBOL_GPL(pm_chain_head); - - int register_pm_notifier(struct notifier_block *nb) - { -@@ -44,6 +46,7 @@ int pm_notifier_call_chain(unsigned long val) - - return notifier_to_errno(ret); - } -+EXPORT_SYMBOL_GPL(pm_notifier_call_chain); - - /* If set, devices may be suspended and resumed asynchronously. */ - int pm_async_enabled = 1; -@@ -277,6 +280,7 @@ static inline void pm_print_times_init(void) {} - #endif /* CONFIG_PM_SLEEP_DEBUG */ - - struct kobject *power_kobj; -+EXPORT_SYMBOL_GPL(power_kobj); - - /** - * state - control system power state. 
-diff --git a/kernel/power/power.h b/kernel/power/power.h -index 15f37ea..906ea21 100644 ---- a/kernel/power/power.h -+++ b/kernel/power/power.h -@@ -36,8 +36,12 @@ static inline char *check_image_kernel(struct swsusp_info *info) - return arch_hibernation_header_restore(info) ? - "architecture specific data" : NULL; - } -+#else -+extern char *check_image_kernel(struct swsusp_info *info); - #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ -+extern int init_header(struct swsusp_info *info); - -+extern char resume_file[256]; - /* - * Keep some memory free so that I/O operations can succeed without paging - * [Might this be more than 4 MB?] -@@ -58,6 +62,7 @@ extern bool freezer_test_done; - extern int hibernation_snapshot(int platform_mode); - extern int hibernation_restore(int platform_mode); - extern int hibernation_platform_enter(void); -+extern void platform_recover(int platform_mode); - - #else /* !CONFIG_HIBERNATION */ - -@@ -77,6 +82,8 @@ static struct kobj_attribute _name##_attr = { \ - .store = _name##_store, \ - } - -+extern struct pbe *restore_pblist; -+ - /* Preferred image size in bytes (default 500 MB) */ - extern unsigned long image_size; - /* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */ -@@ -271,6 +278,90 @@ static inline void suspend_thaw_processes(void) - } - #endif - -+extern struct page *saveable_page(struct zone *z, unsigned long p); -+#ifdef CONFIG_HIGHMEM -+extern struct page *saveable_highmem_page(struct zone *z, unsigned long p); -+#else -+static -+inline struct page *saveable_highmem_page(struct zone *z, unsigned long p) -+{ -+ return NULL; -+} -+#endif -+ -+#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe)) -+extern struct list_head nosave_regions; -+ -+/** -+ * This structure represents a range of page frames the contents of which -+ * should not be saved during the suspend. 
-+ */ -+ -+struct nosave_region { -+ struct list_head list; -+ unsigned long start_pfn; -+ unsigned long end_pfn; -+}; -+ -+#define BM_END_OF_MAP (~0UL) -+ -+#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) -+ -+struct bm_block { -+ struct list_head hook; /* hook into a list of bitmap blocks */ -+ unsigned long start_pfn; /* pfn represented by the first bit */ -+ unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ -+ unsigned long *data; /* bitmap representing pages */ -+}; -+ -+/* struct bm_position is used for browsing memory bitmaps */ -+ -+struct bm_position { -+ struct bm_block *block; -+ int bit; -+}; -+ -+struct memory_bitmap { -+ struct list_head blocks; /* list of bitmap blocks */ -+ struct linked_page *p_list; /* list of pages used to store zone -+ * bitmap objects and bitmap block -+ * objects -+ */ -+ struct bm_position *states; /* most recently used bit position */ -+ int num_states; /* when iterating over a bitmap and -+ * number of states we support. -+ */ -+}; -+ -+extern int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, -+ int safe_needed); -+extern int memory_bm_create_index(struct memory_bitmap *bm, gfp_t gfp_mask, -+ int safe_needed, int index); -+extern void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); -+extern void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn); -+extern void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn); -+extern void memory_bm_clear_bit_index(struct memory_bitmap *bm, unsigned long pfn, int index); -+extern int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn); -+extern int memory_bm_test_bit_index(struct memory_bitmap *bm, unsigned long pfn, int index); -+extern unsigned long memory_bm_next_pfn(struct memory_bitmap *bm); -+extern unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, -+ int index); -+extern void memory_bm_position_reset(struct memory_bitmap *bm); -+extern void memory_bm_clear(struct memory_bitmap 
*bm); -+extern void memory_bm_copy(struct memory_bitmap *source, -+ struct memory_bitmap *dest); -+extern void memory_bm_dup(struct memory_bitmap *source, -+ struct memory_bitmap *dest); -+extern int memory_bm_set_iterators(struct memory_bitmap *bm, int number); -+ -+#ifdef CONFIG_TOI -+struct toi_module_ops; -+extern int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) -+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); -+extern int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) -+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)); -+#endif -+ - #ifdef CONFIG_PM_AUTOSLEEP - - /* kernel/power/autosleep.c */ -diff --git a/kernel/power/process.c b/kernel/power/process.c -index 06ec886..4004a83 100644 ---- a/kernel/power/process.c -+++ b/kernel/power/process.c -@@ -143,6 +143,7 @@ int freeze_processes(void) - thaw_processes(); - return error; - } -+EXPORT_SYMBOL_GPL(freeze_processes); - - /** - * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. 
-@@ -169,6 +170,7 @@ int freeze_kernel_threads(void) - thaw_kernel_threads(); - return error; - } -+EXPORT_SYMBOL_GPL(freeze_kernel_threads); - - void thaw_processes(void) - { -@@ -202,6 +204,7 @@ void thaw_processes(void) - schedule(); - printk("done.\n"); - } -+EXPORT_SYMBOL_GPL(thaw_processes); - - void thaw_kernel_threads(void) - { -@@ -222,3 +225,4 @@ void thaw_kernel_threads(void) - schedule(); - printk("done.\n"); - } -+EXPORT_SYMBOL_GPL(thaw_kernel_threads); -diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c -index 1ea328a..7314819 100644 ---- a/kernel/power/snapshot.c -+++ b/kernel/power/snapshot.c -@@ -36,6 +36,8 @@ - #include - - #include "power.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_pagedir.h" - - static int swsusp_page_is_free(struct page *); - static void swsusp_set_page_forbidden(struct page *); -@@ -72,6 +74,10 @@ void __init hibernate_image_size_init(void) - * directly to their "original" page frames. - */ - struct pbe *restore_pblist; -+EXPORT_SYMBOL_GPL(restore_pblist); -+ -+int resume_attempted; -+EXPORT_SYMBOL_GPL(resume_attempted); - - /* Pointer to an auxiliary buffer (1 page) */ - static void *buffer; -@@ -114,6 +120,9 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) - - unsigned long get_safe_page(gfp_t gfp_mask) - { -+ if (toi_running) -+ return toi_get_nonconflicting_page(); -+ - return (unsigned long)get_image_page(gfp_mask, PG_SAFE); - } - -@@ -250,47 +259,53 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) - * the represented memory area. 
- */ - --#define BM_END_OF_MAP (~0UL) -- --#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) -- --struct bm_block { -- struct list_head hook; /* hook into a list of bitmap blocks */ -- unsigned long start_pfn; /* pfn represented by the first bit */ -- unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ -- unsigned long *data; /* bitmap representing pages */ --}; -- - static inline unsigned long bm_block_bits(struct bm_block *bb) - { - return bb->end_pfn - bb->start_pfn; - } - --/* strcut bm_position is used for browsing memory bitmaps */ -+/* Functions that operate on memory bitmaps */ - --struct bm_position { -- struct bm_block *block; -- int bit; --}; -+void memory_bm_position_reset_index(struct memory_bitmap *bm, int index) -+{ -+ bm->states[index].block = list_entry(bm->blocks.next, -+ struct bm_block, hook); -+ bm->states[index].bit = 0; -+} -+EXPORT_SYMBOL_GPL(memory_bm_position_reset_index); - --struct memory_bitmap { -- struct list_head blocks; /* list of bitmap blocks */ -- struct linked_page *p_list; /* list of pages used to store zone -- * bitmap objects and bitmap block -- * objects -- */ -- struct bm_position cur; /* most recently used bit position */ --}; -+void memory_bm_position_reset(struct memory_bitmap *bm) -+{ -+ int i; - --/* Functions that operate on memory bitmaps */ -+ for (i = 0; i < bm->num_states; i++) { -+ bm->states[i].block = list_entry(bm->blocks.next, -+ struct bm_block, hook); -+ bm->states[i].bit = 0; -+ } -+} -+EXPORT_SYMBOL_GPL(memory_bm_position_reset); - --static void memory_bm_position_reset(struct memory_bitmap *bm) -+int memory_bm_set_iterators(struct memory_bitmap *bm, int number) - { -- bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); -- bm->cur.bit = 0; --} -+ int bytes = number * sizeof(struct bm_position); -+ struct bm_position *new_states; -+ -+ if (number < bm->num_states) -+ return 0; - --static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); -+ new_states 
= kmalloc(bytes, GFP_KERNEL); -+ if (!new_states) -+ return -ENOMEM; -+ -+ if (bm->states) -+ kfree(bm->states); -+ -+ bm->states = new_states; -+ bm->num_states = number; -+ return 0; -+} -+EXPORT_SYMBOL_GPL(memory_bm_set_iterators); - - /** - * create_bm_block_list - create a list of block bitmap objects -@@ -398,8 +413,8 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) - /** - * memory_bm_create - allocate memory for a memory bitmap - */ --static int --memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) -+int memory_bm_create_index(struct memory_bitmap *bm, gfp_t gfp_mask, -+ int safe_needed, int states) - { - struct chain_allocator ca; - struct list_head mem_extents; -@@ -443,6 +458,9 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) - } - } - -+ if (!error) -+ error = memory_bm_set_iterators(bm, states); -+ - bm->p_list = ca.chain; - memory_bm_position_reset(bm); - Exit: -@@ -454,11 +472,18 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) - memory_bm_free(bm, PG_UNSAFE_CLEAR); - goto Exit; - } -+EXPORT_SYMBOL_GPL(memory_bm_create_index); -+ -+int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) -+{ -+ return memory_bm_create_index(bm, gfp_mask, safe_needed, 1); -+} -+EXPORT_SYMBOL_GPL(memory_bm_create); - - /** - * memory_bm_free - free memory occupied by the memory bitmap @bm - */ --static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) -+void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) - { - struct bm_block *bb; - -@@ -469,15 +494,22 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) - free_list_of_pages(bm->p_list, clear_nosave_free); - - INIT_LIST_HEAD(&bm->blocks); -+ -+ if (bm->states) { -+ kfree(bm->states); -+ bm->states = NULL; -+ bm->num_states = 0; -+ } - } -+EXPORT_SYMBOL_GPL(memory_bm_free); - - /** - * memory_bm_find_bit - find the bit in the 
bitmap @bm that corresponds - * to given pfn. The cur_zone_bm member of @bm and the cur_block member -- * of @bm->cur_zone_bm are updated. -+ * of @bm->states[i]_zone_bm are updated. - */ --static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, -- void **addr, unsigned int *bit_nr) -+static int memory_bm_find_bit_index(struct memory_bitmap *bm, unsigned long pfn, -+ void **addr, unsigned int *bit_nr, int state) - { - struct bm_block *bb; - -@@ -485,7 +517,7 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, - * Check if the pfn corresponds to the current bitmap block and find - * the block where it fits if this is not the case. - */ -- bb = bm->cur.block; -+ bb = bm->states[state].block; - if (pfn < bb->start_pfn) - list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) - if (pfn >= bb->start_pfn) -@@ -500,15 +532,21 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, - return -EFAULT; - - /* The block has been found */ -- bm->cur.block = bb; -+ bm->states[state].block = bb; - pfn -= bb->start_pfn; -- bm->cur.bit = pfn + 1; -+ bm->states[state].bit = pfn + 1; - *bit_nr = pfn; - *addr = bb->data; - return 0; - } - --static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) -+static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, -+ void **addr, unsigned int *bit_nr) -+{ -+ return memory_bm_find_bit_index(bm, pfn, addr, bit_nr, 0); -+} -+ -+void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) - { - void *addr; - unsigned int bit; -@@ -518,6 +556,7 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) - BUG_ON(error); - set_bit(bit, addr); - } -+EXPORT_SYMBOL_GPL(memory_bm_set_bit); - - static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) - { -@@ -531,27 +570,43 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) - return error; - } - --static void 
memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) -+void memory_bm_clear_bit_index(struct memory_bitmap *bm, unsigned long pfn, -+ int index) - { - void *addr; - unsigned int bit; - int error; - -- error = memory_bm_find_bit(bm, pfn, &addr, &bit); -+ error = memory_bm_find_bit_index(bm, pfn, &addr, &bit, index); - BUG_ON(error); - clear_bit(bit, addr); - } -+EXPORT_SYMBOL_GPL(memory_bm_clear_bit_index); -+ -+void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) -+{ -+ memory_bm_clear_bit_index(bm, pfn, 0); -+} -+EXPORT_SYMBOL_GPL(memory_bm_clear_bit); - --static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) -+int memory_bm_test_bit_index(struct memory_bitmap *bm, unsigned long pfn, -+ int index) - { - void *addr; - unsigned int bit; - int error; - -- error = memory_bm_find_bit(bm, pfn, &addr, &bit); -+ error = memory_bm_find_bit_index(bm, pfn, &addr, &bit, index); - BUG_ON(error); - return test_bit(bit, addr); - } -+EXPORT_SYMBOL_GPL(memory_bm_test_bit_index); -+ -+int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) -+{ -+ return memory_bm_test_bit_index(bm, pfn, 0); -+} -+EXPORT_SYMBOL_GPL(memory_bm_test_bit); - - static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) - { -@@ -570,43 +625,185 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) - * this function. 
- */ - --static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) -+unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, int index) - { - struct bm_block *bb; - int bit; - -- bb = bm->cur.block; -+ bb = bm->states[index].block; - do { -- bit = bm->cur.bit; -+ bit = bm->states[index].bit; - bit = find_next_bit(bb->data, bm_block_bits(bb), bit); - if (bit < bm_block_bits(bb)) - goto Return_pfn; - - bb = list_entry(bb->hook.next, struct bm_block, hook); -- bm->cur.block = bb; -- bm->cur.bit = 0; -+ bm->states[index].block = bb; -+ bm->states[index].bit = 0; - } while (&bb->hook != &bm->blocks); - -- memory_bm_position_reset(bm); -+ memory_bm_position_reset_index(bm, index); - return BM_END_OF_MAP; - - Return_pfn: -- bm->cur.bit = bit + 1; -+ bm->states[index].bit = bit + 1; - return bb->start_pfn + bit; - } -+EXPORT_SYMBOL_GPL(memory_bm_next_pfn_index); - --/** -- * This structure represents a range of page frames the contents of which -- * should not be saved during the suspend. 
-- */ -+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) -+{ -+ return memory_bm_next_pfn_index(bm, 0); -+} -+EXPORT_SYMBOL_GPL(memory_bm_next_pfn); - --struct nosave_region { -- struct list_head list; -- unsigned long start_pfn; -- unsigned long end_pfn; --}; -+void memory_bm_clear(struct memory_bitmap *bm) -+{ -+ unsigned long pfn; - --static LIST_HEAD(nosave_regions); -+ memory_bm_position_reset(bm); -+ pfn = memory_bm_next_pfn(bm); -+ while (pfn != BM_END_OF_MAP) { -+ memory_bm_clear_bit(bm, pfn); -+ pfn = memory_bm_next_pfn(bm); -+ } -+} -+EXPORT_SYMBOL_GPL(memory_bm_clear); -+ -+void memory_bm_copy(struct memory_bitmap *source, struct memory_bitmap *dest) -+{ -+ unsigned long pfn; -+ -+ memory_bm_position_reset(source); -+ pfn = memory_bm_next_pfn(source); -+ while (pfn != BM_END_OF_MAP) { -+ memory_bm_set_bit(dest, pfn); -+ pfn = memory_bm_next_pfn(source); -+ } -+} -+EXPORT_SYMBOL_GPL(memory_bm_copy); -+ -+void memory_bm_dup(struct memory_bitmap *source, struct memory_bitmap *dest) -+{ -+ memory_bm_clear(dest); -+ memory_bm_copy(source, dest); -+} -+EXPORT_SYMBOL_GPL(memory_bm_dup); -+ -+#ifdef CONFIG_TOI -+#define DEFINE_MEMORY_BITMAP(name) \ -+struct memory_bitmap *name; \ -+EXPORT_SYMBOL_GPL(name) -+ -+DEFINE_MEMORY_BITMAP(pageset1_map); -+DEFINE_MEMORY_BITMAP(pageset1_copy_map); -+DEFINE_MEMORY_BITMAP(pageset2_map); -+DEFINE_MEMORY_BITMAP(page_resave_map); -+DEFINE_MEMORY_BITMAP(io_map); -+DEFINE_MEMORY_BITMAP(nosave_map); -+DEFINE_MEMORY_BITMAP(free_map); -+DEFINE_MEMORY_BITMAP(compare_map); -+ -+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk) -+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) -+{ -+ int result = 0; -+ unsigned int nr = 0; -+ struct bm_block *bb; -+ -+ if (!bm) -+ return result; -+ -+ list_for_each_entry(bb, &bm->blocks, hook) -+ nr++; -+ -+ result = (*rw_chunk)(WRITE, NULL, (char *) &nr, sizeof(unsigned int)); -+ if (result) -+ return result; -+ -+ list_for_each_entry(bb, &bm->blocks, 
hook) { -+ result = (*rw_chunk)(WRITE, NULL, (char *) &bb->start_pfn, -+ 2 * sizeof(unsigned long)); -+ if (result) -+ return result; -+ -+ result = (*rw_chunk)(WRITE, NULL, (char *) bb->data, PAGE_SIZE); -+ if (result) -+ return result; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(memory_bm_write); -+ -+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk) -+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size)) -+{ -+ int result = 0; -+ unsigned int nr, i; -+ struct bm_block *bb; -+ -+ if (!bm) -+ return result; -+ -+ result = memory_bm_create(bm, GFP_KERNEL, 0); -+ -+ if (result) -+ return result; -+ -+ result = (*rw_chunk)(READ, NULL, (char *) &nr, sizeof(unsigned int)); -+ if (result) -+ goto Free; -+ -+ for (i = 0; i < nr; i++) { -+ unsigned long pfn; -+ -+ result = (*rw_chunk)(READ, NULL, (char *) &pfn, -+ sizeof(unsigned long)); -+ if (result) -+ goto Free; -+ -+ list_for_each_entry(bb, &bm->blocks, hook) -+ if (bb->start_pfn == pfn) -+ break; -+ -+ if (&bb->hook == &bm->blocks) { -+ printk(KERN_ERR -+ "TuxOnIce: Failed to load memory bitmap.\n"); -+ result = -EINVAL; -+ goto Free; -+ } -+ -+ result = (*rw_chunk)(READ, NULL, (char *) &pfn, -+ sizeof(unsigned long)); -+ if (result) -+ goto Free; -+ -+ if (pfn != bb->end_pfn) { -+ printk(KERN_ERR -+ "TuxOnIce: Failed to load memory bitmap. 
" -+ "End PFN doesn't match what was saved.\n"); -+ result = -EINVAL; -+ goto Free; -+ } -+ -+ result = (*rw_chunk)(READ, NULL, (char *) bb->data, PAGE_SIZE); -+ -+ if (result) -+ goto Free; -+ } -+ -+ return 0; -+ -+Free: -+ memory_bm_free(bm, PG_ANY); -+ return result; -+} -+EXPORT_SYMBOL_GPL(memory_bm_read); -+#endif -+ -+LIST_HEAD(nosave_regions); -+EXPORT_SYMBOL_GPL(nosave_regions); - - /** - * register_nosave_region - register a range of page frames the contents -@@ -849,7 +1046,7 @@ static unsigned int count_free_highmem_pages(void) - * We should save the page if it isn't Nosave or NosaveFree, or Reserved, - * and it isn't a part of a free chunk of pages. - */ --static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) -+struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) - { - struct page *page; - -@@ -871,6 +1068,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) - - return page; - } -+EXPORT_SYMBOL_GPL(saveable_highmem_page); - - /** - * count_highmem_pages - compute the total number of saveable highmem -@@ -896,11 +1094,6 @@ static unsigned int count_highmem_pages(void) - } - return n; - } --#else --static inline void *saveable_highmem_page(struct zone *z, unsigned long p) --{ -- return NULL; --} - #endif /* CONFIG_HIGHMEM */ - - /** -@@ -911,7 +1104,7 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p) - * of pages statically defined as 'unsaveable', and it isn't a part of - * a free chunk of pages. 
- */ --static struct page *saveable_page(struct zone *zone, unsigned long pfn) -+struct page *saveable_page(struct zone *zone, unsigned long pfn) - { - struct page *page; - -@@ -936,6 +1129,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) - - return page; - } -+EXPORT_SYMBOL_GPL(saveable_page); - - /** - * count_data_pages - compute the total number of saveable non-highmem -@@ -1590,6 +1784,9 @@ asmlinkage __visible int swsusp_save(void) - { - unsigned int nr_pages, nr_highmem; - -+ if (toi_running) -+ return toi_post_context_save(); -+ - printk(KERN_INFO "PM: Creating hibernation image:\n"); - - drain_local_pages(NULL); -@@ -1630,14 +1827,14 @@ asmlinkage __visible int swsusp_save(void) - } - - #ifndef CONFIG_ARCH_HIBERNATION_HEADER --static int init_header_complete(struct swsusp_info *info) -+int init_header_complete(struct swsusp_info *info) - { - memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname)); - info->version_code = LINUX_VERSION_CODE; - return 0; - } - --static char *check_image_kernel(struct swsusp_info *info) -+char *check_image_kernel(struct swsusp_info *info) - { - if (info->version_code != LINUX_VERSION_CODE) - return "kernel version"; -@@ -1651,6 +1848,7 @@ static char *check_image_kernel(struct swsusp_info *info) - return "machine"; - return NULL; - } -+EXPORT_SYMBOL_GPL(check_image_kernel); - #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ - - unsigned long snapshot_get_image_size(void) -@@ -1658,7 +1856,7 @@ unsigned long snapshot_get_image_size(void) - return nr_copy_pages + nr_meta_pages + 1; - } - --static int init_header(struct swsusp_info *info) -+int init_header(struct swsusp_info *info) - { - memset(info, 0, sizeof(struct swsusp_info)); - info->num_physpages = get_num_physpages(); -@@ -1668,6 +1866,7 @@ static int init_header(struct swsusp_info *info) - info->size <<= PAGE_SHIFT; - return init_header_complete(info); - } -+EXPORT_SYMBOL_GPL(init_header); - - /** - * pack_pfns - pfns corresponding to the 
set bits found in the bitmap @bm -diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c -index 8233cd4..2a37f76 100644 ---- a/kernel/power/suspend.c -+++ b/kernel/power/suspend.c -@@ -302,6 +302,7 @@ int suspend_devices_and_enter(suspend_state_t state) - suspend_ops->recover(); - goto Resume_devices; - } -+EXPORT_SYMBOL_GPL(suspend_devices_and_enter); - - /** - * suspend_finish - Clean up before finishing the suspend sequence. -diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h -new file mode 100644 -index 0000000..0a511cb3 ---- /dev/null -+++ b/kernel/power/tuxonice.h -@@ -0,0 +1,227 @@ -+/* -+ * kernel/power/tuxonice.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * It contains declarations used throughout swsusp. -+ * -+ */ -+ -+#ifndef KERNEL_POWER_TOI_H -+#define KERNEL_POWER_TOI_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "tuxonice_pageflags.h" -+#include "power.h" -+ -+#define TOI_CORE_VERSION "3.3" -+#define TOI_HEADER_VERSION 3 -+#define MY_BOOT_KERNEL_DATA_VERSION 4 -+ -+struct toi_boot_kernel_data { -+ int version; -+ int size; -+ unsigned long toi_action; -+ unsigned long toi_debug_state; -+ u32 toi_default_console_level; -+ int toi_io_time[2][2]; -+ char toi_nosave_commandline[COMMAND_LINE_SIZE]; -+ unsigned long pages_used[33]; -+ unsigned long incremental_bytes_in; -+ unsigned long incremental_bytes_out; -+ unsigned long compress_bytes_in; -+ unsigned long compress_bytes_out; -+ unsigned long pruned_pages; -+}; -+ -+extern struct toi_boot_kernel_data toi_bkd; -+ -+/* Location of book kernel data struct in kernel being resumed */ -+extern unsigned long boot_kernel_data_buffer; -+ -+/* == Action states == */ -+ -+enum { -+ TOI_REBOOT, -+ TOI_PAUSE, -+ TOI_LOGALL, -+ TOI_CAN_CANCEL, -+ TOI_KEEP_IMAGE, -+ TOI_FREEZER_TEST, -+ TOI_SINGLESTEP, -+ TOI_PAUSE_NEAR_PAGESET_END, -+ TOI_TEST_FILTER_SPEED, -+ TOI_TEST_BIO, 
-+ TOI_NO_PAGESET2, -+ TOI_IGNORE_ROOTFS, -+ TOI_REPLACE_SWSUSP, -+ TOI_PAGESET2_FULL, -+ TOI_ABORT_ON_RESAVE_NEEDED, -+ TOI_NO_MULTITHREADED_IO, -+ TOI_NO_DIRECT_LOAD, /* Obsolete */ -+ TOI_LATE_CPU_HOTPLUG, -+ TOI_GET_MAX_MEM_ALLOCD, -+ TOI_NO_FLUSHER_THREAD, -+ TOI_NO_PS2_IF_UNNEEDED, -+ TOI_POST_RESUME_BREAKPOINT, -+ TOI_NO_READAHEAD, -+}; -+ -+extern unsigned long toi_bootflags_mask; -+ -+#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action)) -+ -+/* == Result states == */ -+ -+enum { -+ TOI_ABORTED, -+ TOI_ABORT_REQUESTED, -+ TOI_NOSTORAGE_AVAILABLE, -+ TOI_INSUFFICIENT_STORAGE, -+ TOI_FREEZING_FAILED, -+ TOI_KEPT_IMAGE, -+ TOI_WOULD_EAT_MEMORY, -+ TOI_UNABLE_TO_FREE_ENOUGH_MEMORY, -+ TOI_PM_SEM, -+ TOI_DEVICE_REFUSED, -+ TOI_SYSDEV_REFUSED, -+ TOI_EXTRA_PAGES_ALLOW_TOO_SMALL, -+ TOI_UNABLE_TO_PREPARE_IMAGE, -+ TOI_FAILED_MODULE_INIT, -+ TOI_FAILED_MODULE_CLEANUP, -+ TOI_FAILED_IO, -+ TOI_OUT_OF_MEMORY, -+ TOI_IMAGE_ERROR, -+ TOI_PLATFORM_PREP_FAILED, -+ TOI_CPU_HOTPLUG_FAILED, -+ TOI_ARCH_PREPARE_FAILED, /* Removed Linux-3.0 */ -+ TOI_RESAVE_NEEDED, -+ TOI_CANT_SUSPEND, -+ TOI_NOTIFIERS_PREPARE_FAILED, -+ TOI_PRE_SNAPSHOT_FAILED, -+ TOI_PRE_RESTORE_FAILED, -+ TOI_USERMODE_HELPERS_ERR, -+ TOI_CANT_USE_ALT_RESUME, -+ TOI_HEADER_TOO_BIG, -+ TOI_WAKEUP_EVENT, -+ TOI_SYSCORE_REFUSED, -+ TOI_DPM_PREPARE_FAILED, -+ TOI_DPM_SUSPEND_FAILED, -+ TOI_NUM_RESULT_STATES /* Used in printing debug info only */ -+}; -+ -+extern unsigned long toi_result; -+ -+#define set_result_state(bit) (test_and_set_bit(bit, &toi_result)) -+#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \ -+ test_and_set_bit(bit, &toi_result)) -+#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result)) -+#define test_result_state(bit) (test_bit(bit, &toi_result)) -+ -+/* == Debug sections and levels == */ -+ -+/* debugging levels. 
*/ -+enum { -+ TOI_STATUS = 0, -+ TOI_ERROR = 2, -+ TOI_LOW, -+ TOI_MEDIUM, -+ TOI_HIGH, -+ TOI_VERBOSE, -+}; -+ -+enum { -+ TOI_ANY_SECTION, -+ TOI_EAT_MEMORY, -+ TOI_IO, -+ TOI_HEADER, -+ TOI_WRITER, -+ TOI_MEMORY, -+ TOI_PAGEDIR, -+ TOI_COMPRESS, -+ TOI_BIO, -+}; -+ -+#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state)) -+#define clear_debug_state(bit) \ -+ (test_and_clear_bit(bit, &toi_bkd.toi_debug_state)) -+#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state)) -+ -+/* == Steps in hibernating == */ -+ -+enum { -+ STEP_HIBERNATE_PREPARE_IMAGE, -+ STEP_HIBERNATE_SAVE_IMAGE, -+ STEP_HIBERNATE_POWERDOWN, -+ STEP_RESUME_CAN_RESUME, -+ STEP_RESUME_LOAD_PS1, -+ STEP_RESUME_DO_RESTORE, -+ STEP_RESUME_READ_PS2, -+ STEP_RESUME_GO, -+ STEP_RESUME_ALT_IMAGE, -+ STEP_CLEANUP, -+ STEP_QUIET_CLEANUP -+}; -+ -+/* == TuxOnIce states == -+ (see also include/linux/suspend.h) */ -+ -+#define get_toi_state() (toi_state) -+#define restore_toi_state(saved_state) \ -+ do { toi_state = saved_state; } while (0) -+ -+/* == Module support == */ -+ -+struct toi_core_fns { -+ int (*post_context_save)(void); -+ unsigned long (*get_nonconflicting_page)(void); -+ int (*try_hibernate)(void); -+ void (*try_resume)(void); -+}; -+ -+extern struct toi_core_fns *toi_core_fns; -+ -+/* == All else == */ -+#define KB(x) ((x) << (PAGE_SHIFT - 10)) -+#define MB(x) ((x) >> (20 - PAGE_SHIFT)) -+ -+extern int toi_start_anything(int toi_or_resume); -+extern void toi_finish_anything(int toi_or_resume); -+ -+extern int save_image_part1(void); -+extern int toi_atomic_restore(void); -+ -+extern int toi_try_hibernate(void); -+extern void toi_try_resume(void); -+ -+extern int __toi_post_context_save(void); -+ -+extern unsigned int nr_hibernates; -+extern char alt_resume_param[256]; -+ -+extern void copyback_post(void); -+extern int toi_hibernate(void); -+extern unsigned long extra_pd1_pages_used; -+ -+#define SECTOR_SIZE 512 -+ -+extern void toi_early_boot_message(int 
can_erase_image, int default_answer, -+ char *warning_reason, ...); -+ -+extern int do_check_can_resume(void); -+extern int do_toi_step(int step); -+extern int toi_launch_userspace_program(char *command, int channel_no, -+ int wait, int debug); -+ -+extern char tuxonice_signature[9]; -+ -+extern int toi_start_other_threads(void); -+extern void toi_stop_other_threads(void); -+#endif -diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c -new file mode 100644 -index 0000000..fa44532 ---- /dev/null -+++ b/kernel/power/tuxonice_alloc.c -@@ -0,0 +1,314 @@ -+/* -+ * kernel/power/tuxonice_alloc.c -+ * -+ * Copyright (C) 2008-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ */ -+ -+#ifdef CONFIG_PM_DEBUG -+#include -+#include -+#include "tuxonice_modules.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice.h" -+ -+#define TOI_ALLOC_PATHS 40 -+ -+static DEFINE_MUTEX(toi_alloc_mutex); -+ -+static struct toi_module_ops toi_alloc_ops; -+ -+static int toi_fail_num; -+ -+static atomic_t toi_alloc_count[TOI_ALLOC_PATHS], -+ toi_free_count[TOI_ALLOC_PATHS], -+ toi_test_count[TOI_ALLOC_PATHS], -+ toi_fail_count[TOI_ALLOC_PATHS]; -+static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS]; -+static int cur_allocd, max_allocd; -+ -+static char *toi_alloc_desc[TOI_ALLOC_PATHS] = { -+ "", /* 0 */ -+ "get_io_info_struct", -+ "extent", -+ "extent (loading chain)", -+ "userui channel", -+ "userui arg", /* 5 */ -+ "attention list metadata", -+ "extra pagedir memory metadata", -+ "bdev metadata", -+ "extra pagedir memory", -+ "header_locations_read", /* 10 */ -+ "bio queue", -+ "prepare_readahead", -+ "i/o buffer", -+ "writer buffer in bio_init", -+ "checksum buffer", /* 15 */ -+ "compression buffer", -+ "filewriter signature op", -+ "set resume param alloc1", -+ "set resume param alloc2", -+ "debugging info buffer", /* 20 */ -+ "check can resume buffer", -+ "write 
module config buffer", -+ "read module config buffer", -+ "write image header buffer", -+ "read pageset1 buffer", /* 25 */ -+ "get_have_image_data buffer", -+ "checksum page", -+ "worker rw loop", -+ "get nonconflicting page", -+ "ps1 load addresses", /* 30 */ -+ "remove swap image", -+ "swap image exists", -+ "swap parse sig location", -+ "sysfs kobj", -+ "swap mark resume attempted buffer", /* 35 */ -+ "cluster member", -+ "boot kernel data buffer", -+ "setting swap signature", -+ "block i/o bdev struct" -+}; -+ -+#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \ -+ do { \ -+ BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \ -+ \ -+ if (FAIL_NUM == toi_fail_num) { \ -+ atomic_inc(&toi_test_count[FAIL_NUM]); \ -+ toi_fail_num = 0; \ -+ return FAIL_VAL; \ -+ } \ -+ } while (0) -+ -+static void alloc_update_stats(int fail_num, void *result, int size) -+{ -+ if (!result) { -+ atomic_inc(&toi_fail_count[fail_num]); -+ return; -+ } -+ -+ atomic_inc(&toi_alloc_count[fail_num]); -+ if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { -+ mutex_lock(&toi_alloc_mutex); -+ toi_cur_allocd[fail_num]++; -+ cur_allocd += size; -+ if (unlikely(cur_allocd > max_allocd)) { -+ int i; -+ -+ for (i = 0; i < TOI_ALLOC_PATHS; i++) -+ toi_max_allocd[i] = toi_cur_allocd[i]; -+ max_allocd = cur_allocd; -+ } -+ mutex_unlock(&toi_alloc_mutex); -+ } -+} -+ -+static void free_update_stats(int fail_num, int size) -+{ -+ BUG_ON(fail_num >= TOI_ALLOC_PATHS); -+ atomic_inc(&toi_free_count[fail_num]); -+ if (unlikely(atomic_read(&toi_free_count[fail_num]) > -+ atomic_read(&toi_alloc_count[fail_num]))) -+ dump_stack(); -+ if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) { -+ mutex_lock(&toi_alloc_mutex); -+ cur_allocd -= size; -+ toi_cur_allocd[fail_num]--; -+ mutex_unlock(&toi_alloc_mutex); -+ } -+} -+ -+void *toi_kzalloc(int fail_num, size_t size, gfp_t flags) -+{ -+ void *result; -+ -+ if (toi_alloc_ops.enabled) -+ MIGHT_FAIL(fail_num, NULL); -+ result = kzalloc(size, flags); -+ if 
(toi_alloc_ops.enabled) -+ alloc_update_stats(fail_num, result, size); -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ return result; -+} -+EXPORT_SYMBOL_GPL(toi_kzalloc); -+ -+unsigned long toi_get_free_pages(int fail_num, gfp_t mask, -+ unsigned int order) -+{ -+ unsigned long result; -+ -+ if (toi_alloc_ops.enabled) -+ MIGHT_FAIL(fail_num, 0); -+ result = __get_free_pages(mask, order); -+ if (toi_alloc_ops.enabled) -+ alloc_update_stats(fail_num, (void *) result, -+ PAGE_SIZE << order); -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ return result; -+} -+EXPORT_SYMBOL_GPL(toi_get_free_pages); -+ -+struct page *toi_alloc_page(int fail_num, gfp_t mask) -+{ -+ struct page *result; -+ -+ if (toi_alloc_ops.enabled) -+ MIGHT_FAIL(fail_num, NULL); -+ result = alloc_page(mask); -+ if (toi_alloc_ops.enabled) -+ alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ return result; -+} -+EXPORT_SYMBOL_GPL(toi_alloc_page); -+ -+unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask) -+{ -+ unsigned long result; -+ -+ if (toi_alloc_ops.enabled) -+ MIGHT_FAIL(fail_num, 0); -+ result = get_zeroed_page(mask); -+ if (toi_alloc_ops.enabled) -+ alloc_update_stats(fail_num, (void *) result, PAGE_SIZE); -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ return result; -+} -+EXPORT_SYMBOL_GPL(toi_get_zeroed_page); -+ -+void toi_kfree(int fail_num, const void *arg, int size) -+{ -+ if (arg && toi_alloc_ops.enabled) -+ free_update_stats(fail_num, size); -+ -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ kfree(arg); -+} -+EXPORT_SYMBOL_GPL(toi_kfree); -+ -+void toi_free_page(int fail_num, unsigned long virt) -+{ -+ if (virt && toi_alloc_ops.enabled) -+ free_update_stats(fail_num, PAGE_SIZE); -+ -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ free_page(virt); -+} -+EXPORT_SYMBOL_GPL(toi_free_page); -+ -+void toi__free_page(int fail_num, struct page *page) -+{ -+ if (page && 
toi_alloc_ops.enabled) -+ free_update_stats(fail_num, PAGE_SIZE); -+ -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ __free_page(page); -+} -+EXPORT_SYMBOL_GPL(toi__free_page); -+ -+void toi_free_pages(int fail_num, struct page *page, int order) -+{ -+ if (page && toi_alloc_ops.enabled) -+ free_update_stats(fail_num, PAGE_SIZE << order); -+ -+ if (fail_num == toi_trace_allocs) -+ dump_stack(); -+ __free_pages(page, order); -+} -+ -+void toi_alloc_print_debug_stats(void) -+{ -+ int i, header_done = 0; -+ -+ if (!toi_alloc_ops.enabled) -+ return; -+ -+ for (i = 0; i < TOI_ALLOC_PATHS; i++) -+ if (atomic_read(&toi_alloc_count[i]) != -+ atomic_read(&toi_free_count[i])) { -+ if (!header_done) { -+ printk(KERN_INFO "Idx Allocs Frees Tests " -+ " Fails Max Description\n"); -+ header_done = 1; -+ } -+ -+ printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i, -+ atomic_read(&toi_alloc_count[i]), -+ atomic_read(&toi_free_count[i]), -+ atomic_read(&toi_test_count[i]), -+ atomic_read(&toi_fail_count[i]), -+ toi_max_allocd[i], -+ toi_alloc_desc[i]); -+ } -+} -+EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats); -+ -+static int toi_alloc_initialise(int starting_cycle) -+{ -+ int i; -+ -+ if (!starting_cycle) -+ return 0; -+ -+ if (toi_trace_allocs) -+ dump_stack(); -+ -+ for (i = 0; i < TOI_ALLOC_PATHS; i++) { -+ atomic_set(&toi_alloc_count[i], 0); -+ atomic_set(&toi_free_count[i], 0); -+ atomic_set(&toi_test_count[i], 0); -+ atomic_set(&toi_fail_count[i], 0); -+ toi_cur_allocd[i] = 0; -+ toi_max_allocd[i] = 0; -+ }; -+ -+ max_allocd = 0; -+ cur_allocd = 0; -+ return 0; -+} -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL), -+ SYSFS_INT("trace", SYSFS_RW, &toi_trace_allocs, 0, TOI_ALLOC_PATHS, 0, -+ NULL), -+ SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_GET_MAX_MEM_ALLOCD, 0), -+ SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0, -+ NULL) -+}; -+ -+static struct 
toi_module_ops toi_alloc_ops = { -+ .type = MISC_HIDDEN_MODULE, -+ .name = "allocation debugging", -+ .directory = "alloc", -+ .module = THIS_MODULE, -+ .early = 1, -+ .initialise = toi_alloc_initialise, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+int toi_alloc_init(void) -+{ -+ int result = toi_register_module(&toi_alloc_ops); -+ return result; -+} -+ -+void toi_alloc_exit(void) -+{ -+ toi_unregister_module(&toi_alloc_ops); -+} -+#endif -diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h -new file mode 100644 -index 0000000..6e8167e ---- /dev/null -+++ b/kernel/power/tuxonice_alloc.h -@@ -0,0 +1,54 @@ -+/* -+ * kernel/power/tuxonice_alloc.h -+ * -+ * Copyright (C) 2008-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ */ -+ -+#include -+#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN) -+#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN) -+ -+#ifdef CONFIG_PM_DEBUG -+extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags); -+extern void toi_kfree(int fail_num, const void *arg, int size); -+ -+extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask, -+ unsigned int order); -+#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0) -+extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask); -+extern void toi_free_page(int fail_num, unsigned long buf); -+extern void toi__free_page(int fail_num, struct page *page); -+extern void toi_free_pages(int fail_num, struct page *page, int order); -+extern struct page *toi_alloc_page(int fail_num, gfp_t mask); -+extern int toi_alloc_init(void); -+extern void toi_alloc_exit(void); -+ -+extern void toi_alloc_print_debug_stats(void); -+ -+#else /* CONFIG_PM_DEBUG */ -+ -+#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS)) -+#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN)) -+ -+#define toi_get_free_pages(FAIL, 
FLAGS, ORDER) __get_free_pages(FLAGS, ORDER) -+#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS) -+#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS) -+#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0) -+#define toi__free_page(FAIL, PAGE) __free_page(PAGE) -+#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER) -+#define toi_alloc_page(FAIL, MASK) alloc_page(MASK) -+static inline int toi_alloc_init(void) -+{ -+ return 0; -+} -+ -+static inline void toi_alloc_exit(void) { } -+ -+static inline void toi_alloc_print_debug_stats(void) { } -+ -+#endif -+ -+extern int toi_trace_allocs; -diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c -new file mode 100644 -index 0000000..7b7b1cd ---- /dev/null -+++ b/kernel/power/tuxonice_atomic_copy.c -@@ -0,0 +1,473 @@ -+/* -+ * kernel/power/tuxonice_atomic_copy.c -+ * -+ * Copyright 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * Routines for doing the atomic save/restore. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "tuxonice.h" -+#include "tuxonice_storage.h" -+#include "tuxonice_power_off.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_io.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice_pageflags.h" -+#include "tuxonice_checksum.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_atomic_copy.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_modules.h" -+ -+unsigned long extra_pd1_pages_used; -+ -+/** -+ * free_pbe_list - free page backup entries used by the atomic copy code. -+ * @list: List to free. -+ * @highmem: Whether the list is in highmem. -+ * -+ * Normally, this function isn't used. If, however, we need to abort before -+ * doing the atomic copy, we use this to free the pbes previously allocated. 
-+ **/ -+static void free_pbe_list(struct pbe **list, int highmem) -+{ -+ while (*list) { -+ int i; -+ struct pbe *free_pbe, *next_page = NULL; -+ struct page *page; -+ -+ if (highmem) { -+ page = (struct page *) *list; -+ free_pbe = (struct pbe *) kmap(page); -+ } else { -+ page = virt_to_page(*list); -+ free_pbe = *list; -+ } -+ -+ for (i = 0; i < PBES_PER_PAGE; i++) { -+ if (!free_pbe) -+ break; -+ if (highmem) -+ toi__free_page(29, free_pbe->address); -+ else -+ toi_free_page(29, -+ (unsigned long) free_pbe->address); -+ free_pbe = free_pbe->next; -+ } -+ -+ if (highmem) { -+ if (free_pbe) -+ next_page = free_pbe; -+ kunmap(page); -+ } else { -+ if (free_pbe) -+ next_page = free_pbe; -+ } -+ -+ toi__free_page(29, page); -+ *list = (struct pbe *) next_page; -+ }; -+} -+ -+/** -+ * copyback_post - post atomic-restore actions -+ * -+ * After doing the atomic restore, we have a few more things to do: -+ * 1) We want to retain some values across the restore, so we now copy -+ * these from the nosave variables to the normal ones. -+ * 2) Set the status flags. -+ * 3) Resume devices. -+ * 4) Tell userui so it can redraw & restore settings. -+ * 5) Reread the page cache. -+ **/ -+void copyback_post(void) -+{ -+ struct toi_boot_kernel_data *bkd = -+ (struct toi_boot_kernel_data *) boot_kernel_data_buffer; -+ -+ if (toi_activate_storage(1)) -+ panic("Failed to reactivate our storage."); -+ -+ toi_post_atomic_restore_modules(bkd); -+ -+ toi_cond_pause(1, "About to reload secondary pagedir."); -+ -+ if (read_pageset2(0)) -+ panic("Unable to successfully reread the page cache."); -+ -+ /* -+ * If the user wants to sleep again after resuming from full-off, -+ * it's most likely to be in order to suspend to ram, so we'll -+ * do this check after loading pageset2, to give them the fastest -+ * wakeup when they are ready to use the computer again. 
-+ */ -+ toi_check_resleep(); -+} -+ -+/** -+ * toi_copy_pageset1 - do the atomic copy of pageset1 -+ * -+ * Make the atomic copy of pageset1. We can't use copy_page (as we once did) -+ * because we can't be sure what side effects it has. On my old Duron, with -+ * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt -+ * count at resume time 4 instead of 3. -+ * -+ * We don't want to call kmap_atomic unconditionally because it has the side -+ * effect of incrementing the preempt count, which will leave it one too high -+ * post resume (the page containing the preempt count will be copied after -+ * its incremented. This is essentially the same problem. -+ **/ -+void toi_copy_pageset1(void) -+{ -+ int i; -+ unsigned long source_index, dest_index; -+ -+ memory_bm_position_reset(pageset1_map); -+ memory_bm_position_reset(pageset1_copy_map); -+ -+ source_index = memory_bm_next_pfn(pageset1_map); -+ dest_index = memory_bm_next_pfn(pageset1_copy_map); -+ -+ for (i = 0; i < pagedir1.size; i++) { -+ unsigned long *origvirt, *copyvirt; -+ struct page *origpage, *copypage; -+ int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1, -+ was_present1, was_present2; -+ -+ origpage = pfn_to_page(source_index); -+ copypage = pfn_to_page(dest_index); -+ -+ origvirt = PageHighMem(origpage) ? -+ kmap_atomic(origpage) : -+ page_address(origpage); -+ -+ copyvirt = PageHighMem(copypage) ? 
-+ kmap_atomic(copypage) : -+ page_address(copypage); -+ -+ was_present1 = kernel_page_present(origpage); -+ if (!was_present1) -+ kernel_map_pages(origpage, 1, 1); -+ -+ was_present2 = kernel_page_present(copypage); -+ if (!was_present2) -+ kernel_map_pages(copypage, 1, 1); -+ -+ while (loop >= 0) { -+ *(copyvirt + loop) = *(origvirt + loop); -+ loop--; -+ } -+ -+ if (!was_present1) -+ kernel_map_pages(origpage, 1, 0); -+ -+ if (!was_present2) -+ kernel_map_pages(copypage, 1, 0); -+ -+ if (PageHighMem(origpage)) -+ kunmap_atomic(origvirt); -+ -+ if (PageHighMem(copypage)) -+ kunmap_atomic(copyvirt); -+ -+ source_index = memory_bm_next_pfn(pageset1_map); -+ dest_index = memory_bm_next_pfn(pageset1_copy_map); -+ } -+} -+ -+/** -+ * __toi_post_context_save - steps after saving the cpu context -+ * -+ * Steps taken after saving the CPU state to make the actual -+ * atomic copy. -+ * -+ * Called from swsusp_save in snapshot.c via toi_post_context_save. -+ **/ -+int __toi_post_context_save(void) -+{ -+ unsigned long old_ps1_size = pagedir1.size; -+ -+ check_checksums(); -+ -+ free_checksum_pages(); -+ -+ toi_recalculate_image_contents(1); -+ -+ extra_pd1_pages_used = pagedir1.size > old_ps1_size ? -+ pagedir1.size - old_ps1_size : 0; -+ -+ if (extra_pd1_pages_used > extra_pd1_pages_allowance) { -+ printk(KERN_INFO "Pageset1 has grown by %lu pages. " -+ "extra_pages_allowance is currently only %lu.\n", -+ pagedir1.size - old_ps1_size, -+ extra_pd1_pages_allowance); -+ -+ /* -+ * Highlevel code will see this, clear the state and -+ * retry if we haven't already done so twice. -+ */ -+ if (any_to_free(1)) { -+ set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); -+ return 1; -+ } -+ if (try_allocate_extra_memory()) { -+ printk(KERN_INFO "Failed to allocate the extra memory" -+ " needed. 
Restarting the process."); -+ set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); -+ return 1; -+ } -+ printk(KERN_INFO "However it looks like there's enough" -+ " free ram and storage to handle this, so " -+ " continuing anyway."); -+ /* -+ * What if try_allocate_extra_memory above calls -+ * toi_allocate_extra_pagedir_memory and it allocs a new -+ * slab page via toi_kzalloc which should be in ps1? So... -+ */ -+ toi_recalculate_image_contents(1); -+ } -+ -+ if (!test_action_state(TOI_TEST_FILTER_SPEED) && -+ !test_action_state(TOI_TEST_BIO)) -+ toi_copy_pageset1(); -+ -+ return 0; -+} -+ -+/** -+ * toi_hibernate - high level code for doing the atomic copy -+ * -+ * High-level code which prepares to do the atomic copy. Loosely based -+ * on the swsusp version, but with the following twists: -+ * - We set toi_running so the swsusp code uses our code paths. -+ * - We give better feedback regarding what goes wrong if there is a -+ * problem. -+ * - We use an extra function to call the assembly, just in case this code -+ * is in a module (return address). -+ **/ -+int toi_hibernate(void) -+{ -+ int error; -+ -+ toi_running = 1; /* For the swsusp code we use :< */ -+ -+ error = toi_lowlevel_builtin(); -+ -+ if (!error) { -+ struct toi_boot_kernel_data *bkd = -+ (struct toi_boot_kernel_data *) boot_kernel_data_buffer; -+ -+ /* -+ * The boot kernel's data may be larger (newer version) or -+ * smaller (older version) than ours. Copy the minimum -+ * of the two sizes, so that we don't overwrite valid values -+ * from pre-atomic copy. -+ */ -+ -+ memcpy(&toi_bkd, (char *) boot_kernel_data_buffer, -+ min_t(int, sizeof(struct toi_boot_kernel_data), -+ bkd->size)); -+ } -+ -+ toi_running = 0; -+ return error; -+} -+ -+/** -+ * toi_atomic_restore - prepare to do the atomic restore -+ * -+ * Get ready to do the atomic restore. 
This part gets us into the same -+ * state we are in prior to do calling do_toi_lowlevel while -+ * hibernating: hot-unplugging secondary cpus and freeze processes, -+ * before starting the thread that will do the restore. -+ **/ -+int toi_atomic_restore(void) -+{ -+ int error; -+ -+ toi_running = 1; -+ -+ toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore."); -+ -+ memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line, -+ strlen(saved_command_line)); -+ -+ toi_pre_atomic_restore_modules(&toi_bkd); -+ -+ if (add_boot_kernel_data_pbe()) -+ goto Failed; -+ -+ toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); -+ -+ if (toi_go_atomic(PMSG_QUIESCE, 0)) -+ goto Failed; -+ -+ /* We'll ignore saved state, but this gets preempt count (etc) right */ -+ save_processor_state(); -+ -+ error = swsusp_arch_resume(); -+ /* -+ * Code below is only ever reached in case of failure. Otherwise -+ * execution continues at place where swsusp_arch_suspend was called. -+ * -+ * We don't know whether it's safe to continue (this shouldn't happen), -+ * so lets err on the side of caution. -+ */ -+ BUG(); -+ -+Failed: -+ free_pbe_list(&restore_pblist, 0); -+#ifdef CONFIG_HIGHMEM -+ free_pbe_list(&restore_highmem_pblist, 1); -+#endif -+ toi_running = 0; -+ return 1; -+} -+ -+/** -+ * toi_go_atomic - do the actual atomic copy/restore -+ * @state: The state to use for dpm_suspend_start & power_down calls. -+ * @suspend_time: Whether we're suspending or resuming. 
-+ **/ -+int toi_go_atomic(pm_message_t state, int suspend_time) -+{ -+ if (suspend_time) { -+ if (platform_begin(1)) { -+ set_abort_result(TOI_PLATFORM_PREP_FAILED); -+ toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3); -+ return 1; -+ } -+ -+ if (dpm_prepare(PMSG_FREEZE)) { -+ set_abort_result(TOI_DPM_PREPARE_FAILED); -+ dpm_complete(PMSG_RECOVER); -+ toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3); -+ return 1; -+ } -+ } -+ -+ suspend_console(); -+ ftrace_stop(); -+ pm_restrict_gfp_mask(); -+ -+ if (suspend_time) { -+ if (dpm_suspend(state)) { -+ set_abort_result(TOI_DPM_SUSPEND_FAILED); -+ toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3); -+ return 1; -+ } -+ } else { -+ if (dpm_suspend_start(state)) { -+ set_abort_result(TOI_DPM_SUSPEND_FAILED); -+ toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3); -+ return 1; -+ } -+ } -+ -+ /* At this point, dpm_suspend_start() has been called, but *not* -+ * dpm_suspend_noirq(). We *must* dpm_suspend_noirq() now. -+ * Otherwise, drivers for some devices (e.g. interrupt controllers) -+ * become desynchronized with the actual state of the hardware -+ * at resume time, and evil weirdness ensues. 
-+ */ -+ -+ if (dpm_suspend_end(state)) { -+ set_abort_result(TOI_DEVICE_REFUSED); -+ toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1); -+ return 1; -+ } -+ -+ if (suspend_time) { -+ if (platform_pre_snapshot(1)) -+ set_abort_result(TOI_PRE_SNAPSHOT_FAILED); -+ } else { -+ if (platform_pre_restore(1)) -+ set_abort_result(TOI_PRE_RESTORE_FAILED); -+ } -+ -+ if (test_result_state(TOI_ABORTED)) { -+ toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1); -+ return 1; -+ } -+ -+ if (test_action_state(TOI_LATE_CPU_HOTPLUG)) { -+ if (disable_nonboot_cpus()) { -+ set_abort_result(TOI_CPU_HOTPLUG_FAILED); -+ toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG, -+ suspend_time, 1); -+ return 1; -+ } -+ } -+ -+ local_irq_disable(); -+ -+ if (syscore_suspend()) { -+ set_abort_result(TOI_SYSCORE_REFUSED); -+ toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1); -+ return 1; -+ } -+ -+ if (suspend_time && pm_wakeup_pending()) { -+ set_abort_result(TOI_WAKEUP_EVENT); -+ toi_end_atomic(ATOMIC_STEP_SYSCORE_RESUME, suspend_time, 1); -+ return 1; -+ } -+ return 0; -+} -+ -+/** -+ * toi_end_atomic - post atomic copy/restore routines -+ * @stage: What step to start at. -+ * @suspend_time: Whether we're suspending or resuming. -+ * @error: Whether we're recovering from an error. -+ **/ -+void toi_end_atomic(int stage, int suspend_time, int error) -+{ -+ pm_message_t msg = suspend_time ? (error ? 
PMSG_RECOVER : PMSG_THAW) : -+ PMSG_RESTORE; -+ -+ switch (stage) { -+ case ATOMIC_ALL_STEPS: -+ if (!suspend_time) { -+ events_check_enabled = false; -+ } -+ platform_leave(1); -+ case ATOMIC_STEP_SYSCORE_RESUME: -+ syscore_resume(); -+ case ATOMIC_STEP_IRQS: -+ local_irq_enable(); -+ case ATOMIC_STEP_CPU_HOTPLUG: -+ if (test_action_state(TOI_LATE_CPU_HOTPLUG)) -+ enable_nonboot_cpus(); -+ case ATOMIC_STEP_PLATFORM_FINISH: -+ if (!suspend_time && error & 2) -+ platform_restore_cleanup(1); -+ else -+ platform_finish(1); -+ dpm_resume_start(msg); -+ case ATOMIC_STEP_DEVICE_RESUME: -+ if (suspend_time && (error & 2)) -+ platform_recover(1); -+ dpm_resume(msg); -+ if (error || !toi_in_suspend()) -+ pm_restore_gfp_mask(); -+ ftrace_start(); -+ resume_console(); -+ case ATOMIC_STEP_DPM_COMPLETE: -+ dpm_complete(msg); -+ case ATOMIC_STEP_PLATFORM_END: -+ platform_end(1); -+ -+ toi_prepare_status(DONT_CLEAR_BAR, "Post atomic."); -+ } -+} -diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h -new file mode 100644 -index 0000000..86aaae3 ---- /dev/null -+++ b/kernel/power/tuxonice_atomic_copy.h -@@ -0,0 +1,23 @@ -+/* -+ * kernel/power/tuxonice_atomic_copy.h -+ * -+ * Copyright 2008-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * Routines for doing the atomic save/restore. 
-+ */ -+ -+enum { -+ ATOMIC_ALL_STEPS, -+ ATOMIC_STEP_SYSCORE_RESUME, -+ ATOMIC_STEP_IRQS, -+ ATOMIC_STEP_CPU_HOTPLUG, -+ ATOMIC_STEP_PLATFORM_FINISH, -+ ATOMIC_STEP_DEVICE_RESUME, -+ ATOMIC_STEP_DPM_COMPLETE, -+ ATOMIC_STEP_PLATFORM_END, -+}; -+ -+int toi_go_atomic(pm_message_t state, int toi_time); -+void toi_end_atomic(int stage, int toi_time, int error); -diff --git a/kernel/power/tuxonice_bio.h b/kernel/power/tuxonice_bio.h -new file mode 100644 -index 0000000..65130c8 ---- /dev/null -+++ b/kernel/power/tuxonice_bio.h -@@ -0,0 +1,77 @@ -+/* -+ * kernel/power/tuxonice_bio.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * This file contains declarations for functions exported from -+ * tuxonice_bio.c, which contains low level io functions. -+ */ -+ -+#include -+#include "tuxonice_extent.h" -+ -+void toi_put_extent_chain(struct hibernate_extent_chain *chain); -+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, -+ unsigned long start, unsigned long end); -+ -+struct hibernate_extent_saved_state { -+ int extent_num; -+ struct hibernate_extent *extent_ptr; -+ unsigned long offset; -+}; -+ -+struct toi_bdev_info { -+ struct toi_bdev_info *next; -+ struct hibernate_extent_chain blocks; -+ struct block_device *bdev; -+ struct toi_module_ops *allocator; -+ int allocator_index; -+ struct hibernate_extent_chain allocations; -+ char name[266]; /* "swap on " or "file " + up to 256 chars */ -+ -+ /* Saved in header */ -+ char uuid[17]; -+ dev_t dev_t; -+ int prio; -+ int bmap_shift; -+ int blocks_per_page; -+ unsigned long pages_used; -+ struct hibernate_extent_saved_state saved_state[4]; -+}; -+ -+struct toi_extent_iterate_state { -+ struct toi_bdev_info *current_chain; -+ int num_chains; -+ int saved_chain_number[4]; -+ struct toi_bdev_info *saved_chain_ptr[4]; -+}; -+ -+/* -+ * Our exported interface so the swapwriter and filewriter don't -+ * need these functions duplicated. 
-+ */ -+struct toi_bio_ops { -+ int (*bdev_page_io) (int rw, struct block_device *bdev, long pos, -+ struct page *page); -+ int (*register_storage)(struct toi_bdev_info *new); -+ void (*free_storage)(void); -+}; -+ -+struct toi_allocator_ops { -+ unsigned long (*toi_swap_storage_available) (void); -+}; -+ -+extern struct toi_bio_ops toi_bio_ops; -+ -+extern char *toi_writer_buffer; -+extern int toi_writer_buffer_posn; -+ -+struct toi_bio_allocator_ops { -+ int (*register_storage) (void); -+ unsigned long (*storage_available)(void); -+ int (*allocate_storage) (struct toi_bdev_info *, unsigned long); -+ int (*bmap) (struct toi_bdev_info *); -+ void (*free_storage) (struct toi_bdev_info *); -+}; -diff --git a/kernel/power/tuxonice_bio_chains.c b/kernel/power/tuxonice_bio_chains.c -new file mode 100644 -index 0000000..73dbcf2 ---- /dev/null -+++ b/kernel/power/tuxonice_bio_chains.c -@@ -0,0 +1,1048 @@ -+/* -+ * kernel/power/tuxonice_bio_devinfo.c -+ * -+ * Copyright (C) 2009-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ */ -+ -+#include -+#include "tuxonice_bio.h" -+#include "tuxonice_bio_internal.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_ui.h" -+#include "tuxonice.h" -+#include "tuxonice_io.h" -+ -+static struct toi_bdev_info *prio_chain_head; -+static int num_chains; -+ -+/* Pointer to current entry being loaded/saved. */ -+struct toi_extent_iterate_state toi_writer_posn; -+ -+#define metadata_size (sizeof(struct toi_bdev_info) - \ -+ offsetof(struct toi_bdev_info, uuid)) -+ -+/* -+ * After section 0 (header) comes 2 => next_section[0] = 2 -+ */ -+static int next_section[3] = { 2, 3, 1 }; -+ -+/** -+ * dump_block_chains - print the contents of the bdev info array. 
-+ **/ -+void dump_block_chains(void) -+{ -+ int i = 0; -+ int j; -+ struct toi_bdev_info *cur_chain = prio_chain_head; -+ -+ while (cur_chain) { -+ struct hibernate_extent *this = cur_chain->blocks.first; -+ -+ printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio); -+ -+ while (this) { -+ printk(KERN_CONT " [%lu-%lu]%s", this->start, -+ this->end, this->next ? "," : ""); -+ this = this->next; -+ } -+ -+ printk("\n"); -+ cur_chain = cur_chain->next; -+ i++; -+ } -+ -+ printk(KERN_DEBUG "Saved states:\n"); -+ for (i = 0; i < 4; i++) { -+ printk(KERN_DEBUG "Slot %d: Chain %d.\n", -+ i, toi_writer_posn.saved_chain_number[i]); -+ -+ cur_chain = prio_chain_head; -+ j = 0; -+ while (cur_chain) { -+ printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n", -+ j, cur_chain->saved_state[i].extent_num, -+ cur_chain->saved_state[i].offset); -+ cur_chain = cur_chain->next; -+ j++; -+ } -+ printk(KERN_CONT "\n"); -+ } -+} -+ -+/** -+ * -+ **/ -+static void toi_extent_chain_next(void) -+{ -+ struct toi_bdev_info *this = toi_writer_posn.current_chain; -+ -+ if (!this->blocks.current_extent) -+ return; -+ -+ if (this->blocks.current_offset == this->blocks.current_extent->end) { -+ if (this->blocks.current_extent->next) { -+ this->blocks.current_extent = -+ this->blocks.current_extent->next; -+ this->blocks.current_offset = -+ this->blocks.current_extent->start; -+ } else { -+ this->blocks.current_extent = NULL; -+ this->blocks.current_offset = 0; -+ } -+ } else -+ this->blocks.current_offset++; -+} -+ -+/** -+ * -+ */ -+ -+static struct toi_bdev_info *__find_next_chain_same_prio(void) -+{ -+ struct toi_bdev_info *start_chain = toi_writer_posn.current_chain; -+ struct toi_bdev_info *this = start_chain; -+ int orig_prio = this->prio; -+ -+ do { -+ this = this->next; -+ -+ if (!this) -+ this = prio_chain_head; -+ -+ /* Back on original chain? Use it again. 
*/ -+ if (this == start_chain) -+ return start_chain; -+ -+ } while (!this->blocks.current_extent || this->prio != orig_prio); -+ -+ return this; -+} -+ -+static void find_next_chain(void) -+{ -+ struct toi_bdev_info *this; -+ -+ this = __find_next_chain_same_prio(); -+ -+ /* -+ * If we didn't get another chain of the same priority that we -+ * can use, look for the next priority. -+ */ -+ while (this && !this->blocks.current_extent) -+ this = this->next; -+ -+ toi_writer_posn.current_chain = this; -+} -+ -+/** -+ * toi_extent_state_next - go to the next extent -+ * @blocks: The number of values to progress. -+ * @stripe_mode: Whether to spread usage across all chains. -+ * -+ * Given a state, progress to the next valid entry. We may begin in an -+ * invalid state, as we do when invoked after extent_state_goto_start below. -+ * -+ * When using compression and expected_compression > 0, we let the image size -+ * be larger than storage, so we can validly run out of data to return. -+ **/ -+static unsigned long toi_extent_state_next(int blocks, int current_stream) -+{ -+ int i; -+ -+ if (!toi_writer_posn.current_chain) -+ return -ENOSPC; -+ -+ /* Assume chains always have lengths that are multiples of @blocks */ -+ for (i = 0; i < blocks; i++) -+ toi_extent_chain_next(); -+ -+ /* The header stream is not striped */ -+ if (current_stream || -+ !toi_writer_posn.current_chain->blocks.current_extent) -+ find_next_chain(); -+ -+ return toi_writer_posn.current_chain ? 
0 : -ENOSPC; -+} -+ -+static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this) -+{ -+ struct toi_bdev_info **prev_ptr; -+ struct toi_bdev_info *cur; -+ -+ /* Loop through the existing chain, finding where to insert it */ -+ prev_ptr = &prio_chain_head; -+ cur = prio_chain_head; -+ -+ while (cur && cur->prio >= this->prio) { -+ prev_ptr = &cur->next; -+ cur = cur->next; -+ } -+ -+ this->next = *prev_ptr; -+ *prev_ptr = this; -+ -+ this = prio_chain_head; -+ while (this) -+ this = this->next; -+ num_chains++; -+} -+ -+/** -+ * toi_extent_state_goto_start - reinitialize an extent chain iterator -+ * @state: Iterator to reinitialize -+ **/ -+void toi_extent_state_goto_start(void) -+{ -+ struct toi_bdev_info *this = prio_chain_head; -+ -+ while (this) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Setting current extent to %p.", this->blocks.first); -+ this->blocks.current_extent = this->blocks.first; -+ if (this->blocks.current_extent) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Setting current offset to %lu.", -+ this->blocks.current_extent->start); -+ this->blocks.current_offset = -+ this->blocks.current_extent->start; -+ } -+ -+ this = this->next; -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Setting current chain to %p.", -+ prio_chain_head); -+ toi_writer_posn.current_chain = prio_chain_head; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Leaving extent state goto start."); -+} -+ -+/** -+ * toi_extent_state_save - save state of the iterator -+ * @state: Current state of the chain -+ * @saved_state: Iterator to populate -+ * -+ * Given a state and a struct hibernate_extent_state_store, save the current -+ * position in a format that can be used with relocated chains (at -+ * resume time). 
-+ **/ -+void toi_extent_state_save(int slot) -+{ -+ struct toi_bdev_info *cur_chain = prio_chain_head; -+ struct hibernate_extent *extent; -+ struct hibernate_extent_saved_state *chain_state; -+ int i = 0; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.", -+ slot); -+ -+ if (!toi_writer_posn.current_chain) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current chain => " -+ "chain_num = -1."); -+ toi_writer_posn.saved_chain_number[slot] = -1; -+ return; -+ } -+ -+ while (cur_chain) { -+ i++; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saving chain %d (%p) " -+ "state, slot %d.", i, cur_chain, slot); -+ -+ chain_state = &cur_chain->saved_state[slot]; -+ -+ chain_state->offset = cur_chain->blocks.current_offset; -+ -+ if (toi_writer_posn.current_chain == cur_chain) { -+ toi_writer_posn.saved_chain_number[slot] = i; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "This is the chain " -+ "we were on => chain_num is %d.", i); -+ } -+ -+ if (!cur_chain->blocks.current_extent) { -+ chain_state->extent_num = 0; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current extent " -+ "for this chain => extent_num %d is 0.", -+ i); -+ cur_chain = cur_chain->next; -+ continue; -+ } -+ -+ extent = cur_chain->blocks.first; -+ chain_state->extent_num = 1; -+ -+ while (extent != cur_chain->blocks.current_extent) { -+ chain_state->extent_num++; -+ extent = extent->next; -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "extent num %d is %d.", i, -+ chain_state->extent_num); -+ -+ cur_chain = cur_chain->next; -+ } -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Completed saving extent state slot %d.", slot); -+} -+ -+/** -+ * toi_extent_state_restore - restore the position saved by extent_state_save -+ * @state: State to populate -+ * @saved_state: Iterator saved to restore -+ **/ -+void toi_extent_state_restore(int slot) -+{ -+ int i = 0; -+ struct toi_bdev_info *cur_chain = prio_chain_head; -+ struct hibernate_extent_saved_state *chain_state; -+ -+ toi_message(TOI_BIO, 
TOI_VERBOSE, 0, -+ "toi_extent_state_restore - slot %d.", slot); -+ -+ if (toi_writer_posn.saved_chain_number[slot] == -1) { -+ toi_writer_posn.current_chain = NULL; -+ return; -+ } -+ -+ while (cur_chain) { -+ int posn; -+ int j; -+ i++; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Restoring chain %d (%p) " -+ "state, slot %d.", i, cur_chain, slot); -+ -+ chain_state = &cur_chain->saved_state[slot]; -+ -+ posn = chain_state->extent_num; -+ -+ cur_chain->blocks.current_extent = cur_chain->blocks.first; -+ cur_chain->blocks.current_offset = chain_state->offset; -+ -+ if (i == toi_writer_posn.saved_chain_number[slot]) { -+ toi_writer_posn.current_chain = cur_chain; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Found current chain."); -+ } -+ -+ for (j = 0; j < 4; j++) -+ if (i == toi_writer_posn.saved_chain_number[j]) { -+ toi_writer_posn.saved_chain_ptr[j] = cur_chain; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Found saved chain ptr %d (%p) (offset" -+ " %d).", j, cur_chain, -+ cur_chain->saved_state[j].offset); -+ } -+ -+ if (posn) { -+ while (--posn) -+ cur_chain->blocks.current_extent = -+ cur_chain->blocks.current_extent->next; -+ } else -+ cur_chain->blocks.current_extent = NULL; -+ -+ cur_chain = cur_chain->next; -+ } -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done."); -+ if (test_action_state(TOI_LOGALL)) -+ dump_block_chains(); -+} -+ -+/* -+ * Storage needed -+ * -+ * Returns amount of space in the image header required -+ * for the chain data. This ignores the links between -+ * pages, which we factor in when allocating the space. 
-+ */ -+int toi_bio_devinfo_storage_needed(void) -+{ -+ int result = sizeof(num_chains); -+ struct toi_bdev_info *chain = prio_chain_head; -+ -+ while (chain) { -+ result += metadata_size; -+ -+ /* Chain size */ -+ result += sizeof(int); -+ -+ /* Extents */ -+ result += (2 * sizeof(unsigned long) * -+ chain->blocks.num_extents); -+ -+ chain = chain->next; -+ } -+ -+ result += 4 * sizeof(int); -+ return result; -+} -+ -+static unsigned long chain_pages_used(struct toi_bdev_info *chain) -+{ -+ struct hibernate_extent *this = chain->blocks.first; -+ struct hibernate_extent_saved_state *state = &chain->saved_state[3]; -+ unsigned long size = 0; -+ int extent_idx = 1; -+ -+ if (!state->extent_num) { -+ if (!this) -+ return 0; -+ else -+ return chain->blocks.size; -+ } -+ -+ while (extent_idx < state->extent_num) { -+ size += (this->end - this->start + 1); -+ this = this->next; -+ extent_idx++; -+ } -+ -+ /* We didn't use the one we're sitting on, so don't count it */ -+ return size + state->offset - this->start; -+} -+ -+/** -+ * toi_serialise_extent_chain - write a chain in the image -+ * @chain: Chain to write. 
-+ **/ -+static int toi_serialise_extent_chain(struct toi_bdev_info *chain) -+{ -+ struct hibernate_extent *this; -+ int ret; -+ int i = 1; -+ -+ chain->pages_used = chain_pages_used(chain); -+ -+ if (test_action_state(TOI_LOGALL)) -+ dump_block_chains(); -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).", -+ chain->dev_t); -+ /* Device info - dev_t, prio, bmap_shift, blocks per page, positions */ -+ ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops, -+ (char *) &chain->uuid, metadata_size); -+ if (ret) -+ return ret; -+ -+ /* Num extents */ -+ ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops, -+ (char *) &chain->blocks.num_extents, sizeof(int)); -+ if (ret) -+ return ret; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.", -+ chain->blocks.num_extents); -+ -+ this = chain->blocks.first; -+ while (this) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i); -+ ret = toiActiveAllocator->rw_header_chunk(WRITE, -+ &toi_blockwriter_ops, -+ (char *) this, 2 * sizeof(this->start)); -+ if (ret) -+ return ret; -+ this = this->next; -+ i++; -+ } -+ -+ return ret; -+} -+ -+int toi_serialise_extent_chains(void) -+{ -+ struct toi_bdev_info *this = prio_chain_head; -+ int result; -+ -+ /* Write the number of chains */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Write number of chains (%d)", -+ num_chains); -+ result = toiActiveAllocator->rw_header_chunk(WRITE, -+ &toi_blockwriter_ops, (char *) &num_chains, -+ sizeof(int)); -+ if (result) -+ return result; -+ -+ /* Then the chains themselves */ -+ while (this) { -+ result = toi_serialise_extent_chain(this); -+ if (result) -+ return result; -+ this = this->next; -+ } -+ -+ /* -+ * Finally, the chain we should be on at the start of each -+ * section. 
-+ */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saved chain numbers."); -+ result = toiActiveAllocator->rw_header_chunk(WRITE, -+ &toi_blockwriter_ops, -+ (char *) &toi_writer_posn.saved_chain_number[0], -+ 4 * sizeof(int)); -+ -+ return result; -+} -+ -+int toi_register_storage_chain(struct toi_bdev_info *new) -+{ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Inserting chain %p into list.", -+ new); -+ toi_insert_chain_in_prio_list(new); -+ return 0; -+} -+ -+static void free_bdev_info(struct toi_bdev_info *chain) -+{ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Free chain %p.", chain); -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Block extents."); -+ toi_put_extent_chain(&chain->blocks); -+ -+ /* -+ * The allocator may need to do more than just free the chains -+ * (swap_free, for example). Don't call from boot kernel. -+ */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Allocator extents."); -+ if (chain->allocator) -+ chain->allocator->bio_allocator_ops->free_storage(chain); -+ -+ /* -+ * Dropping out of reading atomic copy? Need to undo -+ * toi_open_by_devnum. 
-+ */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Bdev."); -+ if (chain->bdev && !IS_ERR(chain->bdev) && -+ chain->bdev != resume_block_device && -+ chain->bdev != header_block_device && -+ test_toi_state(TOI_TRYING_TO_RESUME)) -+ toi_close_bdev(chain->bdev); -+ -+ /* Poison */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Struct."); -+ toi_kfree(39, chain, sizeof(*chain)); -+ -+ if (prio_chain_head == chain) -+ prio_chain_head = NULL; -+ -+ num_chains--; -+} -+ -+void free_all_bdev_info(void) -+{ -+ struct toi_bdev_info *this = prio_chain_head; -+ -+ while (this) { -+ struct toi_bdev_info *next = this->next; -+ free_bdev_info(this); -+ this = next; -+ } -+ -+ memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn)); -+ prio_chain_head = NULL; -+} -+ -+static void set_up_start_position(void) -+{ -+ toi_writer_posn.current_chain = prio_chain_head; -+ go_next_page(0, 0); -+} -+ -+/** -+ * toi_load_extent_chain - read back a chain saved in the image -+ * @chain: Chain to load -+ * -+ * The linked list of extents is reconstructed from the disk. chain will point -+ * to the first entry. 
-+ **/ -+int toi_load_extent_chain(int index, int *num_loaded) -+{ -+ struct toi_bdev_info *chain = toi_kzalloc(39, -+ sizeof(struct toi_bdev_info), GFP_ATOMIC); -+ struct hibernate_extent *this, *last = NULL; -+ int i, ret; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Loading extent chain %d.", index); -+ /* Get dev_t, prio, bmap_shift, blocks per page, positions */ -+ ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, -+ (char *) &chain->uuid, metadata_size); -+ -+ if (ret) { -+ printk(KERN_ERR "Failed to read the size of extent chain.\n"); -+ toi_kfree(39, chain, sizeof(*chain)); -+ return 1; -+ } -+ -+ toi_bkd.pages_used[index] = chain->pages_used; -+ -+ ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, -+ (char *) &chain->blocks.num_extents, sizeof(int)); -+ if (ret) { -+ printk(KERN_ERR "Failed to read the size of extent chain.\n"); -+ toi_kfree(39, chain, sizeof(*chain)); -+ return 1; -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.", -+ chain->blocks.num_extents); -+ -+ for (i = 0; i < chain->blocks.num_extents; i++) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i + 1); -+ -+ this = toi_kzalloc(2, sizeof(struct hibernate_extent), -+ TOI_ATOMIC_GFP); -+ if (!this) { -+ printk(KERN_INFO "Failed to allocate a new extent.\n"); -+ free_bdev_info(chain); -+ return -ENOMEM; -+ } -+ this->next = NULL; -+ /* Get the next page */ -+ ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, -+ NULL, (char *) this, 2 * sizeof(this->start)); -+ if (ret) { -+ printk(KERN_INFO "Failed to read an extent.\n"); -+ toi_kfree(2, this, sizeof(struct hibernate_extent)); -+ free_bdev_info(chain); -+ return 1; -+ } -+ -+ if (last) -+ last->next = this; -+ else { -+ char b1[32], b2[32], b3[32]; -+ /* -+ * Open the bdev -+ */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Chain dev_t is %s. Resume dev t is %s. 
Header" -+ " bdev_t is %s.\n", -+ format_dev_t(b1, chain->dev_t), -+ format_dev_t(b2, resume_dev_t), -+ format_dev_t(b3, toi_sig_data->header_dev_t)); -+ -+ if (chain->dev_t == resume_dev_t) -+ chain->bdev = resume_block_device; -+ else if (chain->dev_t == toi_sig_data->header_dev_t) -+ chain->bdev = header_block_device; -+ else { -+ chain->bdev = toi_open_bdev(chain->uuid, -+ chain->dev_t, 1); -+ if (IS_ERR(chain->bdev)) { -+ free_bdev_info(chain); -+ return -ENODEV; -+ } -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Chain bmap shift " -+ "is %d and blocks per page is %d.", -+ chain->bmap_shift, -+ chain->blocks_per_page); -+ -+ chain->blocks.first = this; -+ -+ /* -+ * Couldn't do this earlier, but can't do -+ * goto_start now - we may have already used blocks -+ * in the first chain. -+ */ -+ chain->blocks.current_extent = this; -+ chain->blocks.current_offset = this->start; -+ -+ /* -+ * Can't wait until we've read the whole chain -+ * before we insert it in the list. We might need -+ * this chain to read the next page in the header -+ */ -+ toi_insert_chain_in_prio_list(chain); -+ } -+ -+ /* -+ * We have to wait until 2 extents are loaded before setting up -+ * properly because if the first extent has only one page, we -+ * will need to put the position on the second extent. Sounds -+ * obvious, but it wasn't! -+ */ -+ (*num_loaded)++; -+ if ((*num_loaded) == 2) -+ set_up_start_position(); -+ last = this; -+ } -+ -+ /* -+ * Shouldn't get empty chains, but it's not impossible. Link them in so -+ * they get freed properly later. 
-+ */ -+ if (!chain->blocks.num_extents) -+ toi_insert_chain_in_prio_list(chain); -+ -+ if (!chain->blocks.current_extent) { -+ chain->blocks.current_extent = chain->blocks.first; -+ if (chain->blocks.current_extent) -+ chain->blocks.current_offset = -+ chain->blocks.current_extent->start; -+ } -+ return 0; -+} -+ -+int toi_load_extent_chains(void) -+{ -+ int result; -+ int to_load; -+ int i; -+ int extents_loaded = 0; -+ -+ result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL, -+ (char *) &to_load, -+ sizeof(int)); -+ if (result) -+ return result; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d chains to read.", to_load); -+ -+ for (i = 0; i < to_load; i++) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.", -+ i, to_load); -+ result = toi_load_extent_chain(i, &extents_loaded); -+ if (result) -+ return result; -+ } -+ -+ /* If we never got to a second extent, we still need to do this. */ -+ if (extents_loaded == 1) -+ set_up_start_position(); -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Save chain numbers."); -+ result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, -+ &toi_blockwriter_ops, -+ (char *) &toi_writer_posn.saved_chain_number[0], -+ 4 * sizeof(int)); -+ -+ return result; -+} -+ -+static int toi_end_of_stream(int writing, int section_barrier) -+{ -+ struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain; -+ int compare_to = next_section[current_stream]; -+ struct toi_bdev_info *compare_chain = -+ toi_writer_posn.saved_chain_ptr[compare_to]; -+ int compare_offset = compare_chain ? 
-+ compare_chain->saved_state[compare_to].offset : 0; -+ -+ if (!section_barrier) -+ return 0; -+ -+ if (!cur_chain) -+ return 1; -+ -+ if (cur_chain == compare_chain && -+ cur_chain->blocks.current_offset == compare_offset) { -+ if (writing) { -+ if (!current_stream) { -+ debug_broken_header(); -+ return 1; -+ } -+ } else { -+ more_readahead = 0; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Reached the end of stream %d " -+ "(not an error).", current_stream); -+ return 1; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * go_next_page - skip blocks to the start of the next page -+ * @writing: Whether we're reading or writing the image. -+ * -+ * Go forward one page. -+ **/ -+int go_next_page(int writing, int section_barrier) -+{ -+ struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain; -+ int max = cur_chain ? cur_chain->blocks_per_page : 1; -+ -+ /* Nope. Go foward a page - or maybe two. Don't stripe the header, -+ * so that bad fragmentation doesn't put the extent data containing -+ * the location of the second page out of the first header page. -+ */ -+ if (toi_extent_state_next(max, current_stream)) { -+ /* Don't complain if readahead falls off the end */ -+ if (writing && section_barrier) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent state eof. " -+ "Expected compression ratio too optimistic?"); -+ if (test_action_state(TOI_LOGALL)) -+ dump_block_chains(); -+ } -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Ran out of extents to " -+ "read/write. (Not necessarily a fatal error."); -+ return -ENOSPC; -+ } -+ -+ return 0; -+} -+ -+int devices_of_same_priority(struct toi_bdev_info *this) -+{ -+ struct toi_bdev_info *check = prio_chain_head; -+ int i = 0; -+ -+ while (check) { -+ if (check->prio == this->prio) -+ i++; -+ check = check->next; -+ } -+ -+ return i; -+} -+ -+/** -+ * toi_bio_rw_page - do i/o on the next disk page in the image -+ * @writing: Whether reading or writing. -+ * @page: Page to do i/o on. 
-+ * @is_readahead: Whether we're doing readahead -+ * @free_group: The group used in allocating the page -+ * -+ * Submit a page for reading or writing, possibly readahead. -+ * Pass the group used in allocating the page as well, as it should -+ * be freed on completion of the bio if we're writing the page. -+ **/ -+int toi_bio_rw_page(int writing, struct page *page, -+ int is_readahead, int free_group) -+{ -+ int result = toi_end_of_stream(writing, 1); -+ struct toi_bdev_info *dev_info = toi_writer_posn.current_chain; -+ -+ if (result) { -+ if (writing) -+ abort_hibernate(TOI_INSUFFICIENT_STORAGE, -+ "Insufficient storage for your image."); -+ else -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking to " -+ "read/write another page when stream has " -+ "ended."); -+ return -ENOSPC; -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "%s %lx:%ld", -+ writing ? "Write" : "Read", -+ dev_info->dev_t, dev_info->blocks.current_offset); -+ -+ result = toi_do_io(writing, dev_info->bdev, -+ dev_info->blocks.current_offset << dev_info->bmap_shift, -+ page, is_readahead, 0, free_group); -+ -+ /* Ignore the result here - will check end of stream if come in again */ -+ go_next_page(writing, 1); -+ -+ if (result) -+ printk(KERN_ERR "toi_do_io returned %d.\n", result); -+ return result; -+} -+ -+dev_t get_header_dev_t(void) -+{ -+ return prio_chain_head->dev_t; -+} -+ -+struct block_device *get_header_bdev(void) -+{ -+ return prio_chain_head->bdev; -+} -+ -+unsigned long get_headerblock(void) -+{ -+ return prio_chain_head->blocks.first->start << -+ prio_chain_head->bmap_shift; -+} -+ -+int get_main_pool_phys_params(void) -+{ -+ struct toi_bdev_info *this = prio_chain_head; -+ int result; -+ -+ while (this) { -+ result = this->allocator->bio_allocator_ops->bmap(this); -+ if (result) -+ return result; -+ this = this->next; -+ } -+ -+ return 0; -+} -+ -+static int apply_header_reservation(void) -+{ -+ int i; -+ -+ if (!header_pages_reserved) { -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, 
-+ "No header pages reserved at the moment."); -+ return 0; -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Applying header reservation."); -+ -+ /* Apply header space reservation */ -+ toi_extent_state_goto_start(); -+ -+ for (i = 0; i < header_pages_reserved; i++) -+ if (go_next_page(1, 0)) -+ return -ENOSPC; -+ -+ /* The end of header pages will be the start of pageset 2 */ -+ toi_extent_state_save(2); -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Finished applying header reservation."); -+ return 0; -+} -+ -+static int toi_bio_register_storage(void) -+{ -+ int result = 0; -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || -+ this_module->type != BIO_ALLOCATOR_MODULE) -+ continue; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Registering storage from %s.", -+ this_module->name); -+ result = this_module->bio_allocator_ops->register_storage(); -+ if (result) -+ break; -+ } -+ -+ return result; -+} -+ -+int toi_bio_allocate_storage(unsigned long request) -+{ -+ struct toi_bdev_info *chain = prio_chain_head; -+ unsigned long to_get = request; -+ unsigned long extra_pages, needed; -+ int no_free = 0; -+ -+ if (!chain) { -+ int result = toi_bio_register_storage(); -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: " -+ "Registering storage."); -+ if (result) -+ return 0; -+ chain = prio_chain_head; -+ if (!chain) { -+ printk("TuxOnIce: No storage was registered.\n"); -+ return 0; -+ } -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: " -+ "Request is %lu pages.", request); -+ extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long) -+ + sizeof(int)), PAGE_SIZE); -+ needed = request + extra_pages + header_pages_reserved; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu " -+ "for header => %lu.", -+ extra_pages, header_pages_reserved, needed); -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Already allocated %lu pages.", -+ 
raw_pages_allocd); -+ -+ to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get); -+ -+ if (!to_get) -+ return apply_header_reservation(); -+ -+ while (to_get && chain) { -+ int num_group = devices_of_same_priority(chain); -+ int divisor = num_group - no_free; -+ int i; -+ unsigned long portion = DIV_ROUND_UP(to_get, divisor); -+ unsigned long got = 0; -+ unsigned long got_this_round = 0; -+ struct toi_bdev_info *top = chain; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ " Start of loop. To get is %lu. Divisor is %d.", -+ to_get, divisor); -+ no_free = 0; -+ -+ /* -+ * We're aiming to spread the allocated storage as evenly -+ * as possible, but we also want to get all the storage we -+ * can off this priority. -+ */ -+ for (i = 0; i < num_group; i++) { -+ struct toi_bio_allocator_ops *ops = -+ chain->allocator->bio_allocator_ops; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ " Asking for %lu pages from chain %p.", -+ portion, chain); -+ got = ops->allocate_storage(chain, portion); -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ " Got %lu pages from allocator %p.", -+ got, chain); -+ if (!got) -+ no_free++; -+ got_this_round += got; -+ chain = chain->next; -+ } -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " Loop finished. Got a " -+ "total of %lu pages from %d allocators.", -+ got_this_round, divisor - no_free); -+ -+ raw_pages_allocd += got_this_round; -+ to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : -+ 0; -+ -+ /* -+ * If we got anything from chains of this priority and we -+ * still have storage to allocate, go over this priority -+ * again. -+ */ -+ if (got_this_round && to_get) -+ chain = top; -+ else -+ no_free = 0; -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Finished allocating. Calling " -+ "get_main_pool_phys_params"); -+ /* Now let swap allocator bmap the pages */ -+ get_main_pool_phys_params(); -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done. 
Reserving header."); -+ return apply_header_reservation(); -+} -+ -+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd) -+{ -+ int i = 0; -+ struct toi_bdev_info *cur_chain = prio_chain_head; -+ -+ while (cur_chain) { -+ cur_chain->pages_used = bkd->pages_used[i]; -+ cur_chain = cur_chain->next; -+ i++; -+ } -+} -+ -+int toi_bio_chains_debug_info(char *buffer, int size) -+{ -+ /* Show what we actually used */ -+ struct toi_bdev_info *cur_chain = prio_chain_head; -+ int len = 0; -+ -+ while (cur_chain) { -+ len += scnprintf(buffer + len, size - len, " Used %lu pages " -+ "from %s.\n", cur_chain->pages_used, -+ cur_chain->name); -+ cur_chain = cur_chain->next; -+ } -+ -+ return len; -+} -diff --git a/kernel/power/tuxonice_bio_core.c b/kernel/power/tuxonice_bio_core.c -new file mode 100644 -index 0000000..41d3505 ---- /dev/null -+++ b/kernel/power/tuxonice_bio_core.c -@@ -0,0 +1,1839 @@ -+/* -+ * kernel/power/tuxonice_bio.c -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * This file contains block io functions for TuxOnIce. These are -+ * used by the swapwriter and it is planned that they will also -+ * be used by the NFSwriter. 
-+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice_bio.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_io.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_bio_internal.h" -+ -+#define MEMORY_ONLY 1 -+#define THROTTLE_WAIT 2 -+ -+/* #define MEASURE_MUTEX_CONTENTION */ -+#ifndef MEASURE_MUTEX_CONTENTION -+#define my_mutex_lock(index, the_lock) mutex_lock(the_lock) -+#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock) -+#else -+unsigned long mutex_times[2][2][NR_CPUS]; -+#define my_mutex_lock(index, the_lock) do { \ -+ int have_mutex; \ -+ have_mutex = mutex_trylock(the_lock); \ -+ if (!have_mutex) { \ -+ mutex_lock(the_lock); \ -+ mutex_times[index][0][smp_processor_id()]++; \ -+ } else { \ -+ mutex_times[index][1][smp_processor_id()]++; \ -+ } -+ -+#define my_mutex_unlock(index, the_lock) \ -+ mutex_unlock(the_lock); \ -+} while (0) -+#endif -+ -+static int page_idx, reset_idx; -+ -+static int target_outstanding_io = 1024; -+static int max_outstanding_writes, max_outstanding_reads; -+ -+static struct page *bio_queue_head, *bio_queue_tail; -+static atomic_t toi_bio_queue_size; -+static DEFINE_SPINLOCK(bio_queue_lock); -+ -+static int free_mem_throttle, throughput_throttle; -+int more_readahead = 1; -+static struct page *readahead_list_head, *readahead_list_tail; -+ -+static struct page *waiting_on; -+ -+static atomic_t toi_io_in_progress, toi_io_done; -+static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait); -+ -+int current_stream; -+/* Not static, so that the allocators can setup and complete -+ * writing the header */ -+char *toi_writer_buffer; -+int toi_writer_buffer_posn; -+ -+static DEFINE_MUTEX(toi_bio_mutex); -+static DEFINE_MUTEX(toi_bio_readahead_mutex); -+ -+static struct task_struct *toi_queue_flusher; -+static int 
toi_bio_queue_flush_pages(int dedicated_thread); -+ -+struct toi_module_ops toi_blockwriter_ops; -+ -+#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \ -+ atomic_read(&toi_bio_queue_size)) -+ -+unsigned long raw_pages_allocd, header_pages_reserved; -+ -+/** -+ * set_free_mem_throttle - set the point where we pause to avoid oom. -+ * -+ * Initially, this value is zero, but when we first fail to allocate memory, -+ * we set it (plus a buffer) and thereafter throttle i/o once that limit is -+ * reached. -+ **/ -+static void set_free_mem_throttle(void) -+{ -+ int new_throttle = nr_free_buffer_pages() + 256; -+ -+ if (new_throttle > free_mem_throttle) -+ free_mem_throttle = new_throttle; -+} -+ -+#define NUM_REASONS 7 -+static atomic_t reasons[NUM_REASONS]; -+static char *reason_name[NUM_REASONS] = { -+ "readahead not ready", -+ "bio allocation", -+ "synchronous I/O", -+ "toi_bio_get_new_page", -+ "memory low", -+ "readahead buffer allocation", -+ "throughput_throttle", -+}; -+ -+/* User Specified Parameters. */ -+unsigned long resume_firstblock; -+dev_t resume_dev_t; -+struct block_device *resume_block_device; -+static atomic_t resume_bdev_open_count; -+ -+struct block_device *header_block_device; -+ -+/** -+ * toi_open_bdev: Open a bdev at resume time. -+ * -+ * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t -+ * (the user can have resume= pointing at a swap partition/file that isn't -+ * swapon'd when they hibernate. MAX_SWAPFILES+1 for the first page of the -+ * header. It will be from a swap partition that was enabled when we hibernated, -+ * but we don't know it's real index until we read that first page. -+ * dev_t: The device major/minor. -+ * display_errs: Whether to try to do this quietly. -+ * -+ * We stored a dev_t in the image header. Open the matching device without -+ * requiring /dev/ in most cases and record the details needed -+ * to close it later and avoid duplicating work. 
-+ */ -+struct block_device *toi_open_bdev(char *uuid, dev_t default_device, -+ int display_errs) -+{ -+ struct block_device *bdev; -+ dev_t device = default_device; -+ char buf[32]; -+ int retried = 0; -+ -+retry: -+ if (uuid) { -+ struct fs_info seek; -+ strncpy((char *) &seek.uuid, uuid, 16); -+ seek.dev_t = 0; -+ seek.last_mount_size = 0; -+ device = blk_lookup_fs_info(&seek); -+ if (!device) { -+ device = default_device; -+ printk(KERN_DEBUG "Unable to resolve uuid. Falling back" -+ " to dev_t.\n"); -+ } else -+ printk(KERN_DEBUG "Resolved uuid to device %s.\n", -+ format_dev_t(buf, device)); -+ } -+ -+ if (!device) { -+ printk(KERN_ERR "TuxOnIce attempting to open a " -+ "blank dev_t!\n"); -+ dump_stack(); -+ return NULL; -+ } -+ bdev = toi_open_by_devnum(device); -+ -+ if (IS_ERR(bdev) || !bdev) { -+ if (!retried) { -+ retried = 1; -+ wait_for_device_probe(); -+ goto retry; -+ } -+ if (display_errs) -+ toi_early_boot_message(1, TOI_CONTINUE_REQ, -+ "Failed to get access to block device " -+ "\"%x\" (error %d).\n Maybe you need " -+ "to run mknod and/or lvmsetup in an " -+ "initrd/ramfs?", device, bdev); -+ return ERR_PTR(-EINVAL); -+ } -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "TuxOnIce got bdev %p for dev_t %x.", -+ bdev, device); -+ -+ return bdev; -+} -+ -+static void toi_bio_reserve_header_space(unsigned long request) -+{ -+ header_pages_reserved = request; -+} -+ -+/** -+ * do_bio_wait - wait for some TuxOnIce I/O to complete -+ * @reason: The array index of the reason we're waiting. -+ * -+ * Wait for a particular page of I/O if we're after a particular page. -+ * If we're not after a particular page, wait instead for all in flight -+ * I/O to be completed or for us to have enough free memory to be able -+ * to submit more I/O. -+ * -+ * If we wait, we also update our statistics regarding why we waited. 
-+ **/ -+static void do_bio_wait(int reason) -+{ -+ struct page *was_waiting_on = waiting_on; -+ -+ /* On SMP, waiting_on can be reset, so we make a copy */ -+ if (was_waiting_on) { -+ wait_on_page_locked(was_waiting_on); -+ atomic_inc(&reasons[reason]); -+ } else { -+ atomic_inc(&reasons[reason]); -+ -+ wait_event(num_in_progress_wait, -+ !atomic_read(&toi_io_in_progress) || -+ nr_free_buffer_pages() > free_mem_throttle); -+ } -+} -+ -+/** -+ * throttle_if_needed - wait for I/O completion if throttle points are reached -+ * @flags: What to check and how to act. -+ * -+ * Check whether we need to wait for some I/O to complete. We always check -+ * whether we have enough memory available, but may also (depending upon -+ * @reason) check if the throughput throttle limit has been reached. -+ **/ -+static int throttle_if_needed(int flags) -+{ -+ int free_pages = nr_free_buffer_pages(); -+ -+ /* Getting low on memory and I/O is in progress? */ -+ while (unlikely(free_pages < free_mem_throttle) && -+ atomic_read(&toi_io_in_progress) && -+ !test_result_state(TOI_ABORTED)) { -+ if (!(flags & THROTTLE_WAIT)) -+ return -ENOMEM; -+ do_bio_wait(4); -+ free_pages = nr_free_buffer_pages(); -+ } -+ -+ while (!(flags & MEMORY_ONLY) && throughput_throttle && -+ TOTAL_OUTSTANDING_IO >= throughput_throttle && -+ !test_result_state(TOI_ABORTED)) { -+ int result = toi_bio_queue_flush_pages(0); -+ if (result) -+ return result; -+ atomic_inc(&reasons[6]); -+ wait_event(num_in_progress_wait, -+ !atomic_read(&toi_io_in_progress) || -+ TOTAL_OUTSTANDING_IO < throughput_throttle); -+ } -+ -+ return 0; -+} -+ -+/** -+ * update_throughput_throttle - update the raw throughput throttle -+ * @jif_index: The number of times this function has been called. -+ * -+ * This function is called four times per second by the core, and used to limit -+ * the amount of I/O we submit at once, spreading out our waiting through the -+ * whole job and letting userui get an opportunity to do its work. 
-+ * -+ * We don't start limiting I/O until 1/4s has gone so that we get a -+ * decent sample for our initial limit, and keep updating it because -+ * throughput may vary (on rotating media, eg) with our block number. -+ * -+ * We throttle to 1/10s worth of I/O. -+ **/ -+static void update_throughput_throttle(int jif_index) -+{ -+ int done = atomic_read(&toi_io_done); -+ throughput_throttle = done * 2 / 5 / jif_index; -+} -+ -+/** -+ * toi_finish_all_io - wait for all outstanding i/o to complete -+ * -+ * Flush any queued but unsubmitted I/O and wait for it all to complete. -+ **/ -+static int toi_finish_all_io(void) -+{ -+ int result = toi_bio_queue_flush_pages(0); -+ toi_bio_queue_flusher_should_finish = 1; -+ wake_up(&toi_io_queue_flusher); -+ wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO); -+ return result; -+} -+ -+/** -+ * toi_end_bio - bio completion function. -+ * @bio: bio that has completed. -+ * @err: Error value. Yes, like end_swap_bio_read, we ignore it. -+ * -+ * Function called by the block driver from interrupt context when I/O is -+ * completed. If we were writing the page, we want to free it and will have -+ * set bio->bi_private to the parameter we should use in telling the page -+ * allocation accounting code what the page was allocated for. If we're -+ * reading the page, it will be in the singly linked list made from -+ * page->private pointers. -+ **/ -+static void toi_end_bio(struct bio *bio, int err) -+{ -+ struct page *page = bio->bi_io_vec[0].bv_page; -+ -+ BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); -+ -+ unlock_page(page); -+ bio_put(bio); -+ -+ if (waiting_on == page) -+ waiting_on = NULL; -+ -+ put_page(page); -+ -+ if (bio->bi_private) -+ toi__free_page((int) ((unsigned long) bio->bi_private) , page); -+ -+ bio_put(bio); -+ -+ atomic_dec(&toi_io_in_progress); -+ atomic_inc(&toi_io_done); -+ -+ wake_up(&num_in_progress_wait); -+} -+ -+/** -+ * submit - submit BIO request -+ * @writing: READ or WRITE. 
-+ * @dev: The block device we're using. -+ * @first_block: The first sector we're using. -+ * @page: The page being used for I/O. -+ * @free_group: If writing, the group that was used in allocating the page -+ * and which will be used in freeing the page from the completion -+ * routine. -+ * -+ * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the -+ * textbook - allocate and initialize the bio. If we're writing, make sure -+ * the page is marked as dirty. Then submit it and carry on." -+ * -+ * If we're just testing the speed of our own code, we fake having done all -+ * the hard work and all toi_end_bio immediately. -+ **/ -+static int submit(int writing, struct block_device *dev, sector_t first_block, -+ struct page *page, int free_group) -+{ -+ struct bio *bio = NULL; -+ int cur_outstanding_io, result; -+ -+ /* -+ * Shouldn't throttle if reading - can deadlock in the single -+ * threaded case as pages are only freed when we use the -+ * readahead. -+ */ -+ if (writing) { -+ result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT); -+ if (result) -+ return result; -+ } -+ -+ while (!bio) { -+ bio = bio_alloc(TOI_ATOMIC_GFP, 1); -+ if (!bio) { -+ set_free_mem_throttle(); -+ do_bio_wait(1); -+ } -+ } -+ -+ bio->bi_bdev = dev; -+ bio->bi_iter.bi_sector = first_block; -+ bio->bi_private = (void *) ((unsigned long) free_group); -+ bio->bi_end_io = toi_end_bio; -+ bio->bi_flags |= (1 << BIO_TOI); -+ -+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { -+ printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n", -+ (unsigned long long) first_block); -+ bio_put(bio); -+ return -EFAULT; -+ } -+ -+ bio_get(bio); -+ -+ cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress); -+ if (writing) { -+ if (cur_outstanding_io > max_outstanding_writes) -+ max_outstanding_writes = cur_outstanding_io; -+ } else { -+ if (cur_outstanding_io > max_outstanding_reads) -+ max_outstanding_reads = cur_outstanding_io; -+ } -+ -+ -+ /* Still read the header! 
*/ -+ if (unlikely(test_action_state(TOI_TEST_BIO) && writing)) { -+ /* Fake having done the hard work */ -+ set_bit(BIO_UPTODATE, &bio->bi_flags); -+ toi_end_bio(bio, 0); -+ } else -+ submit_bio(writing | REQ_SYNC, bio); -+ -+ return 0; -+} -+ -+/** -+ * toi_do_io: Prepare to do some i/o on a page and submit or batch it. -+ * -+ * @writing: Whether reading or writing. -+ * @bdev: The block device which we're using. -+ * @block0: The first sector we're reading or writing. -+ * @page: The page on which I/O is being done. -+ * @readahead_index: If doing readahead, the index (reset this flag when done). -+ * @syncio: Whether the i/o is being done synchronously. -+ * -+ * Prepare and start a read or write operation. -+ * -+ * Note that we always work with our own page. If writing, we might be given a -+ * compression buffer that will immediately be used to start compressing the -+ * next page. For reading, we do readahead and therefore don't know the final -+ * address where the data needs to go. -+ **/ -+int toi_do_io(int writing, struct block_device *bdev, long block0, -+ struct page *page, int is_readahead, int syncio, int free_group) -+{ -+ page->private = 0; -+ -+ /* Do here so we don't race against toi_bio_get_next_page_read */ -+ lock_page(page); -+ -+ if (is_readahead) { -+ if (readahead_list_head) -+ readahead_list_tail->private = (unsigned long) page; -+ else -+ readahead_list_head = page; -+ -+ readahead_list_tail = page; -+ } -+ -+ /* Done before submitting to avoid races. */ -+ if (syncio) -+ waiting_on = page; -+ -+ /* Submit the page */ -+ get_page(page); -+ -+ if (submit(writing, bdev, block0, page, free_group)) -+ return -EFAULT; -+ -+ if (syncio) -+ do_bio_wait(2); -+ -+ return 0; -+} -+ -+/** -+ * toi_bdev_page_io - simpler interface to do directly i/o on a single page -+ * @writing: Whether reading or writing. -+ * @bdev: Block device on which we're operating. -+ * @pos: Sector at which page to read or write starts. 
-+ * @page: Page to be read/written. -+ * -+ * A simple interface to submit a page of I/O and wait for its completion. -+ * The caller must free the page used. -+ **/ -+static int toi_bdev_page_io(int writing, struct block_device *bdev, -+ long pos, struct page *page) -+{ -+ return toi_do_io(writing, bdev, pos, page, 0, 1, 0); -+} -+ -+/** -+ * toi_bio_memory_needed - report the amount of memory needed for block i/o -+ * -+ * We want to have at least enough memory so as to have target_outstanding_io -+ * or more transactions on the fly at once. If we can do more, fine. -+ **/ -+static int toi_bio_memory_needed(void) -+{ -+ return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) + -+ sizeof(struct bio)); -+} -+ -+/** -+ * toi_bio_print_debug_stats - put out debugging info in the buffer provided -+ * @buffer: A buffer of size @size into which text should be placed. -+ * @size: The size of @buffer. -+ * -+ * Fill a buffer with debugging info. This is used for both our debug_info sysfs -+ * entry and for recording the same info in dmesg. -+ **/ -+static int toi_bio_print_debug_stats(char *buffer, int size) -+{ -+ int len = 0; -+ -+ if (toiActiveAllocator != &toi_blockwriter_ops) { -+ len = scnprintf(buffer, size, -+ "- Block I/O inactive.\n"); -+ return len; -+ } -+ -+ len = scnprintf(buffer, size, "- Block I/O active.\n"); -+ -+ len += toi_bio_chains_debug_info(buffer + len, size - len); -+ -+ len += scnprintf(buffer + len, size - len, -+ "- Max outstanding reads %d. 
Max writes %d.\n", -+ max_outstanding_reads, max_outstanding_writes); -+ -+ len += scnprintf(buffer + len, size - len, -+ " Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n", -+ target_outstanding_io, -+ PAGE_SIZE, (unsigned int) sizeof(struct request), -+ (unsigned int) sizeof(struct bio), toi_bio_memory_needed()); -+ -+#ifdef MEASURE_MUTEX_CONTENTION -+ { -+ int i; -+ -+ len += scnprintf(buffer + len, size - len, -+ " Mutex contention while reading:\n Contended Free\n"); -+ -+ for_each_online_cpu(i) -+ len += scnprintf(buffer + len, size - len, -+ " %9lu %9lu\n", -+ mutex_times[0][0][i], mutex_times[0][1][i]); -+ -+ len += scnprintf(buffer + len, size - len, -+ " Mutex contention while writing:\n Contended Free\n"); -+ -+ for_each_online_cpu(i) -+ len += scnprintf(buffer + len, size - len, -+ " %9lu %9lu\n", -+ mutex_times[1][0][i], mutex_times[1][1][i]); -+ -+ } -+#endif -+ -+ return len + scnprintf(buffer + len, size - len, -+ " Free mem throttle point reached %d.\n", free_mem_throttle); -+} -+ -+static int total_header_bytes; -+static int unowned; -+ -+void debug_broken_header(void) -+{ -+ printk(KERN_DEBUG "Image header too big for size allocated!\n"); -+ print_toi_header_storage_for_modules(); -+ printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed()); -+ printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header)); -+ printk(KERN_DEBUG "Total unowned : %d.\n", unowned); -+ printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes, -+ DIV_ROUND_UP(total_header_bytes, PAGE_SIZE)); -+ printk(KERN_DEBUG "Space needed now : %ld.\n", -+ get_header_storage_needed()); -+ dump_block_chains(); -+ abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small."); -+} -+ -+/** -+ * toi_rw_init - prepare to read or write a stream in the image -+ * @writing: Whether reading or writing. -+ * @stream number: Section of the image being processed. -+ * -+ * Prepare to read or write a section ('stream') in the image. 
-+ **/ -+static int toi_rw_init(int writing, int stream_number) -+{ -+ if (stream_number) -+ toi_extent_state_restore(stream_number); -+ else -+ toi_extent_state_goto_start(); -+ -+ if (writing) { -+ reset_idx = 0; -+ if (!current_stream) -+ page_idx = 0; -+ } else { -+ reset_idx = 1; -+ } -+ -+ atomic_set(&toi_io_done, 0); -+ if (!toi_writer_buffer) -+ toi_writer_buffer = (char *) toi_get_zeroed_page(11, -+ TOI_ATOMIC_GFP); -+ toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE; -+ -+ current_stream = stream_number; -+ -+ more_readahead = 1; -+ -+ return toi_writer_buffer ? 0 : -ENOMEM; -+} -+ -+/** -+ * toi_bio_queue_write - queue a page for writing -+ * @full_buffer: Pointer to a page to be queued -+ * -+ * Add a page to the queue to be submitted. If we're the queue flusher, -+ * we'll do this once we've dropped toi_bio_mutex, so other threads can -+ * continue to submit I/O while we're on the slow path doing the actual -+ * submission. -+ **/ -+static void toi_bio_queue_write(char **full_buffer) -+{ -+ struct page *page = virt_to_page(*full_buffer); -+ unsigned long flags; -+ -+ *full_buffer = NULL; -+ page->private = 0; -+ -+ spin_lock_irqsave(&bio_queue_lock, flags); -+ if (!bio_queue_head) -+ bio_queue_head = page; -+ else -+ bio_queue_tail->private = (unsigned long) page; -+ -+ bio_queue_tail = page; -+ atomic_inc(&toi_bio_queue_size); -+ -+ spin_unlock_irqrestore(&bio_queue_lock, flags); -+ wake_up(&toi_io_queue_flusher); -+} -+ -+/** -+ * toi_rw_cleanup - Cleanup after i/o. -+ * @writing: Whether we were reading or writing. -+ * -+ * Flush all I/O and clean everything up after reading or writing a -+ * section of the image. 
-+ **/ -+static int toi_rw_cleanup(int writing) -+{ -+ int i, result = 0; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_rw_cleanup."); -+ if (writing) { -+ if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED)) -+ toi_bio_queue_write(&toi_writer_buffer); -+ -+ while (bio_queue_head && !result) -+ result = toi_bio_queue_flush_pages(0); -+ -+ if (result) -+ return result; -+ -+ if (current_stream == 2) -+ toi_extent_state_save(1); -+ else if (current_stream == 1) -+ toi_extent_state_save(3); -+ } -+ -+ result = toi_finish_all_io(); -+ -+ while (readahead_list_head) { -+ void *next = (void *) readahead_list_head->private; -+ toi__free_page(12, readahead_list_head); -+ readahead_list_head = next; -+ } -+ -+ readahead_list_tail = NULL; -+ -+ if (!current_stream) -+ return result; -+ -+ for (i = 0; i < NUM_REASONS; i++) { -+ if (!atomic_read(&reasons[i])) -+ continue; -+ printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n", -+ reason_name[i], atomic_read(&reasons[i])); -+ atomic_set(&reasons[i], 0); -+ } -+ -+ current_stream = 0; -+ return result; -+} -+ -+/** -+ * toi_start_one_readahead - start one page of readahead -+ * @dedicated_thread: Is this a thread dedicated to doing readahead? -+ * -+ * Start one new page of readahead. If this is being called by a thread -+ * whose only just is to submit readahead, don't quit because we failed -+ * to allocate a page. -+ **/ -+static int toi_start_one_readahead(int dedicated_thread) -+{ -+ char *buffer = NULL; -+ int oom = 0, result; -+ -+ result = throttle_if_needed(dedicated_thread ? 
THROTTLE_WAIT : 0); -+ if (result) -+ return result; -+ -+ mutex_lock(&toi_bio_readahead_mutex); -+ -+ while (!buffer) { -+ buffer = (char *) toi_get_zeroed_page(12, -+ TOI_ATOMIC_GFP); -+ if (!buffer) { -+ if (oom && !dedicated_thread) { -+ mutex_unlock(&toi_bio_readahead_mutex); -+ return -ENOMEM; -+ } -+ -+ oom = 1; -+ set_free_mem_throttle(); -+ do_bio_wait(5); -+ } -+ } -+ -+ result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0); -+ if (result == -ENOSPC) -+ toi__free_page(12, virt_to_page(buffer)); -+ mutex_unlock(&toi_bio_readahead_mutex); -+ if (result) { -+ if (result == -ENOSPC) -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, -+ "Last readahead page submitted."); -+ else -+ printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n", -+ result); -+ } -+ return result; -+} -+ -+/** -+ * toi_start_new_readahead - start new readahead -+ * @dedicated_thread: Are we dedicated to this task? -+ * -+ * Start readahead of image pages. -+ * -+ * We can be called as a thread dedicated to this task (may be helpful on -+ * systems with lots of CPUs), in which case we don't exit until there's no -+ * more readahead. -+ * -+ * If this is not called by a dedicated thread, we top up our queue until -+ * there's no more readahead to submit, we've submitted the number given -+ * in target_outstanding_io or the number in progress exceeds the target -+ * outstanding I/O value. -+ * -+ * No mutex needed because this is only ever called by the first cpu. -+ **/ -+static int toi_start_new_readahead(int dedicated_thread) -+{ -+ int last_result, num_submitted = 0; -+ -+ /* Start a new readahead? 
*/ -+ if (!more_readahead) -+ return 0; -+ -+ do { -+ last_result = toi_start_one_readahead(dedicated_thread); -+ -+ if (last_result) { -+ if (last_result == -ENOMEM || last_result == -ENOSPC) -+ return 0; -+ -+ printk(KERN_DEBUG -+ "Begin read chunk returned %d.\n", -+ last_result); -+ } else -+ num_submitted++; -+ -+ } while (more_readahead && !last_result && -+ (dedicated_thread || -+ (num_submitted < target_outstanding_io && -+ atomic_read(&toi_io_in_progress) < target_outstanding_io))); -+ -+ return last_result; -+} -+ -+/** -+ * bio_io_flusher - start the dedicated I/O flushing routine -+ * @writing: Whether we're writing the image. -+ **/ -+static int bio_io_flusher(int writing) -+{ -+ -+ if (writing) -+ return toi_bio_queue_flush_pages(1); -+ else -+ return toi_start_new_readahead(1); -+} -+ -+/** -+ * toi_bio_get_next_page_read - read a disk page, perhaps with readahead -+ * @no_readahead: Whether we can use readahead -+ * -+ * Read a page from disk, submitting readahead and cleaning up finished i/o -+ * while we wait for the page we're after. -+ **/ -+static int toi_bio_get_next_page_read(int no_readahead) -+{ -+ char *virt; -+ struct page *old_readahead_list_head; -+ -+ /* -+ * When reading the second page of the header, we have to -+ * delay submitting the read until after we've gotten the -+ * extents out of the first page. -+ */ -+ if (unlikely(no_readahead && toi_start_one_readahead(0))) { -+ printk(KERN_EMERG "No readahead and toi_start_one_readahead " -+ "returned non-zero.\n"); -+ return -EIO; -+ } -+ -+ if (unlikely(!readahead_list_head)) { -+ /* -+ * If the last page finishes exactly on the page -+ * boundary, we will be called one extra time and -+ * have no data to return. In this case, we should -+ * not BUG(), like we used to! 
-+ */ -+ if (!more_readahead) { -+ printk(KERN_EMERG "No more readahead.\n"); -+ return -ENOSPC; -+ } -+ if (unlikely(toi_start_one_readahead(0))) { -+ printk(KERN_EMERG "No readahead and " -+ "toi_start_one_readahead returned non-zero.\n"); -+ return -EIO; -+ } -+ } -+ -+ if (PageLocked(readahead_list_head)) { -+ waiting_on = readahead_list_head; -+ do_bio_wait(0); -+ } -+ -+ virt = page_address(readahead_list_head); -+ memcpy(toi_writer_buffer, virt, PAGE_SIZE); -+ -+ mutex_lock(&toi_bio_readahead_mutex); -+ old_readahead_list_head = readahead_list_head; -+ readahead_list_head = (struct page *) readahead_list_head->private; -+ mutex_unlock(&toi_bio_readahead_mutex); -+ toi__free_page(12, old_readahead_list_head); -+ return 0; -+} -+ -+/** -+ * toi_bio_queue_flush_pages - flush the queue of pages queued for writing -+ * @dedicated_thread: Whether we're a dedicated thread -+ * -+ * Flush the queue of pages ready to be written to disk. -+ * -+ * If we're a dedicated thread, stay in here until told to leave, -+ * sleeping in wait_event. -+ * -+ * The first thread is normally the only one to come in here. Another -+ * thread can enter this routine too, though, via throttle_if_needed. -+ * Since that's the case, we must be careful to only have one thread -+ * doing this work at a time. Otherwise we have a race and could save -+ * pages out of order. -+ * -+ * If an error occurs, free all remaining pages without submitting them -+ * for I/O. 
-+ **/ -+ -+int toi_bio_queue_flush_pages(int dedicated_thread) -+{ -+ unsigned long flags; -+ int result = 0; -+ static DEFINE_MUTEX(busy); -+ -+ if (!mutex_trylock(&busy)) -+ return 0; -+ -+top: -+ spin_lock_irqsave(&bio_queue_lock, flags); -+ while (bio_queue_head) { -+ struct page *page = bio_queue_head; -+ bio_queue_head = (struct page *) page->private; -+ if (bio_queue_tail == page) -+ bio_queue_tail = NULL; -+ atomic_dec(&toi_bio_queue_size); -+ spin_unlock_irqrestore(&bio_queue_lock, flags); -+ -+ /* Don't generate more error messages if already had one */ -+ if (!result) -+ result = toi_bio_rw_page(WRITE, page, 0, 11); -+ /* -+ * If writing the page failed, don't drop out. -+ * Flush the rest of the queue too. -+ */ -+ if (result) -+ toi__free_page(11 , page); -+ spin_lock_irqsave(&bio_queue_lock, flags); -+ } -+ spin_unlock_irqrestore(&bio_queue_lock, flags); -+ -+ if (dedicated_thread) { -+ wait_event(toi_io_queue_flusher, bio_queue_head || -+ toi_bio_queue_flusher_should_finish); -+ if (likely(!toi_bio_queue_flusher_should_finish)) -+ goto top; -+ toi_bio_queue_flusher_should_finish = 0; -+ } -+ -+ mutex_unlock(&busy); -+ return result; -+} -+ -+/** -+ * toi_bio_get_new_page - get a new page for I/O -+ * @full_buffer: Pointer to a page to allocate. -+ **/ -+static int toi_bio_get_new_page(char **full_buffer) -+{ -+ int result = throttle_if_needed(THROTTLE_WAIT); -+ if (result) -+ return result; -+ -+ while (!*full_buffer) { -+ *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP); -+ if (!*full_buffer) { -+ set_free_mem_throttle(); -+ do_bio_wait(3); -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O -+ * @writing: Bool - whether writing (or reading). -+ * @buffer: The start of the buffer to write or fill. -+ * @buffer_size: The size of the buffer to write or fill. -+ * @no_readahead: Don't try to start readhead (when getting extents). 
-+ **/ -+static int toi_rw_buffer(int writing, char *buffer, int buffer_size, -+ int no_readahead) -+{ -+ int bytes_left = buffer_size, result = 0; -+ -+ while (bytes_left) { -+ char *source_start = buffer + buffer_size - bytes_left; -+ char *dest_start = toi_writer_buffer + toi_writer_buffer_posn; -+ int capacity = PAGE_SIZE - toi_writer_buffer_posn; -+ char *to = writing ? dest_start : source_start; -+ char *from = writing ? source_start : dest_start; -+ -+ if (bytes_left <= capacity) { -+ memcpy(to, from, bytes_left); -+ toi_writer_buffer_posn += bytes_left; -+ return 0; -+ } -+ -+ /* Complete this page and start a new one */ -+ memcpy(to, from, capacity); -+ bytes_left -= capacity; -+ -+ if (!writing) { -+ /* -+ * Perform actual I/O: -+ * read readahead_list_head into toi_writer_buffer -+ */ -+ int result = toi_bio_get_next_page_read(no_readahead); -+ if (result) { -+ printk("toi_bio_get_next_page_read " -+ "returned %d.\n", result); -+ return result; -+ } -+ } else { -+ toi_bio_queue_write(&toi_writer_buffer); -+ result = toi_bio_get_new_page(&toi_writer_buffer); -+ if (result) { -+ printk(KERN_ERR "toi_bio_get_new_page returned " -+ "%d.\n", result); -+ return result; -+ } -+ } -+ -+ toi_writer_buffer_posn = 0; -+ toi_cond_pause(0, NULL); -+ } -+ -+ return 0; -+} -+ -+/** -+ * toi_bio_read_page - read a page of the image -+ * @pfn: The pfn where the data belongs. -+ * @buffer_page: The page containing the (possibly compressed) data. -+ * @buf_size: The number of bytes on @buffer_page used (PAGE_SIZE). -+ * -+ * Read a (possibly compressed) page from the image, into buffer_page, -+ * returning its pfn and the buffer size. -+ **/ -+static int toi_bio_read_page(unsigned long *pfn, int buf_type, -+ void *buffer_page, unsigned int *buf_size) -+{ -+ int result = 0; -+ int this_idx; -+ char *buffer_virt = TOI_MAP(buf_type, buffer_page); -+ -+ /* -+ * Only call start_new_readahead if we don't have a dedicated thread -+ * and we're the queue flusher. 
-+ */ -+ if (current == toi_queue_flusher && more_readahead && -+ !test_action_state(TOI_NO_READAHEAD)) { -+ int result2 = toi_start_new_readahead(0); -+ if (result2) { -+ printk(KERN_DEBUG "Queue flusher and " -+ "toi_start_one_readahead returned non-zero.\n"); -+ result = -EIO; -+ goto out; -+ } -+ } -+ -+ my_mutex_lock(0, &toi_bio_mutex); -+ -+ /* -+ * Structure in the image: -+ * [destination pfn|page size|page data] -+ * buf_size is PAGE_SIZE -+ * We can validly find there's nothing to read in a multithreaded -+ * situation. -+ */ -+ if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) || -+ toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) || -+ toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) || -+ toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) { -+ result = -ENODATA; -+ goto out_unlock; -+ } -+ -+ if (reset_idx) { -+ page_idx = this_idx; -+ reset_idx = 0; -+ } else { -+ page_idx++; -+ if (!this_idx) -+ result = -ENODATA; -+ else if (page_idx != this_idx) -+ printk(KERN_ERR "Got page index %d, expected %d.\n", -+ this_idx, page_idx); -+ } -+ -+out_unlock: -+ my_mutex_unlock(0, &toi_bio_mutex); -+out: -+ TOI_UNMAP(buf_type, buffer_page); -+ return result; -+} -+ -+/** -+ * toi_bio_write_page - write a page of the image -+ * @pfn: The pfn where the data belongs. -+ * @buffer_page: The page containing the (possibly compressed) data. -+ * @buf_size: The number of bytes on @buffer_page used. -+ * -+ * Write a (possibly compressed) page to the image from the buffer, together -+ * with it's index and buffer size. 
-+ **/ -+static int toi_bio_write_page(unsigned long pfn, int buf_type, -+ void *buffer_page, unsigned int buf_size) -+{ -+ char *buffer_virt; -+ int result = 0, result2 = 0; -+ -+ if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) -+ return 0; -+ -+ my_mutex_lock(1, &toi_bio_mutex); -+ -+ if (test_result_state(TOI_ABORTED)) { -+ my_mutex_unlock(1, &toi_bio_mutex); -+ return 0; -+ } -+ -+ buffer_virt = TOI_MAP(buf_type, buffer_page); -+ page_idx++; -+ -+ /* -+ * Structure in the image: -+ * [destination pfn|page size|page data] -+ * buf_size is PAGE_SIZE -+ */ -+ if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) || -+ toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) || -+ toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) || -+ toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) { -+ printk(KERN_DEBUG "toi_rw_buffer returned non-zero to " -+ "toi_bio_write_page.\n"); -+ result = -EIO; -+ } -+ -+ TOI_UNMAP(buf_type, buffer_page); -+ my_mutex_unlock(1, &toi_bio_mutex); -+ -+ if (current == toi_queue_flusher) -+ result2 = toi_bio_queue_flush_pages(0); -+ -+ return result ? result : result2; -+} -+ -+/** -+ * _toi_rw_header_chunk - read or write a portion of the image header -+ * @writing: Whether reading or writing. -+ * @owner: The module for which we're writing. -+ * Used for confirming that modules -+ * don't use more header space than they asked for. -+ * @buffer: Address of the data to write. -+ * @buffer_size: Size of the data buffer. -+ * @no_readahead: Don't try to start readhead (when getting extents). -+ * -+ * Perform PAGE_SIZE I/O. Start readahead if needed. 
-+ **/ -+static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner, -+ char *buffer, int buffer_size, int no_readahead) -+{ -+ int result = 0; -+ -+ if (owner) { -+ owner->header_used += buffer_size; -+ toi_message(TOI_HEADER, TOI_LOW, 1, -+ "Header: %s : %d bytes (%d/%d) from offset %d.", -+ owner->name, -+ buffer_size, owner->header_used, -+ owner->header_requested, -+ toi_writer_buffer_posn); -+ if (owner->header_used > owner->header_requested && writing) { -+ printk(KERN_EMERG "TuxOnIce module %s is using more " -+ "header space (%u) than it requested (%u).\n", -+ owner->name, -+ owner->header_used, -+ owner->header_requested); -+ return buffer_size; -+ } -+ } else { -+ unowned += buffer_size; -+ toi_message(TOI_HEADER, TOI_LOW, 1, -+ "Header: (No owner): %d bytes (%d total so far) from " -+ "offset %d.", buffer_size, unowned, -+ toi_writer_buffer_posn); -+ } -+ -+ if (!writing && !no_readahead && more_readahead) { -+ result = toi_start_new_readahead(0); -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Start new readahead " -+ "returned %d.", result); -+ } -+ -+ if (!result) { -+ result = toi_rw_buffer(writing, buffer, buffer_size, -+ no_readahead); -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "rw_buffer returned " -+ "%d.", result); -+ } -+ -+ total_header_bytes += buffer_size; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning " -+ "%d.", result); -+ return result; -+} -+ -+static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner, -+ char *buffer, int size) -+{ -+ return _toi_rw_header_chunk(writing, owner, buffer, size, 1); -+} -+ -+static int toi_rw_header_chunk_noreadahead(int writing, -+ struct toi_module_ops *owner, char *buffer, int size) -+{ -+ return _toi_rw_header_chunk(writing, owner, buffer, size, 1); -+} -+ -+/** -+ * toi_bio_storage_needed - get the amount of storage needed for my fns -+ **/ -+static int toi_bio_storage_needed(void) -+{ -+ return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed(); 
-+} -+ -+/** -+ * toi_bio_save_config_info - save block I/O config to image header -+ * @buf: PAGE_SIZE'd buffer into which data should be saved. -+ **/ -+static int toi_bio_save_config_info(char *buf) -+{ -+ int *ints = (int *) buf; -+ ints[0] = target_outstanding_io; -+ return sizeof(int); -+} -+ -+/** -+ * toi_bio_load_config_info - restore block I/O config -+ * @buf: Data to be reloaded. -+ * @size: Size of the buffer saved. -+ **/ -+static void toi_bio_load_config_info(char *buf, int size) -+{ -+ int *ints = (int *) buf; -+ target_outstanding_io = ints[0]; -+} -+ -+void close_resume_dev_t(int force) -+{ -+ if (!resume_block_device) -+ return; -+ -+ if (force) -+ atomic_set(&resume_bdev_open_count, 0); -+ else -+ atomic_dec(&resume_bdev_open_count); -+ -+ if (!atomic_read(&resume_bdev_open_count)) { -+ toi_close_bdev(resume_block_device); -+ resume_block_device = NULL; -+ } -+} -+ -+int open_resume_dev_t(int force, int quiet) -+{ -+ if (force) { -+ close_resume_dev_t(1); -+ atomic_set(&resume_bdev_open_count, 1); -+ } else -+ atomic_inc(&resume_bdev_open_count); -+ -+ if (resume_block_device) -+ return 0; -+ -+ resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0); -+ if (IS_ERR(resume_block_device)) { -+ if (!quiet) -+ toi_early_boot_message(1, TOI_CONTINUE_REQ, -+ "Failed to open device %x, where" -+ " the header should be found.", -+ resume_dev_t); -+ resume_block_device = NULL; -+ atomic_set(&resume_bdev_open_count, 0); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/** -+ * toi_bio_initialise - initialise bio code at start of some action -+ * @starting_cycle: Whether starting a hibernation cycle, or just reading or -+ * writing a sysfs value. 
-+ **/ -+static int toi_bio_initialise(int starting_cycle) -+{ -+ int result; -+ -+ if (!starting_cycle || !resume_dev_t) -+ return 0; -+ -+ max_outstanding_writes = 0; -+ max_outstanding_reads = 0; -+ current_stream = 0; -+ toi_queue_flusher = current; -+#ifdef MEASURE_MUTEX_CONTENTION -+ { -+ int i, j, k; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < 2; j++) -+ for_each_online_cpu(k) -+ mutex_times[i][j][k] = 0; -+ } -+#endif -+ result = open_resume_dev_t(0, 1); -+ -+ if (result) -+ return result; -+ -+ return get_signature_page(); -+} -+ -+static unsigned long raw_to_real(unsigned long raw) -+{ -+ unsigned long extra; -+ -+ extra = (raw * (sizeof(unsigned long) + sizeof(int)) + -+ (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) / -+ (PAGE_SIZE + sizeof(unsigned long) + sizeof(int)); -+ -+ return raw > extra ? raw - extra : 0; -+} -+ -+static unsigned long toi_bio_storage_available(void) -+{ -+ unsigned long sum = 0; -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || -+ this_module->type != BIO_ALLOCATOR_MODULE) -+ continue; -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking storage " -+ "available from %s.", this_module->name); -+ sum += this_module->bio_allocator_ops->storage_available(); -+ } -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Total storage available is %lu " -+ "pages (%d header pages).", sum, header_pages_reserved); -+ -+ return sum > header_pages_reserved ? -+ raw_to_real(sum - header_pages_reserved) : 0; -+ -+} -+ -+static unsigned long toi_bio_storage_allocated(void) -+{ -+ return raw_pages_allocd > header_pages_reserved ? -+ raw_to_real(raw_pages_allocd - header_pages_reserved) : 0; -+} -+ -+/* -+ * If we have read part of the image, we might have filled memory with -+ * data that should be zeroed out. 
-+ */ -+static void toi_bio_noresume_reset(void) -+{ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_noresume_reset."); -+ toi_rw_cleanup(READ); -+ free_all_bdev_info(); -+} -+ -+/** -+ * toi_bio_cleanup - cleanup after some action -+ * @finishing_cycle: Whether completing a cycle. -+ **/ -+static void toi_bio_cleanup(int finishing_cycle) -+{ -+ if (!finishing_cycle) -+ return; -+ -+ if (toi_writer_buffer) { -+ toi_free_page(11, (unsigned long) toi_writer_buffer); -+ toi_writer_buffer = NULL; -+ } -+ -+ forget_signature_page(); -+ -+ if (header_block_device && toi_sig_data && -+ toi_sig_data->header_dev_t != resume_dev_t) -+ toi_close_bdev(header_block_device); -+ -+ header_block_device = NULL; -+ -+ close_resume_dev_t(0); -+} -+ -+static int toi_bio_write_header_init(void) -+{ -+ int result; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_write_header_init"); -+ toi_rw_init(WRITE, 0); -+ toi_writer_buffer_posn = 0; -+ -+ /* Info needed to bootstrap goes at the start of the header. -+ * First we save the positions and devinfo, including the number -+ * of header pages. Then we save the structs containing data needed -+ * for reading the header pages back. -+ * Note that even if header pages take more than one page, when we -+ * read back the info, we will have restored the location of the -+ * next header page by the time we go to use it. -+ */ -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise extent chains."); -+ result = toi_serialise_extent_chains(); -+ -+ if (result) -+ return result; -+ -+ /* -+ * Signature page hasn't been modified at this point. Write it in -+ * the header so we can restore it later. 
-+ */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise signature page."); -+ return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops, -+ (char *) toi_cur_sig_page, -+ PAGE_SIZE); -+} -+ -+static int toi_bio_write_header_cleanup(void) -+{ -+ int result = 0; -+ -+ if (toi_writer_buffer_posn) -+ toi_bio_queue_write(&toi_writer_buffer); -+ -+ result = toi_finish_all_io(); -+ -+ unowned = 0; -+ total_header_bytes = 0; -+ -+ /* Set signature to save we have an image */ -+ if (!result) -+ result = toi_bio_mark_have_image(); -+ -+ return result; -+} -+ -+/* -+ * toi_bio_read_header_init() -+ * -+ * Description: -+ * 1. Attempt to read the device specified with resume=. -+ * 2. Check the contents of the swap header for our signature. -+ * 3. Warn, ignore, reset and/or continue as appropriate. -+ * 4. If continuing, read the toi_swap configuration section -+ * of the header and set up block device info so we can read -+ * the rest of the header & image. -+ * -+ * Returns: -+ * May not return if user choose to reboot at a warning. -+ * -EINVAL if cannot resume at this time. Booting should continue -+ * normally. -+ */ -+ -+static int toi_bio_read_header_init(void) -+{ -+ int result = 0; -+ char buf[32]; -+ -+ toi_writer_buffer_posn = 0; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_init"); -+ -+ if (!toi_sig_data) { -+ printk(KERN_INFO "toi_bio_read_header_init called when we " -+ "haven't verified there is an image!\n"); -+ return -EINVAL; -+ } -+ -+ /* -+ * If the header is not on the resume_swap_dev_t, get the resume device -+ * first. 
-+ */ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Header dev_t is %lx.", -+ toi_sig_data->header_dev_t); -+ if (toi_sig_data->have_uuid) { -+ struct fs_info seek; -+ dev_t device; -+ -+ strncpy((char *) seek.uuid, toi_sig_data->header_uuid, 16); -+ seek.dev_t = toi_sig_data->header_dev_t; -+ seek.last_mount_size = 0; -+ device = blk_lookup_fs_info(&seek); -+ if (device) { -+ printk("Using dev_t %s, returned by blk_lookup_fs_info.\n", -+ format_dev_t(buf, device)); -+ toi_sig_data->header_dev_t = device; -+ } -+ } -+ if (toi_sig_data->header_dev_t != resume_dev_t) { -+ header_block_device = toi_open_bdev(NULL, -+ toi_sig_data->header_dev_t, 1); -+ -+ if (IS_ERR(header_block_device)) -+ return PTR_ERR(header_block_device); -+ } else -+ header_block_device = resume_block_device; -+ -+ if (!toi_writer_buffer) -+ toi_writer_buffer = (char *) toi_get_zeroed_page(11, -+ TOI_ATOMIC_GFP); -+ more_readahead = 1; -+ -+ /* -+ * Read toi_swap configuration. -+ * Headerblock size taken into account already. 
-+ */ -+ result = toi_bio_ops.bdev_page_io(READ, header_block_device, -+ toi_sig_data->first_header_block, -+ virt_to_page((unsigned long) toi_writer_buffer)); -+ if (result) -+ return result; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "load extent chains."); -+ result = toi_load_extent_chains(); -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "load original signature page."); -+ toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP); -+ if (!toi_orig_sig_page) { -+ printk(KERN_ERR "Failed to allocate memory for the current" -+ " image signature.\n"); -+ return -ENOMEM; -+ } -+ -+ return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops, -+ (char *) toi_orig_sig_page, -+ PAGE_SIZE); -+} -+ -+static int toi_bio_read_header_cleanup(void) -+{ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup."); -+ return toi_rw_cleanup(READ); -+} -+ -+/* Works only for digits and letters, but small and fast */ -+#define TOLOWER(x) ((x) | 0x20) -+ -+/* -+ * UUID must be 32 chars long. It may have dashes, but nothing -+ * else. -+ */ -+char *uuid_from_commandline(char *commandline) -+{ -+ int low = 0; -+ char *result = NULL, *output, *ptr; -+ -+ if (strncmp(commandline, "UUID=", 5)) -+ return NULL; -+ -+ result = kzalloc(17, GFP_KERNEL); -+ if (!result) { -+ printk("Failed to kzalloc UUID text memory.\n"); -+ return NULL; -+ } -+ -+ ptr = commandline + 5; -+ output = result; -+ -+ while (*ptr && (output - result) < 16) { -+ if (isxdigit(*ptr)) { -+ int value = isdigit(*ptr) ? 
*ptr - '0' : -+ TOLOWER(*ptr) - 'a' + 10; -+ if (low) { -+ *output += value; -+ output++; -+ } else { -+ *output = value << 4; -+ } -+ low = !low; -+ } else if (*ptr != '-') -+ break; -+ ptr++; -+ } -+ -+ if ((output - result) < 16 || *ptr) { -+ printk(KERN_DEBUG "Found resume=UUID=, but the value looks " -+ "invalid.\n"); -+ kfree(result); -+ result = NULL; -+ } -+ -+ return result; -+} -+ -+#define retry_if_fails(command) \ -+do { \ -+ command; \ -+ if (!resume_dev_t && !waited_for_device_probe) { \ -+ wait_for_device_probe(); \ -+ command; \ -+ waited_for_device_probe = 1; \ -+ } \ -+} while(0) -+ -+/** -+ * try_to_open_resume_device: Try to parse and open resume= -+ * -+ * Any "swap:" has been stripped away and we just have the path to deal with. -+ * We attempt to do name_to_dev_t, open and stat the file. Having opened the -+ * file, get the struct block_device * to match. -+ */ -+static int try_to_open_resume_device(char *commandline, int quiet) -+{ -+ struct kstat stat; -+ int error = 0; -+ char *uuid = uuid_from_commandline(commandline); -+ int waited_for_device_probe = 0; -+ -+ resume_dev_t = MKDEV(0, 0); -+ -+ if (!strlen(commandline)) -+ retry_if_fails(toi_bio_scan_for_image(quiet)); -+ -+ if (uuid) { -+ struct fs_info seek; -+ strncpy((char *) &seek.uuid, uuid, 16); -+ seek.dev_t = resume_dev_t; -+ seek.last_mount_size = 0; -+ retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek)); -+ kfree(uuid); -+ } -+ -+ if (!resume_dev_t) -+ retry_if_fails(resume_dev_t = name_to_dev_t(commandline)); -+ -+ if (!resume_dev_t) { -+ struct file *file = filp_open(commandline, -+ O_RDONLY|O_LARGEFILE, 0); -+ -+ if (!IS_ERR(file) && file) { -+ vfs_getattr(&file->f_path, &stat); -+ filp_close(file, NULL); -+ } else -+ error = vfs_stat(commandline, &stat); -+ if (!error) -+ resume_dev_t = stat.rdev; -+ } -+ -+ if (!resume_dev_t) { -+ if (quiet) -+ return 1; -+ -+ if (test_toi_state(TOI_TRYING_TO_RESUME)) -+ toi_early_boot_message(1, toi_translate_err_default, -+ "Failed 
to translate \"%s\" into a device id.\n", -+ commandline); -+ else -+ printk("TuxOnIce: Can't translate \"%s\" into a device " -+ "id yet.\n", commandline); -+ return 1; -+ } -+ -+ return open_resume_dev_t(1, quiet); -+} -+ -+/* -+ * Parse Image Location -+ * -+ * Attempt to parse a resume= parameter. -+ * Swap Writer accepts: -+ * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE] -+ * -+ * Where: -+ * DEVNAME is convertable to a dev_t by name_to_dev_t -+ * FIRSTBLOCK is the location of the first block in the swap file -+ * (specifying for a swap partition is nonsensical but not prohibited). -+ * Data is validated by attempting to read a swap header from the -+ * location given. Failure will result in toi_swap refusing to -+ * save an image, and a reboot with correct parameters will be -+ * necessary. -+ */ -+static int toi_bio_parse_sig_location(char *commandline, -+ int only_allocator, int quiet) -+{ -+ char *thischar, *devstart, *colon = NULL; -+ int signature_found, result = -EINVAL, temp_result = 0; -+ -+ if (strncmp(commandline, "swap:", 5) && -+ strncmp(commandline, "file:", 5)) { -+ /* -+ * Failing swap:, we'll take a simple resume=/dev/hda2, or a -+ * blank value (scan) but fall through to other allocators -+ * if /dev/ or UUID= isn't matched. 
-+ */ -+ if (strncmp(commandline, "/dev/", 5) && -+ strncmp(commandline, "UUID=", 5) && -+ strlen(commandline)) -+ return 1; -+ } else -+ commandline += 5; -+ -+ devstart = commandline; -+ thischar = commandline; -+ while ((*thischar != ':') && (*thischar != '@') && -+ ((thischar - commandline) < 250) && (*thischar)) -+ thischar++; -+ -+ if (*thischar == ':') { -+ colon = thischar; -+ *colon = 0; -+ thischar++; -+ } -+ -+ while ((thischar - commandline) < 250 && *thischar) -+ thischar++; -+ -+ if (colon) { -+ unsigned long block; -+ temp_result = strict_strtoul(colon + 1, 0, &block); -+ if (!temp_result) -+ resume_firstblock = (int) block; -+ } else -+ resume_firstblock = 0; -+ -+ clear_toi_state(TOI_CAN_HIBERNATE); -+ clear_toi_state(TOI_CAN_RESUME); -+ -+ if (!temp_result) -+ temp_result = try_to_open_resume_device(devstart, quiet); -+ -+ if (colon) -+ *colon = ':'; -+ -+ /* No error if we only scanned */ -+ if (temp_result) -+ return strlen(commandline) ? -EINVAL : 1; -+ -+ signature_found = toi_bio_image_exists(quiet); -+ -+ if (signature_found != -1) { -+ result = 0; -+ /* -+ * TODO: If only file storage, CAN_HIBERNATE should only be -+ * set if file allocator's target is valid. -+ */ -+ set_toi_state(TOI_CAN_HIBERNATE); -+ set_toi_state(TOI_CAN_RESUME); -+ } else -+ if (!quiet) -+ printk(KERN_ERR "TuxOnIce: Block I/O: No " -+ "signature found at %s.\n", devstart); -+ -+ return result; -+} -+ -+static void toi_bio_release_storage(void) -+{ -+ header_pages_reserved = 0; -+ raw_pages_allocd = 0; -+ -+ free_all_bdev_info(); -+} -+ -+/* toi_swap_remove_image -+ * -+ */ -+static int toi_bio_remove_image(void) -+{ -+ int result; -+ -+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_remove_image."); -+ -+ result = toi_bio_restore_original_signature(); -+ -+ /* -+ * We don't do a sanity check here: we want to restore the swap -+ * whatever version of kernel made the hibernate image. 
-+ * -+ * We need to write swap, but swap may not be enabled so -+ * we write the device directly -+ * -+ * If we don't have an current_signature_page, we didn't -+ * read an image header, so don't change anything. -+ */ -+ -+ toi_bio_release_storage(); -+ -+ return result; -+} -+ -+struct toi_bio_ops toi_bio_ops = { -+ .bdev_page_io = toi_bdev_page_io, -+ .register_storage = toi_register_storage_chain, -+ .free_storage = toi_bio_release_storage, -+}; -+EXPORT_SYMBOL_GPL(toi_bio_ops); -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io, -+ 0, 16384, 0, NULL), -+}; -+ -+struct toi_module_ops toi_blockwriter_ops = { -+ .type = WRITER_MODULE, -+ .name = "block i/o", -+ .directory = "block_io", -+ .module = THIS_MODULE, -+ .memory_needed = toi_bio_memory_needed, -+ .print_debug_info = toi_bio_print_debug_stats, -+ .storage_needed = toi_bio_storage_needed, -+ .save_config_info = toi_bio_save_config_info, -+ .load_config_info = toi_bio_load_config_info, -+ .initialise = toi_bio_initialise, -+ .cleanup = toi_bio_cleanup, -+ .post_atomic_restore = toi_bio_chains_post_atomic, -+ -+ .rw_init = toi_rw_init, -+ .rw_cleanup = toi_rw_cleanup, -+ .read_page = toi_bio_read_page, -+ .write_page = toi_bio_write_page, -+ .rw_header_chunk = toi_rw_header_chunk, -+ .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead, -+ .io_flusher = bio_io_flusher, -+ .update_throughput_throttle = update_throughput_throttle, -+ .finish_all_io = toi_finish_all_io, -+ -+ .noresume_reset = toi_bio_noresume_reset, -+ .storage_available = toi_bio_storage_available, -+ .storage_allocated = toi_bio_storage_allocated, -+ .reserve_header_space = toi_bio_reserve_header_space, -+ .allocate_storage = toi_bio_allocate_storage, -+ .image_exists = toi_bio_image_exists, -+ .mark_resume_attempted = toi_bio_mark_resume_attempted, -+ .write_header_init = toi_bio_write_header_init, -+ .write_header_cleanup = toi_bio_write_header_cleanup, 
-+ .read_header_init = toi_bio_read_header_init, -+ .read_header_cleanup = toi_bio_read_header_cleanup, -+ .get_header_version = toi_bio_get_header_version, -+ .remove_image = toi_bio_remove_image, -+ .parse_sig_location = toi_bio_parse_sig_location, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/** -+ * toi_block_io_load - load time routine for block I/O module -+ * -+ * Register block i/o ops and sysfs entries. -+ **/ -+static __init int toi_block_io_load(void) -+{ -+ return toi_register_module(&toi_blockwriter_ops); -+} -+ -+#ifdef MODULE -+static __exit void toi_block_io_unload(void) -+{ -+ toi_unregister_module(&toi_blockwriter_ops); -+} -+ -+module_init(toi_block_io_load); -+module_exit(toi_block_io_unload); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("TuxOnIce block io functions"); -+#else -+late_initcall(toi_block_io_load); -+#endif -diff --git a/kernel/power/tuxonice_bio_internal.h b/kernel/power/tuxonice_bio_internal.h -new file mode 100644 -index 0000000..b09e176 ---- /dev/null -+++ b/kernel/power/tuxonice_bio_internal.h -@@ -0,0 +1,86 @@ -+/* -+ * kernel/power/tuxonice_bio_internal.h -+ * -+ * Copyright (C) 2009-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * This file contains declarations for functions exported from -+ * tuxonice_bio.c, which contains low level io functions. 
-+ */ -+ -+/* Extent chains */ -+void toi_extent_state_goto_start(void); -+void toi_extent_state_save(int slot); -+int go_next_page(int writing, int section_barrier); -+void toi_extent_state_restore(int slot); -+void free_all_bdev_info(void); -+int devices_of_same_priority(struct toi_bdev_info *this); -+int toi_register_storage_chain(struct toi_bdev_info *new); -+int toi_serialise_extent_chains(void); -+int toi_load_extent_chains(void); -+int toi_bio_rw_page(int writing, struct page *page, int is_readahead, -+ int free_group); -+int toi_bio_restore_original_signature(void); -+int toi_bio_devinfo_storage_needed(void); -+unsigned long get_headerblock(void); -+dev_t get_header_dev_t(void); -+struct block_device *get_header_bdev(void); -+int toi_bio_allocate_storage(unsigned long request); -+ -+/* Signature functions */ -+#define HaveImage "HaveImage" -+#define NoImage "TuxOnIce" -+#define sig_size (sizeof(HaveImage)) -+ -+struct sig_data { -+ char sig[sig_size]; -+ int have_image; -+ int resumed_before; -+ -+ char have_uuid; -+ char header_uuid[17]; -+ dev_t header_dev_t; -+ unsigned long first_header_block; -+ -+ /* Repeat the signature to be sure we have a header version */ -+ char sig2[sig_size]; -+ int header_version; -+}; -+ -+void forget_signature_page(void); -+int toi_check_for_signature(void); -+int toi_bio_image_exists(int quiet); -+int get_signature_page(void); -+int toi_bio_mark_resume_attempted(int); -+extern char *toi_cur_sig_page; -+extern char *toi_orig_sig_page; -+int toi_bio_mark_have_image(void); -+extern struct sig_data *toi_sig_data; -+extern dev_t resume_dev_t; -+extern struct block_device *resume_block_device; -+extern struct block_device *header_block_device; -+extern unsigned long resume_firstblock; -+ -+struct block_device *open_bdev(dev_t device, int display_errs); -+extern int current_stream; -+extern int more_readahead; -+int toi_do_io(int writing, struct block_device *bdev, long block0, -+ struct page *page, int is_readahead, int syncio, 
int free_group); -+int get_main_pool_phys_params(void); -+ -+void toi_close_bdev(struct block_device *bdev); -+struct block_device *toi_open_bdev(char *uuid, dev_t default_device, -+ int display_errs); -+ -+extern struct toi_module_ops toi_blockwriter_ops; -+void dump_block_chains(void); -+void debug_broken_header(void); -+extern unsigned long raw_pages_allocd, header_pages_reserved; -+int toi_bio_chains_debug_info(char *buffer, int size); -+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd); -+int toi_bio_scan_for_image(int quiet); -+int toi_bio_get_header_version(void); -+ -+void close_resume_dev_t(int force); -+int open_resume_dev_t(int force, int quiet); -diff --git a/kernel/power/tuxonice_bio_signature.c b/kernel/power/tuxonice_bio_signature.c -new file mode 100644 -index 0000000..9985385 ---- /dev/null -+++ b/kernel/power/tuxonice_bio_signature.c -@@ -0,0 +1,403 @@ -+/* -+ * kernel/power/tuxonice_bio_signature.c -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. 
-+ * -+ */ -+ -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice_bio.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_io.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_bio_internal.h" -+ -+struct sig_data *toi_sig_data; -+ -+/* Struct of swap header pages */ -+ -+struct old_sig_data { -+ dev_t device; -+ unsigned long sector; -+ int resume_attempted; -+ int orig_sig_type; -+}; -+ -+union diskpage { -+ union swap_header swh; /* swh.magic is the only member used */ -+ struct sig_data sig_data; -+ struct old_sig_data old_sig_data; -+}; -+ -+union p_diskpage { -+ union diskpage *pointer; -+ char *ptr; -+ unsigned long address; -+}; -+ -+char *toi_cur_sig_page; -+char *toi_orig_sig_page; -+int have_image; -+int have_old_image; -+ -+int get_signature_page(void) -+{ -+ if (!toi_cur_sig_page) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, -+ "Allocating current signature page."); -+ toi_cur_sig_page = (char *) toi_get_zeroed_page(38, -+ TOI_ATOMIC_GFP); -+ if (!toi_cur_sig_page) { -+ printk(KERN_ERR "Failed to allocate memory for the " -+ "current image signature.\n"); -+ return -ENOMEM; -+ } -+ -+ toi_sig_data = (struct sig_data *) toi_cur_sig_page; -+ } -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %lx," -+ " sector %d.", -+ resume_block_device->bd_dev, resume_firstblock); -+ -+ return toi_bio_ops.bdev_page_io(READ, resume_block_device, -+ resume_firstblock, virt_to_page(toi_cur_sig_page)); -+} -+ -+void forget_signature_page(void) -+{ -+ if (toi_cur_sig_page) { -+ toi_sig_data = NULL; -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page" -+ " (%p).", toi_cur_sig_page); -+ toi_free_page(38, (unsigned long) toi_cur_sig_page); -+ toi_cur_sig_page = NULL; -+ } -+ -+ if (toi_orig_sig_page) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page" -+ " (%p).", toi_orig_sig_page); -+ 
toi_free_page(38, (unsigned long) toi_orig_sig_page); -+ toi_orig_sig_page = NULL; -+ } -+} -+ -+/* -+ * We need to ensure we use the signature page that's currently on disk, -+ * so as to not remove the image header. Post-atomic-restore, the orig sig -+ * page will be empty, so we can use that as our method of knowing that we -+ * need to load the on-disk signature and not use the non-image sig in -+ * memory. (We're going to powerdown after writing the change, so it's safe. -+ */ -+int toi_bio_mark_resume_attempted(int flag) -+{ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Make resume attempted = %d.", -+ flag); -+ if (!toi_orig_sig_page) { -+ forget_signature_page(); -+ get_signature_page(); -+ } -+ toi_sig_data->resumed_before = flag; -+ return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, -+ resume_firstblock, virt_to_page(toi_cur_sig_page)); -+} -+ -+int toi_bio_mark_have_image(void) -+{ -+ int result = 0; -+ char buf[32]; -+ struct fs_info *fs_info; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists."); -+ memcpy(toi_sig_data->sig, tuxonice_signature, -+ sizeof(tuxonice_signature)); -+ toi_sig_data->have_image = 1; -+ toi_sig_data->resumed_before = 0; -+ toi_sig_data->header_dev_t = get_header_dev_t(); -+ toi_sig_data->have_uuid = 0; -+ -+ fs_info = fs_info_from_block_dev(get_header_bdev()); -+ if (fs_info && !IS_ERR(fs_info)) { -+ memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16); -+ free_fs_info(fs_info); -+ } else -+ result = (int) PTR_ERR(fs_info); -+ -+ if (!result) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.", -+ format_dev_t(buf, get_header_dev_t())); -+ toi_sig_data->have_uuid = 1; -+ } else -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for " -+ "dev_t %s.", -+ format_dev_t(buf, get_header_dev_t())); -+ -+ toi_sig_data->first_header_block = get_headerblock(); -+ have_image = 1; -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. 
First block " -+ "is %d.", toi_sig_data->header_dev_t, -+ toi_sig_data->first_header_block); -+ -+ memcpy(toi_sig_data->sig2, tuxonice_signature, -+ sizeof(tuxonice_signature)); -+ toi_sig_data->header_version = TOI_HEADER_VERSION; -+ -+ return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, -+ resume_firstblock, virt_to_page(toi_cur_sig_page)); -+} -+ -+int remove_old_signature(void) -+{ -+ union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page; -+ char *orig_sig; -+ char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP); -+ int result; -+ struct block_device *header_bdev; -+ struct old_sig_data *old_sig_data = -+ &swap_header_page.pointer->old_sig_data; -+ -+ header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1); -+ result = toi_bio_ops.bdev_page_io(READ, header_bdev, -+ old_sig_data->sector, virt_to_page(header_start)); -+ -+ if (result) -+ goto out; -+ -+ /* -+ * TODO: Get the original contents of the first bytes of the swap -+ * header page. -+ */ -+ if (!old_sig_data->orig_sig_type) -+ orig_sig = "SWAP-SPACE"; -+ else -+ orig_sig = "SWAPSPACE2"; -+ -+ memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10); -+ memcpy(swap_header_page.ptr, header_start, 10); -+ -+ result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device, -+ resume_firstblock, virt_to_page(swap_header_page.ptr)); -+ -+out: -+ toi_close_bdev(header_bdev); -+ have_old_image = 0; -+ toi_free_page(38, (unsigned long) header_start); -+ return result; -+} -+ -+/* -+ * toi_bio_restore_original_signature - restore the original signature -+ * -+ * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used. -+ * It will have the original signature page contents, stored in the image -+ * header. Post atomic-restore, we use :toi_cur_sig_page, which will contain -+ * the contents that were loaded when we started the cycle. -+ */ -+int toi_bio_restore_original_signature(void) -+{ -+ char *use = toi_orig_sig_page ? 
toi_orig_sig_page : toi_cur_sig_page; -+ -+ if (have_old_image) -+ return remove_old_signature(); -+ -+ if (!use) { -+ printk("toi_bio_restore_original_signature: No signature " -+ "page loaded.\n"); -+ return 0; -+ } -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists."); -+ have_image = 0; -+ toi_sig_data->have_image = 0; -+ return toi_bio_ops.bdev_page_io(WRITE, resume_block_device, -+ resume_firstblock, virt_to_page(use)); -+} -+ -+/* -+ * check_for_signature - See whether we have an image. -+ * -+ * Returns 0 if no image, 1 if there is one, -1 if indeterminate. -+ */ -+int toi_check_for_signature(void) -+{ -+ union p_diskpage swap_header_page; -+ int type; -+ const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2" }; -+ const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND" }; -+ char *swap_header; -+ -+ if (!toi_cur_sig_page) { -+ int result = get_signature_page(); -+ -+ if (result) -+ return result; -+ } -+ -+ /* -+ * Start by looking for the binary header. -+ */ -+ if (!memcmp(tuxonice_signature, toi_cur_sig_page, -+ sizeof(tuxonice_signature))) { -+ have_image = toi_sig_data->have_image; -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. " -+ "Have image is %d.", have_image); -+ if (have_image) -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is " -+ "%x. First block is %d.", -+ toi_sig_data->header_dev_t, -+ toi_sig_data->first_header_block); -+ return toi_sig_data->have_image; -+ } -+ -+ /* -+ * Failing that, try old file allocator headers. -+ */ -+ -+ if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) { -+ have_image = 1; -+ return 1; -+ } -+ -+ have_image = 0; -+ -+ if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage))) -+ return 0; -+ -+ /* -+ * Nope? How about swap? -+ */ -+ swap_header_page = (union p_diskpage) toi_cur_sig_page; -+ swap_header = swap_header_page.pointer->swh.magic.magic; -+ -+ /* Normal swapspace? 
*/ -+ for (type = 0; type < 2; type++) -+ if (!memcmp(normal_sigs[type], swap_header, -+ strlen(normal_sigs[type]))) -+ return 0; -+ -+ /* Swsusp or uswsusp? */ -+ for (type = 0; type < 3; type++) -+ if (!memcmp(swsusp_sigs[type], swap_header, -+ strlen(swsusp_sigs[type]))) -+ return 2; -+ -+ /* Old TuxOnIce version? */ -+ if (!memcmp(tuxonice_signature, swap_header, -+ sizeof(tuxonice_signature) - 1)) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce " -+ "signature."); -+ have_old_image = 1; -+ return 3; -+ } -+ -+ return -1; -+} -+ -+/* -+ * Image_exists -+ * -+ * Returns -1 if don't know, otherwise 0 (no) or 1 (yes). -+ */ -+int toi_bio_image_exists(int quiet) -+{ -+ int result; -+ char *msg = NULL; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists."); -+ -+ if (!resume_dev_t) { -+ if (!quiet) -+ printk(KERN_INFO "Not even trying to read header " -+ "because resume_dev_t is not set.\n"); -+ return -1; -+ } -+ -+ if (open_resume_dev_t(0, quiet)) -+ return -1; -+ -+ result = toi_check_for_signature(); -+ -+ clear_toi_state(TOI_RESUMED_BEFORE); -+ if (toi_sig_data->resumed_before) -+ set_toi_state(TOI_RESUMED_BEFORE); -+ -+ if (quiet || result == -ENOMEM) -+ return result; -+ -+ if (result == -1) -+ msg = "TuxOnIce: Unable to find a signature." 
-+ " Could you have moved a swap file?\n"; -+ else if (!result) -+ msg = "TuxOnIce: No image found.\n"; -+ else if (result == 1) -+ msg = "TuxOnIce: Image found.\n"; -+ else if (result == 2) -+ msg = "TuxOnIce: uswsusp or swsusp image found.\n"; -+ else if (result == 3) -+ msg = "TuxOnIce: Old implementation's signature found.\n"; -+ -+ printk(KERN_INFO "%s", msg); -+ -+ return result; -+} -+ -+int toi_bio_scan_for_image(int quiet) -+{ -+ struct block_device *bdev; -+ char default_name[255] = ""; -+ -+ if (!quiet) -+ printk(KERN_DEBUG "Scanning swap devices for TuxOnIce " -+ "signature...\n"); -+ for (bdev = next_bdev_of_type(NULL, "swap"); bdev; -+ bdev = next_bdev_of_type(bdev, "swap")) { -+ int result; -+ char name[255] = ""; -+ sprintf(name, "%u:%u", MAJOR(bdev->bd_dev), -+ MINOR(bdev->bd_dev)); -+ if (!quiet) -+ printk(KERN_DEBUG "- Trying %s.\n", name); -+ resume_block_device = bdev; -+ resume_dev_t = bdev->bd_dev; -+ -+ result = toi_check_for_signature(); -+ -+ resume_block_device = NULL; -+ resume_dev_t = MKDEV(0, 0); -+ -+ if (!default_name[0]) -+ strcpy(default_name, name); -+ -+ if (result == 1) { -+ /* Got one! */ -+ strcpy(resume_file, name); -+ next_bdev_of_type(bdev, NULL); -+ if (!quiet) -+ printk(KERN_DEBUG " ==> Image found on %s.\n", -+ resume_file); -+ return 1; -+ } -+ forget_signature_page(); -+ } -+ -+ if (!quiet) -+ printk(KERN_DEBUG "TuxOnIce scan: No image found.\n"); -+ strcpy(resume_file, default_name); -+ return 0; -+} -+ -+int toi_bio_get_header_version(void) -+{ -+ return (memcmp(toi_sig_data->sig2, tuxonice_signature, -+ sizeof(tuxonice_signature))) ? -+ 0 : toi_sig_data->header_version; -+ -+} -diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c -new file mode 100644 -index 0000000..a565bf6 ---- /dev/null -+++ b/kernel/power/tuxonice_builtin.c -@@ -0,0 +1,445 @@ -+/* -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. 
-+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "tuxonice_io.h" -+#include "tuxonice.h" -+#include "tuxonice_extent.h" -+#include "tuxonice_netlink.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_pagedir.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_power_off.h" -+#include "tuxonice_alloc.h" -+ -+unsigned long toi_bootflags_mask; -+EXPORT_SYMBOL_GPL(toi_bootflags_mask); -+ -+/* -+ * Highmem related functions (x86 only). -+ */ -+ -+#ifdef CONFIG_HIGHMEM -+ -+/** -+ * copyback_high: Restore highmem pages. -+ * -+ * Highmem data and pbe lists are/can be stored in highmem. -+ * The format is slightly different to the lowmem pbe lists -+ * used for the assembly code: the last pbe in each page is -+ * a struct page * instead of struct pbe *, pointing to the -+ * next page where pbes are stored (or NULL if happens to be -+ * the end of the list). Since we don't want to generate -+ * unnecessary deltas against swsusp code, we use a cast -+ * instead of a union. 
-+ **/ -+ -+static void copyback_high(void) -+{ -+ struct page *pbe_page = (struct page *) restore_highmem_pblist; -+ struct pbe *this_pbe, *first_pbe; -+ unsigned long *origpage, *copypage; -+ int pbe_index = 1; -+ -+ if (!pbe_page) -+ return; -+ -+ this_pbe = (struct pbe *) kmap_atomic(pbe_page); -+ first_pbe = this_pbe; -+ -+ while (this_pbe) { -+ int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1; -+ -+ origpage = kmap_atomic(pfn_to_page((unsigned long) this_pbe->orig_address)); -+ copypage = kmap_atomic((struct page *) this_pbe->address); -+ -+ while (loop >= 0) { -+ *(origpage + loop) = *(copypage + loop); -+ loop--; -+ } -+ -+ kunmap_atomic(origpage); -+ kunmap_atomic(copypage); -+ -+ if (!this_pbe->next) -+ break; -+ -+ if (pbe_index < PBES_PER_PAGE) { -+ this_pbe++; -+ pbe_index++; -+ } else { -+ pbe_page = (struct page *) this_pbe->next; -+ kunmap_atomic(first_pbe); -+ if (!pbe_page) -+ return; -+ this_pbe = (struct pbe *) kmap_atomic(pbe_page); -+ first_pbe = this_pbe; -+ pbe_index = 1; -+ } -+ } -+ kunmap_atomic(first_pbe); -+} -+ -+#else /* CONFIG_HIGHMEM */ -+static void copyback_high(void) { } -+#endif -+ -+char toi_wait_for_keypress_dev_console(int timeout) -+{ -+ int fd, this_timeout = 255; -+ char key = '\0'; -+ struct termios t, t_backup; -+ -+ /* We should be guaranteed /dev/console exists after populate_rootfs() -+ * in init/main.c. -+ */ -+ fd = sys_open("/dev/console", O_RDONLY, 0); -+ if (fd < 0) { -+ printk(KERN_INFO "Couldn't open /dev/console.\n"); -+ return key; -+ } -+ -+ if (sys_ioctl(fd, TCGETS, (long)&t) < 0) -+ goto out_close; -+ -+ memcpy(&t_backup, &t, sizeof(t)); -+ -+ t.c_lflag &= ~(ISIG|ICANON|ECHO); -+ t.c_cc[VMIN] = 0; -+ -+new_timeout: -+ if (timeout > 0) { -+ this_timeout = timeout < 26 ? 
timeout : 25; -+ timeout -= this_timeout; -+ this_timeout *= 10; -+ } -+ -+ t.c_cc[VTIME] = this_timeout; -+ -+ if (sys_ioctl(fd, TCSETS, (long)&t) < 0) -+ goto out_restore; -+ -+ while (1) { -+ if (sys_read(fd, &key, 1) <= 0) { -+ if (timeout) -+ goto new_timeout; -+ key = '\0'; -+ break; -+ } -+ key = tolower(key); -+ if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) { -+ if (key == 'c') { -+ set_toi_state(TOI_CONTINUE_REQ); -+ break; -+ } else if (key == ' ') -+ break; -+ } else -+ break; -+ } -+ -+out_restore: -+ sys_ioctl(fd, TCSETS, (long)&t_backup); -+out_close: -+ sys_close(fd); -+ -+ return key; -+} -+EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console); -+ -+struct toi_boot_kernel_data toi_bkd __nosavedata -+ __attribute__((aligned(PAGE_SIZE))) = { -+ MY_BOOT_KERNEL_DATA_VERSION, -+ 0, -+#ifdef CONFIG_TOI_REPLACE_SWSUSP -+ (1 << TOI_REPLACE_SWSUSP) | -+#endif -+ (1 << TOI_NO_FLUSHER_THREAD) | -+ (1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG), -+}; -+EXPORT_SYMBOL_GPL(toi_bkd); -+ -+struct block_device *toi_open_by_devnum(dev_t dev) -+{ -+ struct block_device *bdev = bdget(dev); -+ int err = -ENOMEM; -+ if (bdev) -+ err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); -+ return err ? ERR_PTR(err) : bdev; -+} -+EXPORT_SYMBOL_GPL(toi_open_by_devnum); -+ -+/** -+ * toi_close_bdev: Close a swap bdev. -+ * -+ * int: The swap entry number to close. 
-+ */ -+void toi_close_bdev(struct block_device *bdev) -+{ -+ blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); -+} -+EXPORT_SYMBOL_GPL(toi_close_bdev); -+ -+int toi_wait = CONFIG_TOI_DEFAULT_WAIT; -+EXPORT_SYMBOL_GPL(toi_wait); -+ -+struct toi_core_fns *toi_core_fns; -+EXPORT_SYMBOL_GPL(toi_core_fns); -+ -+unsigned long toi_result; -+EXPORT_SYMBOL_GPL(toi_result); -+ -+struct pagedir pagedir1 = {1}; -+EXPORT_SYMBOL_GPL(pagedir1); -+ -+unsigned long toi_get_nonconflicting_page(void) -+{ -+ return toi_core_fns->get_nonconflicting_page(); -+} -+ -+int toi_post_context_save(void) -+{ -+ return toi_core_fns->post_context_save(); -+} -+ -+int try_tuxonice_hibernate(void) -+{ -+ if (!toi_core_fns) -+ return -ENODEV; -+ -+ return toi_core_fns->try_hibernate(); -+} -+ -+static int num_resume_calls; -+#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL -+static int ignore_late_initcall = 1; -+#else -+static int ignore_late_initcall; -+#endif -+ -+int toi_translate_err_default = TOI_CONTINUE_REQ; -+EXPORT_SYMBOL_GPL(toi_translate_err_default); -+ -+void try_tuxonice_resume(void) -+{ -+ /* Don't let it wrap around eventually */ -+ if (num_resume_calls < 2) -+ num_resume_calls++; -+ -+ if (num_resume_calls == 1 && ignore_late_initcall) { -+ printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n"); -+ return; -+ } -+ -+ if (toi_core_fns) -+ toi_core_fns->try_resume(); -+ else -+ printk(KERN_INFO "TuxOnIce core not loaded yet.\n"); -+} -+ -+int toi_lowlevel_builtin(void) -+{ -+ int error = 0; -+ -+ save_processor_state(); -+ error = swsusp_arch_suspend(); -+ if (error) -+ printk(KERN_ERR "Error %d hibernating\n", error); -+ -+ /* Restore control flow appears here */ -+ if (!toi_in_hibernate) { -+ copyback_high(); -+ set_toi_state(TOI_NOW_RESUMING); -+ } -+ -+ restore_processor_state(); -+ return error; -+} -+EXPORT_SYMBOL_GPL(toi_lowlevel_builtin); -+ -+unsigned long toi_compress_bytes_in; -+EXPORT_SYMBOL_GPL(toi_compress_bytes_in); -+ -+unsigned long toi_compress_bytes_out; 
-+EXPORT_SYMBOL_GPL(toi_compress_bytes_out); -+ -+int toi_in_suspend(void) -+{ -+ return in_suspend; -+} -+EXPORT_SYMBOL_GPL(toi_in_suspend); -+ -+unsigned long toi_state = ((1 << TOI_BOOT_TIME) | -+ (1 << TOI_IGNORE_LOGLEVEL) | -+ (1 << TOI_IO_STOPPED)); -+EXPORT_SYMBOL_GPL(toi_state); -+ -+/* The number of hibernates we have started (some may have been cancelled) */ -+unsigned int nr_hibernates; -+EXPORT_SYMBOL_GPL(nr_hibernates); -+ -+int toi_running; -+EXPORT_SYMBOL_GPL(toi_running); -+ -+__nosavedata int toi_in_hibernate; -+EXPORT_SYMBOL_GPL(toi_in_hibernate); -+ -+__nosavedata struct pbe *restore_highmem_pblist; -+EXPORT_SYMBOL_GPL(restore_highmem_pblist); -+ -+int toi_trace_allocs; -+EXPORT_SYMBOL_GPL(toi_trace_allocs); -+ -+void toi_read_lock_tasklist(void) -+{ -+ read_lock(&tasklist_lock); -+} -+EXPORT_SYMBOL_GPL(toi_read_lock_tasklist); -+ -+void toi_read_unlock_tasklist(void) -+{ -+ read_unlock(&tasklist_lock); -+} -+EXPORT_SYMBOL_GPL(toi_read_unlock_tasklist); -+ -+#ifdef CONFIG_TOI_ZRAM_SUPPORT -+int (*toi_flag_zram_disks) (void); -+EXPORT_SYMBOL_GPL(toi_flag_zram_disks); -+ -+int toi_do_flag_zram_disks(void) -+{ -+ return toi_flag_zram_disks ? 
(*toi_flag_zram_disks)() : 0; -+} -+EXPORT_SYMBOL_GPL(toi_do_flag_zram_disks); -+#endif -+ -+static int __init toi_wait_setup(char *str) -+{ -+ int value; -+ -+ if (sscanf(str, "=%d", &value)) { -+ if (value < -1 || value > 255) -+ printk(KERN_INFO "TuxOnIce_wait outside range -1 to " -+ "255.\n"); -+ else -+ toi_wait = value; -+ } -+ -+ return 1; -+} -+ -+__setup("toi_wait", toi_wait_setup); -+ -+static int __init toi_translate_retry_setup(char *str) -+{ -+ toi_translate_err_default = 0; -+ return 1; -+} -+ -+__setup("toi_translate_retry", toi_translate_retry_setup); -+ -+static int __init toi_debug_setup(char *str) -+{ -+ toi_bkd.toi_action |= (1 << TOI_LOGALL); -+ toi_bootflags_mask |= (1 << TOI_LOGALL); -+ toi_bkd.toi_debug_state = 255; -+ toi_bkd.toi_default_console_level = 7; -+ return 1; -+} -+ -+__setup("toi_debug_setup", toi_debug_setup); -+ -+static int __init toi_pause_setup(char *str) -+{ -+ toi_bkd.toi_action |= (1 << TOI_PAUSE); -+ toi_bootflags_mask |= (1 << TOI_PAUSE); -+ return 1; -+} -+ -+__setup("toi_pause", toi_pause_setup); -+ -+#ifdef CONFIG_PM_DEBUG -+static int __init toi_trace_allocs_setup(char *str) -+{ -+ int value; -+ -+ if (sscanf(str, "=%d", &value)) -+ toi_trace_allocs = value; -+ -+ return 1; -+} -+__setup("toi_trace_allocs", toi_trace_allocs_setup); -+#endif -+ -+static int __init toi_ignore_late_initcall_setup(char *str) -+{ -+ int value; -+ -+ if (sscanf(str, "=%d", &value)) -+ ignore_late_initcall = value; -+ -+ return 1; -+} -+ -+__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup); -+ -+static int __init toi_force_no_multithreaded_setup(char *str) -+{ -+ int value; -+ -+ toi_bkd.toi_action &= ~(1 << TOI_NO_MULTITHREADED_IO); -+ toi_bootflags_mask |= (1 << TOI_NO_MULTITHREADED_IO); -+ -+ if (sscanf(str, "=%d", &value) && value) -+ toi_bkd.toi_action |= (1 << TOI_NO_MULTITHREADED_IO); -+ -+ return 1; -+} -+ -+__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup); -+ -+#ifdef CONFIG_KGDB -+static int 
__init toi_post_resume_breakpoint_setup(char *str) -+{ -+ int value; -+ -+ toi_bkd.toi_action &= ~(1 << TOI_POST_RESUME_BREAKPOINT); -+ toi_bootflags_mask |= (1 << TOI_POST_RESUME_BREAKPOINT); -+ if (sscanf(str, "=%d", &value) && value) -+ toi_bkd.toi_action |= (1 << TOI_POST_RESUME_BREAKPOINT); -+ -+ return 1; -+} -+ -+__setup("toi_post_resume_break", toi_post_resume_breakpoint_setup); -+#endif -+ -+static int __init toi_disable_readahead_setup(char *str) -+{ -+ int value; -+ -+ toi_bkd.toi_action &= ~(1 << TOI_NO_READAHEAD); -+ toi_bootflags_mask |= (1 << TOI_NO_READAHEAD); -+ if (sscanf(str, "=%d", &value) && value) -+ toi_bkd.toi_action |= (1 << TOI_NO_READAHEAD); -+ -+ return 1; -+} -+ -+__setup("toi_no_readahead", toi_disable_readahead_setup); -diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h -new file mode 100644 -index 0000000..6a1fd41 ---- /dev/null -+++ b/kernel/power/tuxonice_builtin.h -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. 
-+ */ -+#include -+ -+extern struct toi_core_fns *toi_core_fns; -+extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out; -+extern unsigned int nr_hibernates; -+extern int toi_in_hibernate; -+ -+extern __nosavedata struct pbe *restore_highmem_pblist; -+ -+int toi_lowlevel_builtin(void); -+ -+#ifdef CONFIG_HIGHMEM -+extern __nosavedata struct zone_data *toi_nosave_zone_list; -+extern __nosavedata unsigned long toi_nosave_max_pfn; -+#endif -+ -+extern unsigned long toi_get_nonconflicting_page(void); -+extern int toi_post_context_save(void); -+ -+extern char toi_wait_for_keypress_dev_console(int timeout); -+extern struct block_device *toi_open_by_devnum(dev_t dev); -+extern void toi_close_bdev(struct block_device *bdev); -+extern int toi_wait; -+extern int toi_translate_err_default; -+extern int toi_force_no_multithreaded; -+extern void toi_read_lock_tasklist(void); -+extern void toi_read_unlock_tasklist(void); -+extern int toi_in_suspend(void); -+ -+#ifdef CONFIG_TOI_ZRAM_SUPPORT -+extern int toi_do_flag_zram_disks(void); -+#else -+#define toi_do_flag_zram_disks() (0) -+#endif -diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c -new file mode 100644 -index 0000000..305475c ---- /dev/null -+++ b/kernel/power/tuxonice_checksum.c -@@ -0,0 +1,384 @@ -+/* -+ * kernel/power/tuxonice_checksum.c -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file contains data checksum routines for TuxOnIce, -+ * using cryptoapi. They are used to locate any modifications -+ * made to pageset 2 while we're saving it. 
-+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_io.h" -+#include "tuxonice_pageflags.h" -+#include "tuxonice_checksum.h" -+#include "tuxonice_pagedir.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_ui.h" -+ -+static struct toi_module_ops toi_checksum_ops; -+ -+/* Constant at the mo, but I might allow tuning later */ -+static char toi_checksum_name[32] = "md4"; -+/* Bytes per checksum */ -+#define CHECKSUM_SIZE (16) -+ -+#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE) -+ -+struct cpu_context { -+ struct crypto_hash *transform; -+ struct hash_desc desc; -+ struct scatterlist sg[2]; -+ char *buf; -+}; -+ -+static DEFINE_PER_CPU(struct cpu_context, contexts); -+static int pages_allocated; -+static unsigned long page_list; -+ -+static int toi_num_resaved; -+ -+static unsigned long this_checksum, next_page; -+static int checksum_index; -+ -+static inline int checksum_pages_needed(void) -+{ -+ return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE); -+} -+ -+/* ---- Local buffer management ---- */ -+ -+/* -+ * toi_checksum_cleanup -+ * -+ * Frees memory allocated for our labours. -+ */ -+static void toi_checksum_cleanup(int ending_cycle) -+{ -+ int cpu; -+ -+ if (ending_cycle) { -+ for_each_online_cpu(cpu) { -+ struct cpu_context *this = &per_cpu(contexts, cpu); -+ if (this->transform) { -+ crypto_free_hash(this->transform); -+ this->transform = NULL; -+ this->desc.tfm = NULL; -+ } -+ -+ if (this->buf) { -+ toi_free_page(27, (unsigned long) this->buf); -+ this->buf = NULL; -+ } -+ } -+ } -+} -+ -+/* -+ * toi_crypto_initialise -+ * -+ * Prepare to do some work by allocating buffers and transforms. -+ * Returns: Int: Zero. Even if we can't set up checksum, we still -+ * seek to hibernate. 
-+ */ -+static int toi_checksum_initialise(int starting_cycle) -+{ -+ int cpu; -+ -+ if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled) -+ return 0; -+ -+ if (!*toi_checksum_name) { -+ printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n"); -+ return 1; -+ } -+ -+ for_each_online_cpu(cpu) { -+ struct cpu_context *this = &per_cpu(contexts, cpu); -+ struct page *page; -+ -+ this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0); -+ if (IS_ERR(this->transform)) { -+ printk(KERN_INFO "TuxOnIce: Failed to initialise the " -+ "%s checksum algorithm: %ld.\n", -+ toi_checksum_name, (long) this->transform); -+ this->transform = NULL; -+ return 1; -+ } -+ -+ this->desc.tfm = this->transform; -+ this->desc.flags = 0; -+ -+ page = toi_alloc_page(27, GFP_KERNEL); -+ if (!page) -+ return 1; -+ this->buf = page_address(page); -+ sg_init_one(&this->sg[0], this->buf, PAGE_SIZE); -+ } -+ return 0; -+} -+ -+/* -+ * toi_checksum_print_debug_stats -+ * @buffer: Pointer to a buffer into which the debug info will be printed. -+ * @size: Size of the buffer. -+ * -+ * Print information to be recorded for debugging purposes into a buffer. -+ * Returns: Number of characters written to the buffer. -+ */ -+ -+static int toi_checksum_print_debug_stats(char *buffer, int size) -+{ -+ int len; -+ -+ if (!toi_checksum_ops.enabled) -+ return scnprintf(buffer, size, -+ "- Checksumming disabled.\n"); -+ -+ len = scnprintf(buffer, size, "- Checksum method is '%s'.\n", -+ toi_checksum_name); -+ len += scnprintf(buffer + len, size - len, -+ " %d pages resaved in atomic copy.\n", toi_num_resaved); -+ return len; -+} -+ -+static int toi_checksum_memory_needed(void) -+{ -+ return toi_checksum_ops.enabled ? 
-+ checksum_pages_needed() << PAGE_SHIFT : 0; -+} -+ -+static int toi_checksum_storage_needed(void) -+{ -+ if (toi_checksum_ops.enabled) -+ return strlen(toi_checksum_name) + sizeof(int) + 1; -+ else -+ return 0; -+} -+ -+/* -+ * toi_checksum_save_config_info -+ * @buffer: Pointer to a buffer of size PAGE_SIZE. -+ * -+ * Save informaton needed when reloading the image at resume time. -+ * Returns: Number of bytes used for saving our data. -+ */ -+static int toi_checksum_save_config_info(char *buffer) -+{ -+ int namelen = strlen(toi_checksum_name) + 1; -+ int total_len; -+ -+ *((unsigned int *) buffer) = namelen; -+ strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen); -+ total_len = sizeof(unsigned int) + namelen; -+ return total_len; -+} -+ -+/* toi_checksum_load_config_info -+ * @buffer: Pointer to the start of the data. -+ * @size: Number of bytes that were saved. -+ * -+ * Description: Reload information needed for dechecksuming the image at -+ * resume time. -+ */ -+static void toi_checksum_load_config_info(char *buffer, int size) -+{ -+ int namelen; -+ -+ namelen = *((unsigned int *) (buffer)); -+ strncpy(toi_checksum_name, buffer + sizeof(unsigned int), -+ namelen); -+ return; -+} -+ -+/* -+ * Free Checksum Memory -+ */ -+ -+void free_checksum_pages(void) -+{ -+ while (pages_allocated) { -+ unsigned long next = *((unsigned long *) page_list); -+ ClearPageNosave(virt_to_page(page_list)); -+ toi_free_page(15, (unsigned long) page_list); -+ page_list = next; -+ pages_allocated--; -+ } -+} -+ -+/* -+ * Allocate Checksum Memory -+ */ -+ -+int allocate_checksum_pages(void) -+{ -+ int pages_needed = checksum_pages_needed(); -+ -+ if (!toi_checksum_ops.enabled) -+ return 0; -+ -+ while (pages_allocated < pages_needed) { -+ unsigned long *new_page = -+ (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP); -+ if (!new_page) { -+ printk(KERN_ERR "Unable to allocate checksum pages.\n"); -+ return -ENOMEM; -+ } -+ SetPageNosave(virt_to_page(new_page)); 
-+ (*new_page) = page_list; -+ page_list = (unsigned long) new_page; -+ pages_allocated++; -+ } -+ -+ next_page = (unsigned long) page_list; -+ checksum_index = 0; -+ -+ return 0; -+} -+ -+char *tuxonice_get_next_checksum(void) -+{ -+ if (!toi_checksum_ops.enabled) -+ return NULL; -+ -+ if (checksum_index % CHECKSUMS_PER_PAGE) -+ this_checksum += CHECKSUM_SIZE; -+ else { -+ this_checksum = next_page + sizeof(void *); -+ next_page = *((unsigned long *) next_page); -+ } -+ -+ checksum_index++; -+ return (char *) this_checksum; -+} -+ -+int tuxonice_calc_checksum(struct page *page, char *checksum_locn) -+{ -+ char *pa; -+ int result, cpu = smp_processor_id(); -+ struct cpu_context *ctx = &per_cpu(contexts, cpu); -+ -+ if (!toi_checksum_ops.enabled) -+ return 0; -+ -+ pa = kmap(page); -+ memcpy(ctx->buf, pa, PAGE_SIZE); -+ kunmap(page); -+ result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, -+ checksum_locn); -+ if (result) -+ printk(KERN_ERR "TuxOnIce checksumming: crypto_hash_digest " -+ "returned %d.\n", result); -+ return result; -+} -+/* -+ * Calculate checksums -+ */ -+ -+void check_checksums(void) -+{ -+ int pfn, index = 0, cpu = smp_processor_id(); -+ char current_checksum[CHECKSUM_SIZE]; -+ struct cpu_context *ctx = &per_cpu(contexts, cpu); -+ -+ if (!toi_checksum_ops.enabled) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksumming disabled."); -+ return; -+ } -+ -+ next_page = (unsigned long) page_list; -+ -+ toi_num_resaved = 0; -+ this_checksum = 0; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Verifying checksums."); -+ memory_bm_position_reset(pageset2_map); -+ for (pfn = memory_bm_next_pfn(pageset2_map); pfn != BM_END_OF_MAP; -+ pfn = memory_bm_next_pfn(pageset2_map)) { -+ int ret; -+ char *pa; -+ struct page *page = pfn_to_page(pfn); -+ -+ if (index % CHECKSUMS_PER_PAGE) { -+ this_checksum += CHECKSUM_SIZE; -+ } else { -+ this_checksum = next_page + sizeof(void *); -+ next_page = *((unsigned long *) next_page); -+ } -+ -+ /* Done when IRQs disabled 
so must be atomic */ -+ pa = kmap_atomic(page); -+ memcpy(ctx->buf, pa, PAGE_SIZE); -+ kunmap_atomic(pa); -+ ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE, -+ current_checksum); -+ -+ if (ret) { -+ printk(KERN_INFO "Digest failed. Returned %d.\n", ret); -+ return; -+ } -+ -+ if (memcmp(current_checksum, (char *) this_checksum, -+ CHECKSUM_SIZE)) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Resaving %ld.", -+ pfn); -+ SetPageResave(pfn_to_page(pfn)); -+ toi_num_resaved++; -+ if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED)) -+ set_abort_result(TOI_RESAVE_NEEDED); -+ } -+ -+ index++; -+ } -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksum verification complete."); -+} -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0, -+ NULL), -+ SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_ABORT_ON_RESAVE_NEEDED, 0) -+}; -+ -+/* -+ * Ops structure. -+ */ -+static struct toi_module_ops toi_checksum_ops = { -+ .type = MISC_MODULE, -+ .name = "checksumming", -+ .directory = "checksum", -+ .module = THIS_MODULE, -+ .initialise = toi_checksum_initialise, -+ .cleanup = toi_checksum_cleanup, -+ .print_debug_info = toi_checksum_print_debug_stats, -+ .save_config_info = toi_checksum_save_config_info, -+ .load_config_info = toi_checksum_load_config_info, -+ .memory_needed = toi_checksum_memory_needed, -+ .storage_needed = toi_checksum_storage_needed, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* ---- Registration ---- */ -+int toi_checksum_init(void) -+{ -+ int result = toi_register_module(&toi_checksum_ops); -+ return result; -+} -+ -+void toi_checksum_exit(void) -+{ -+ toi_unregister_module(&toi_checksum_ops); -+} -diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h -new file mode 100644 -index 0000000..08d3e7a ---- /dev/null -+++ b/kernel/power/tuxonice_checksum.h -@@ -0,0 
+1,31 @@ -+/* -+ * kernel/power/tuxonice_checksum.h -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file contains data checksum routines for TuxOnIce, -+ * using cryptoapi. They are used to locate any modifications -+ * made to pageset 2 while we're saving it. -+ */ -+ -+#if defined(CONFIG_TOI_CHECKSUM) -+extern int toi_checksum_init(void); -+extern void toi_checksum_exit(void); -+void check_checksums(void); -+int allocate_checksum_pages(void); -+void free_checksum_pages(void); -+char *tuxonice_get_next_checksum(void); -+int tuxonice_calc_checksum(struct page *page, char *checksum_locn); -+#else -+static inline int toi_checksum_init(void) { return 0; } -+static inline void toi_checksum_exit(void) { } -+static inline void check_checksums(void) { }; -+static inline int allocate_checksum_pages(void) { return 0; }; -+static inline void free_checksum_pages(void) { }; -+static inline char *tuxonice_get_next_checksum(void) { return NULL; }; -+static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn) -+ { return 0; } -+#endif -+ -diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c -new file mode 100644 -index 0000000..c504227 ---- /dev/null -+++ b/kernel/power/tuxonice_cluster.c -@@ -0,0 +1,1069 @@ -+/* -+ * kernel/power/tuxonice_cluster.c -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file contains routines for cluster hibernation support. -+ * -+ * Based on ip autoconfiguration code in net/ipv4/ipconfig.c. -+ * -+ * How does it work? -+ * -+ * There is no 'master' node that tells everyone else what to do. All nodes -+ * send messages to the broadcast address/port, maintain a list of peers -+ * and figure out when to progress to the next step in hibernating or resuming. 
-+ * This makes us more fault tolerant when it comes to nodes coming and going -+ * (which may be more of an issue if we're hibernating when power supplies -+ * are being unreliable). -+ * -+ * At boot time, we start a ktuxonice thread that handles communication with -+ * other nodes. This node maintains a state machine that controls our progress -+ * through hibernating and resuming, keeping us in step with other nodes. Nodes -+ * are identified by their hw address. -+ * -+ * On startup, the node sends CLUSTER_PING on the configured interface's -+ * broadcast address, port $toi_cluster_port (see below) and begins to listen -+ * for other broadcast messages. CLUSTER_PING messages are repeated at -+ * intervals of 5 minutes, with a random offset to spread traffic out. -+ * -+ * A hibernation cycle is initiated from any node via -+ * -+ * echo > /sys/power/tuxonice/do_hibernate -+ * -+ * and (possibily) the hibernate script. At each step of the process, the node -+ * completes its work, and waits for all other nodes to signal completion of -+ * their work (or timeout) before progressing to the next step. -+ * -+ * Request/state Action before reply Possible reply Next state -+ * HIBERNATE capable, pre-script HIBERNATE|ACK NODE_PREP -+ * HIBERNATE|NACK INIT_0 -+ * -+ * PREP prepare_image PREP|ACK IMAGE_WRITE -+ * PREP|NACK INIT_0 -+ * ABORT RUNNING -+ * -+ * IO write image IO|ACK power off -+ * ABORT POST_RESUME -+ * -+ * (Boot time) check for image IMAGE|ACK RESUME_PREP -+ * (Note 1) -+ * IMAGE|NACK (Note 2) -+ * -+ * PREP prepare read image PREP|ACK IMAGE_READ -+ * PREP|NACK (As NACK_IMAGE) -+ * -+ * IO read image IO|ACK POST_RESUME -+ * -+ * POST_RESUME thaw, post-script RUNNING -+ * -+ * INIT_0 init 0 -+ * -+ * Other messages: -+ * -+ * - PING: Request for all other live nodes to send a PONG. Used at startup to -+ * announce presence, when a node is suspected dead and periodically, in case -+ * segments of the network are [un]plugged. 
-+ * -+ * - PONG: Response to a PING. -+ * -+ * - ABORT: Request to cancel writing an image. -+ * -+ * - BYE: Notification that this node is shutting down. -+ * -+ * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that -+ * nodes which are slower to start up can get state synchronised. If a node -+ * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send -+ * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it -+ * must invalidate its image (if any) and boot normally. -+ * -+ * Note 2: May occur when one node lost power or powered off while others -+ * hibernated. This node waits for others to complete resuming (ACK_READ) -+ * before completing its boot, so that it appears as a fail node restarting. -+ * -+ * If any node has an image, then it also has a list of nodes that hibernated -+ * in synchronisation with it. The node will wait for other nodes to appear -+ * or timeout before beginning its restoration. -+ * -+ * If a node has no image, it needs to wait, in case other nodes which do have -+ * an image are going to resume, but are taking longer to announce their -+ * presence. For this reason, the user can specify a timeout value and a number -+ * of nodes detected before we just continue. (We might want to assume in a -+ * cluster of, say, 15 nodes, if 8 others have booted without finding an image, -+ * the remaining nodes will too. This might help in situations where some nodes -+ * are much slower to boot, or more subject to hardware failures or such like). -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_io.h" -+ -+#if 1 -+#define PRINTK(a, b...) do { printk(a, ##b); } while (0) -+#else -+#define PRINTK(a, b...) 
do { } while (0) -+#endif -+ -+static int loopback_mode; -+static int num_local_nodes = 1; -+#define MAX_LOCAL_NODES 8 -+#define SADDR (loopback_mode ? b->sid : h->saddr) -+ -+#define MYNAME "TuxOnIce Clustering" -+ -+enum cluster_message { -+ MSG_ACK = 1, -+ MSG_NACK = 2, -+ MSG_PING = 4, -+ MSG_ABORT = 8, -+ MSG_BYE = 16, -+ MSG_HIBERNATE = 32, -+ MSG_IMAGE = 64, -+ MSG_IO = 128, -+ MSG_RUNNING = 256 -+}; -+ -+static char *str_message(int message) -+{ -+ switch (message) { -+ case 4: -+ return "Ping"; -+ case 8: -+ return "Abort"; -+ case 9: -+ return "Abort acked"; -+ case 10: -+ return "Abort nacked"; -+ case 16: -+ return "Bye"; -+ case 17: -+ return "Bye acked"; -+ case 18: -+ return "Bye nacked"; -+ case 32: -+ return "Hibernate request"; -+ case 33: -+ return "Hibernate ack"; -+ case 34: -+ return "Hibernate nack"; -+ case 64: -+ return "Image exists?"; -+ case 65: -+ return "Image does exist"; -+ case 66: -+ return "No image here"; -+ case 128: -+ return "I/O"; -+ case 129: -+ return "I/O okay"; -+ case 130: -+ return "I/O failed"; -+ case 256: -+ return "Running"; -+ default: -+ printk(KERN_ERR "Unrecognised message %d.\n", message); -+ return "Unrecognised message (see dmesg)"; -+ } -+} -+ -+#define MSG_ACK_MASK (MSG_ACK | MSG_NACK) -+#define MSG_STATE_MASK (~MSG_ACK_MASK) -+ -+struct node_info { -+ struct list_head member_list; -+ wait_queue_head_t member_events; -+ spinlock_t member_list_lock; -+ spinlock_t receive_lock; -+ int peer_count, ignored_peer_count; -+ struct toi_sysfs_data sysfs_data; -+ enum cluster_message current_message; -+}; -+ -+struct node_info node_array[MAX_LOCAL_NODES]; -+ -+struct cluster_member { -+ __be32 addr; -+ enum cluster_message message; -+ struct list_head list; -+ int ignore; -+}; -+ -+#define toi_cluster_port_send 3501 -+#define toi_cluster_port_recv 3502 -+ -+static struct net_device *net_dev; -+static struct toi_module_ops toi_cluster_ops; -+ -+static int toi_recv(struct sk_buff *skb, struct net_device *dev, -+ struct 
packet_type *pt, struct net_device *orig_dev); -+ -+static struct packet_type toi_cluster_packet_type = { -+ .type = __constant_htons(ETH_P_IP), -+ .func = toi_recv, -+}; -+ -+struct toi_pkt { /* BOOTP packet format */ -+ struct iphdr iph; /* IP header */ -+ struct udphdr udph; /* UDP header */ -+ u8 htype; /* HW address type */ -+ u8 hlen; /* HW address length */ -+ __be32 xid; /* Transaction ID */ -+ __be16 secs; /* Seconds since we started */ -+ __be16 flags; /* Just what it says */ -+ u8 hw_addr[16]; /* Sender's HW address */ -+ u16 message; /* Message */ -+ unsigned long sid; /* Source ID for loopback testing */ -+}; -+ -+static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE; -+ -+static int added_pack; -+ -+static int others_have_image; -+ -+/* Key used to allow multiple clusters on the same lan */ -+static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY; -+static char pre_hibernate_script[255] = -+ CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE; -+static char post_hibernate_script[255] = -+ CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE; -+ -+/* List of cluster members */ -+static unsigned long continue_delay = 5 * HZ; -+static unsigned long cluster_message_timeout = 3 * HZ; -+ -+/* === Membership list === */ -+ -+static void print_member_info(int index) -+{ -+ struct cluster_member *this; -+ -+ printk(KERN_INFO "==> Dumping node %d.\n", index); -+ -+ list_for_each_entry(this, &node_array[index].member_list, list) -+ printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n", -+ NIPQUAD(this->addr), -+ str_message(this->message), -+ this->ignore ? 
"(Ignored)" : ""); -+ printk(KERN_INFO "== Done ==\n"); -+} -+ -+static struct cluster_member *__find_member(int index, __be32 addr) -+{ -+ struct cluster_member *this; -+ -+ list_for_each_entry(this, &node_array[index].member_list, list) { -+ if (this->addr != addr) -+ continue; -+ -+ return this; -+ } -+ -+ return NULL; -+} -+ -+static void set_ignore(int index, __be32 addr, struct cluster_member *this) -+{ -+ if (this->ignore) { -+ PRINTK("Node %d already ignoring %d.%d.%d.%d.\n", -+ index, NIPQUAD(addr)); -+ return; -+ } -+ -+ PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n", -+ index, NIPQUAD(addr)); -+ this->ignore = 1; -+ node_array[index].ignored_peer_count++; -+} -+ -+static int __add_update_member(int index, __be32 addr, int message) -+{ -+ struct cluster_member *this; -+ -+ this = __find_member(index, addr); -+ if (this) { -+ if (this->message != message) { -+ this->message = message; -+ if ((message & MSG_NACK) && -+ (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) -+ set_ignore(index, addr, this); -+ PRINTK("Node %d sees node %d.%d.%d.%d now sending " -+ "%s.\n", index, NIPQUAD(addr), -+ str_message(message)); -+ wake_up(&node_array[index].member_events); -+ } -+ return 0; -+ } -+ -+ this = (struct cluster_member *) toi_kzalloc(36, -+ sizeof(struct cluster_member), GFP_KERNEL); -+ -+ if (!this) -+ return -1; -+ -+ this->addr = addr; -+ this->message = message; -+ this->ignore = 0; -+ INIT_LIST_HEAD(&this->list); -+ -+ node_array[index].peer_count++; -+ -+ PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index, -+ NIPQUAD(addr), str_message(message)); -+ -+ if ((message & MSG_NACK) && -+ (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO))) -+ set_ignore(index, addr, this); -+ list_add_tail(&this->list, &node_array[index].member_list); -+ return 1; -+} -+ -+static int add_update_member(int index, __be32 addr, int message) -+{ -+ int result; -+ unsigned long flags; -+ spin_lock_irqsave(&node_array[index].member_list_lock, flags); -+ result = 
__add_update_member(index, addr, message); -+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); -+ -+ print_member_info(index); -+ -+ wake_up(&node_array[index].member_events); -+ -+ return result; -+} -+ -+static void del_member(int index, __be32 addr) -+{ -+ struct cluster_member *this; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&node_array[index].member_list_lock, flags); -+ this = __find_member(index, addr); -+ -+ if (this) { -+ list_del_init(&this->list); -+ toi_kfree(36, this, sizeof(*this)); -+ node_array[index].peer_count--; -+ } -+ -+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); -+} -+ -+/* === Message transmission === */ -+ -+static void toi_send_if(int message, unsigned long my_id); -+ -+/* -+ * Process received TOI packet. -+ */ -+static int toi_recv(struct sk_buff *skb, struct net_device *dev, -+ struct packet_type *pt, struct net_device *orig_dev) -+{ -+ struct toi_pkt *b; -+ struct iphdr *h; -+ int len, result, index; -+ unsigned long addr, message, ack; -+ -+ /* Perform verifications before taking the lock. 
*/ -+ if (skb->pkt_type == PACKET_OTHERHOST) -+ goto drop; -+ -+ if (dev != net_dev) -+ goto drop; -+ -+ skb = skb_share_check(skb, GFP_ATOMIC); -+ if (!skb) -+ return NET_RX_DROP; -+ -+ if (!pskb_may_pull(skb, -+ sizeof(struct iphdr) + -+ sizeof(struct udphdr))) -+ goto drop; -+ -+ b = (struct toi_pkt *)skb_network_header(skb); -+ h = &b->iph; -+ -+ if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP) -+ goto drop; -+ -+ /* Fragments are not supported */ -+ if (h->frag_off & htons(IP_OFFSET | IP_MF)) { -+ if (net_ratelimit()) -+ printk(KERN_ERR "TuxOnIce: Ignoring fragmented " -+ "cluster message.\n"); -+ goto drop; -+ } -+ -+ if (skb->len < ntohs(h->tot_len)) -+ goto drop; -+ -+ if (ip_fast_csum((char *) h, h->ihl)) -+ goto drop; -+ -+ if (b->udph.source != htons(toi_cluster_port_send) || -+ b->udph.dest != htons(toi_cluster_port_recv)) -+ goto drop; -+ -+ if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr)) -+ goto drop; -+ -+ len = ntohs(b->udph.len) - sizeof(struct udphdr); -+ -+ /* Ok the front looks good, make sure we can get at the rest. */ -+ if (!pskb_may_pull(skb, skb->len)) -+ goto drop; -+ -+ b = (struct toi_pkt *)skb_network_header(skb); -+ h = &b->iph; -+ -+ addr = SADDR; -+ PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n", -+ str_message(b->message), NIPQUAD(addr)); -+ -+ message = b->message & MSG_STATE_MASK; -+ ack = b->message & MSG_ACK_MASK; -+ -+ for (index = 0; index < num_local_nodes; index++) { -+ int new_message = node_array[index].current_message, -+ old_message = new_message; -+ -+ if (index == SADDR || !old_message) { -+ PRINTK("Ignoring node %d (offline or self).\n", index); -+ continue; -+ } -+ -+ /* One message at a time, please. 
*/ -+ spin_lock(&node_array[index].receive_lock); -+ -+ result = add_update_member(index, SADDR, b->message); -+ if (result == -1) { -+ printk(KERN_INFO "Failed to add new cluster member " -+ NIPQUAD_FMT ".\n", -+ NIPQUAD(addr)); -+ goto drop_unlock; -+ } -+ -+ switch (b->message & MSG_STATE_MASK) { -+ case MSG_PING: -+ break; -+ case MSG_ABORT: -+ break; -+ case MSG_BYE: -+ break; -+ case MSG_HIBERNATE: -+ /* Can I hibernate? */ -+ new_message = MSG_HIBERNATE | -+ ((index & 1) ? MSG_NACK : MSG_ACK); -+ break; -+ case MSG_IMAGE: -+ /* Can I resume? */ -+ new_message = MSG_IMAGE | -+ ((index & 1) ? MSG_NACK : MSG_ACK); -+ if (new_message != old_message) -+ printk(KERN_ERR "Setting whether I can resume " -+ "to %d.\n", new_message); -+ break; -+ case MSG_IO: -+ new_message = MSG_IO | MSG_ACK; -+ break; -+ case MSG_RUNNING: -+ break; -+ default: -+ if (net_ratelimit()) -+ printk(KERN_ERR "Unrecognised TuxOnIce cluster" -+ " message %d from " NIPQUAD_FMT ".\n", -+ b->message, NIPQUAD(addr)); -+ }; -+ -+ if (old_message != new_message) { -+ node_array[index].current_message = new_message; -+ printk(KERN_INFO ">>> Sending new message for node " -+ "%d.\n", index); -+ toi_send_if(new_message, index); -+ } else if (!ack) { -+ printk(KERN_INFO ">>> Resending message for node %d.\n", -+ index); -+ toi_send_if(new_message, index); -+ } -+drop_unlock: -+ spin_unlock(&node_array[index].receive_lock); -+ }; -+ -+drop: -+ /* Throw the packet out. */ -+ kfree_skb(skb); -+ -+ return 0; -+} -+ -+/* -+ * Send cluster message to single interface. 
-+ */ -+static void toi_send_if(int message, unsigned long my_id) -+{ -+ struct sk_buff *skb; -+ struct toi_pkt *b; -+ int hh_len = LL_RESERVED_SPACE(net_dev); -+ struct iphdr *h; -+ -+ /* Allocate packet */ -+ skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_KERNEL); -+ if (!skb) -+ return; -+ skb_reserve(skb, hh_len); -+ b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt)); -+ memset(b, 0, sizeof(struct toi_pkt)); -+ -+ /* Construct IP header */ -+ skb_reset_network_header(skb); -+ h = ip_hdr(skb); -+ h->version = 4; -+ h->ihl = 5; -+ h->tot_len = htons(sizeof(struct toi_pkt)); -+ h->frag_off = htons(IP_DF); -+ h->ttl = 64; -+ h->protocol = IPPROTO_UDP; -+ h->daddr = htonl(INADDR_BROADCAST); -+ h->check = ip_fast_csum((unsigned char *) h, h->ihl); -+ -+ /* Construct UDP header */ -+ b->udph.source = htons(toi_cluster_port_send); -+ b->udph.dest = htons(toi_cluster_port_recv); -+ b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr)); -+ /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */ -+ -+ /* Construct message */ -+ b->message = message; -+ b->sid = my_id; -+ b->htype = net_dev->type; /* can cause undefined behavior */ -+ b->hlen = net_dev->addr_len; -+ memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len); -+ b->secs = htons(3); /* 3 seconds */ -+ -+ /* Chain packet down the line... 
*/ -+ skb->dev = net_dev; -+ skb->protocol = htons(ETH_P_IP); -+ if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol), -+ net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) || -+ dev_queue_xmit(skb) < 0) -+ printk(KERN_INFO "E"); -+} -+ -+/* ========================================= */ -+ -+/* kTOICluster */ -+ -+static atomic_t num_cluster_threads; -+static DECLARE_WAIT_QUEUE_HEAD(clusterd_events); -+ -+static int kTOICluster(void *data) -+{ -+ unsigned long my_id; -+ -+ my_id = atomic_add_return(1, &num_cluster_threads) - 1; -+ node_array[my_id].current_message = (unsigned long) data; -+ -+ PRINTK("kTOICluster daemon %lu starting.\n", my_id); -+ -+ current->flags |= PF_NOFREEZE; -+ -+ while (node_array[my_id].current_message) { -+ toi_send_if(node_array[my_id].current_message, my_id); -+ sleep_on_timeout(&clusterd_events, -+ cluster_message_timeout); -+ PRINTK("Link state %lu is %d.\n", my_id, -+ node_array[my_id].current_message); -+ } -+ -+ toi_send_if(MSG_BYE, my_id); -+ atomic_dec(&num_cluster_threads); -+ wake_up(&clusterd_events); -+ -+ PRINTK("kTOICluster daemon %lu exiting.\n", my_id); -+ __set_current_state(TASK_RUNNING); -+ return 0; -+} -+ -+static void kill_clusterd(void) -+{ -+ int i; -+ -+ for (i = 0; i < num_local_nodes; i++) { -+ if (node_array[i].current_message) { -+ PRINTK("Seeking to kill clusterd %d.\n", i); -+ node_array[i].current_message = 0; -+ } -+ } -+ wait_event(clusterd_events, -+ !atomic_read(&num_cluster_threads)); -+ PRINTK("All cluster daemons have exited.\n"); -+} -+ -+static int peers_not_in_message(int index, int message, int precise) -+{ -+ struct cluster_member *this; -+ unsigned long flags; -+ int result = 0; -+ -+ spin_lock_irqsave(&node_array[index].member_list_lock, flags); -+ list_for_each_entry(this, &node_array[index].member_list, list) { -+ if (this->ignore) -+ continue; -+ -+ PRINTK("Peer %d.%d.%d.%d sending %s. 
" -+ "Seeking %s.\n", -+ NIPQUAD(this->addr), -+ str_message(this->message), str_message(message)); -+ if ((precise ? this->message : -+ this->message & MSG_STATE_MASK) != -+ message) -+ result++; -+ } -+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); -+ PRINTK("%d peers in sought message.\n", result); -+ return result; -+} -+ -+static void reset_ignored(int index) -+{ -+ struct cluster_member *this; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&node_array[index].member_list_lock, flags); -+ list_for_each_entry(this, &node_array[index].member_list, list) -+ this->ignore = 0; -+ node_array[index].ignored_peer_count = 0; -+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags); -+} -+ -+static int peers_in_message(int index, int message, int precise) -+{ -+ return node_array[index].peer_count - -+ node_array[index].ignored_peer_count - -+ peers_not_in_message(index, message, precise); -+} -+ -+static int time_to_continue(int index, unsigned long start, int message) -+{ -+ int first = peers_not_in_message(index, message, 0); -+ int second = peers_in_message(index, message, 1); -+ -+ PRINTK("First part returns %d, second returns %d.\n", first, second); -+ -+ if (!first && !second) { -+ PRINTK("All peers answered message %d.\n", -+ message); -+ return 1; -+ } -+ -+ if (time_after(jiffies, start + continue_delay)) { -+ PRINTK("Timeout reached.\n"); -+ return 1; -+ } -+ -+ PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies, -+ start + continue_delay); -+ return 0; -+} -+ -+void toi_initiate_cluster_hibernate(void) -+{ -+ int result; -+ unsigned long start; -+ -+ result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); -+ if (result) -+ return; -+ -+ toi_send_if(MSG_HIBERNATE, 0); -+ -+ start = jiffies; -+ wait_event(node_array[0].member_events, -+ time_to_continue(0, start, MSG_HIBERNATE)); -+ -+ if (test_action_state(TOI_FREEZER_TEST)) { -+ toi_send_if(MSG_ABORT, 0); -+ -+ start = jiffies; -+ wait_event(node_array[0].member_events, 
-+ time_to_continue(0, start, MSG_RUNNING)); -+ -+ do_toi_step(STEP_QUIET_CLEANUP); -+ return; -+ } -+ -+ toi_send_if(MSG_IO, 0); -+ -+ result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); -+ if (result) -+ return; -+ -+ /* This code runs at resume time too! */ -+ if (toi_in_hibernate) -+ result = do_toi_step(STEP_HIBERNATE_POWERDOWN); -+} -+EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate); -+ -+/* toi_cluster_print_debug_stats -+ * -+ * Description: Print information to be recorded for debugging purposes into a -+ * buffer. -+ * Arguments: buffer: Pointer to a buffer into which the debug info will be -+ * printed. -+ * size: Size of the buffer. -+ * Returns: Number of characters written to the buffer. -+ */ -+static int toi_cluster_print_debug_stats(char *buffer, int size) -+{ -+ int len; -+ -+ if (strlen(toi_cluster_iface)) -+ len = scnprintf(buffer, size, -+ "- Cluster interface is '%s'.\n", -+ toi_cluster_iface); -+ else -+ len = scnprintf(buffer, size, -+ "- Cluster support is disabled.\n"); -+ return len; -+} -+ -+/* cluster_memory_needed -+ * -+ * Description: Tell the caller how much memory we need to operate during -+ * hibernate/resume. -+ * Returns: Unsigned long. Maximum number of bytes of memory required for -+ * operation. -+ */ -+static int toi_cluster_memory_needed(void) -+{ -+ return 0; -+} -+ -+static int toi_cluster_storage_needed(void) -+{ -+ return 1 + strlen(toi_cluster_iface); -+} -+ -+/* toi_cluster_save_config_info -+ * -+ * Description: Save informaton needed when reloading the image at resume time. -+ * Arguments: Buffer: Pointer to a buffer of size PAGE_SIZE. -+ * Returns: Number of bytes used for saving our data. -+ */ -+static int toi_cluster_save_config_info(char *buffer) -+{ -+ strcpy(buffer, toi_cluster_iface); -+ return strlen(toi_cluster_iface + 1); -+} -+ -+/* toi_cluster_load_config_info -+ * -+ * Description: Reload information needed for declustering the image at -+ * resume time. 
-+ * Arguments: Buffer: Pointer to the start of the data. -+ * Size: Number of bytes that were saved. -+ */ -+static void toi_cluster_load_config_info(char *buffer, int size) -+{ -+ strncpy(toi_cluster_iface, buffer, size); -+ return; -+} -+ -+static void cluster_startup(void) -+{ -+ int have_image = do_check_can_resume(), i; -+ unsigned long start = jiffies, initial_message; -+ struct task_struct *p; -+ -+ initial_message = MSG_IMAGE; -+ -+ have_image = 1; -+ -+ for (i = 0; i < num_local_nodes; i++) { -+ PRINTK("Starting ktoiclusterd %d.\n", i); -+ p = kthread_create(kTOICluster, (void *) initial_message, -+ "ktoiclusterd/%d", i); -+ if (IS_ERR(p)) { -+ printk(KERN_ERR "Failed to start ktoiclusterd.\n"); -+ return; -+ } -+ -+ wake_up_process(p); -+ } -+ -+ /* Wait for delay or someone else sending first message */ -+ wait_event(node_array[0].member_events, time_to_continue(0, start, -+ MSG_IMAGE)); -+ -+ others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1); -+ -+ printk(KERN_INFO "Continuing. I %shave an image. Peers with image:" -+ " %d.\n", have_image ? "" : "don't ", others_have_image); -+ -+ if (have_image) { -+ int result; -+ -+ /* Start to resume */ -+ printk(KERN_INFO " === Starting to resume === \n"); -+ node_array[0].current_message = MSG_IO; -+ toi_send_if(MSG_IO, 0); -+ -+ /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */ -+ result = 0; -+ -+ if (!result) { -+ /* -+ * Atomic restore - we'll come back in the hibernation -+ * path. -+ */ -+ -+ /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */ -+ result = 0; -+ -+ /* do_toi_step(STEP_QUIET_CLEANUP); */ -+ } -+ -+ node_array[0].current_message |= MSG_NACK; -+ -+ /* For debugging - disable for real life? 
*/ -+ wait_event(node_array[0].member_events, -+ time_to_continue(0, start, MSG_IO)); -+ } -+ -+ if (others_have_image) { -+ /* Wait for them to resume */ -+ printk(KERN_INFO "Waiting for other nodes to resume.\n"); -+ start = jiffies; -+ wait_event(node_array[0].member_events, -+ time_to_continue(0, start, MSG_RUNNING)); -+ if (peers_not_in_message(0, MSG_RUNNING, 0)) -+ printk(KERN_INFO "Timed out while waiting for other " -+ "nodes to resume.\n"); -+ } -+ -+ /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE -+ * as appropriate. -+ * -+ * If we don't have an image: -+ * - Wait until someone else says they have one, or conditions are met -+ * for continuing to boot (n machines or t seconds). -+ * - If anyone has an image, wait for them to resume before continuing -+ * to boot. -+ * -+ * If we have an image: -+ * - Wait until conditions are met before continuing to resume (n -+ * machines or t seconds). Send RESUME_PREP and freeze processes. -+ * NACK_PREP if freezing fails (shouldn't) and follow logic for -+ * us having no image above. On success, wait for [N]ACK_PREP from -+ * other machines. Read image (including atomic restore) until done. -+ * Wait for ACK_READ from others (should never fail). Thaw processes -+ * and do post-resume. (The section after the atomic restore is done -+ * via the code for hibernating). -+ */ -+ -+ node_array[0].current_message = MSG_RUNNING; -+} -+ -+/* toi_cluster_open_iface -+ * -+ * Description: Prepare to use an interface. 
-+ */ -+ -+static int toi_cluster_open_iface(void) -+{ -+ struct net_device *dev; -+ -+ rtnl_lock(); -+ -+ for_each_netdev(&init_net, dev) { -+ if (/* dev == &init_net.loopback_dev || */ -+ strcmp(dev->name, toi_cluster_iface)) -+ continue; -+ -+ net_dev = dev; -+ break; -+ } -+ -+ rtnl_unlock(); -+ -+ if (!net_dev) { -+ printk(KERN_ERR MYNAME ": Device %s not found.\n", -+ toi_cluster_iface); -+ return -ENODEV; -+ } -+ -+ dev_add_pack(&toi_cluster_packet_type); -+ added_pack = 1; -+ -+ loopback_mode = (net_dev == init_net.loopback_dev); -+ num_local_nodes = loopback_mode ? 8 : 1; -+ -+ PRINTK("Loopback mode is %s. Number of local nodes is %d.\n", -+ loopback_mode ? "on" : "off", num_local_nodes); -+ -+ cluster_startup(); -+ return 0; -+} -+ -+/* toi_cluster_close_iface -+ * -+ * Description: Stop using an interface. -+ */ -+ -+static int toi_cluster_close_iface(void) -+{ -+ kill_clusterd(); -+ if (added_pack) { -+ dev_remove_pack(&toi_cluster_packet_type); -+ added_pack = 0; -+ } -+ return 0; -+} -+ -+static void write_side_effect(void) -+{ -+ if (toi_cluster_ops.enabled) { -+ toi_cluster_open_iface(); -+ set_toi_state(TOI_CLUSTER_MODE); -+ } else { -+ toi_cluster_close_iface(); -+ clear_toi_state(TOI_CLUSTER_MODE); -+ } -+} -+ -+static void node_write_side_effect(void) -+{ -+} -+ -+/* -+ * data for our sysfs entries. -+ */ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0, -+ NULL), -+ SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0, -+ write_side_effect), -+ SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL), -+ SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script, -+ 256, 0, NULL), -+ SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script, -+ 256, 0, STRING), -+ SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ, -+ 0) -+}; -+ -+/* -+ * Ops structure. 
-+ */ -+ -+static struct toi_module_ops toi_cluster_ops = { -+ .type = FILTER_MODULE, -+ .name = "Cluster", -+ .directory = "cluster", -+ .module = THIS_MODULE, -+ .memory_needed = toi_cluster_memory_needed, -+ .print_debug_info = toi_cluster_print_debug_stats, -+ .save_config_info = toi_cluster_save_config_info, -+ .load_config_info = toi_cluster_load_config_info, -+ .storage_needed = toi_cluster_storage_needed, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* ---- Registration ---- */ -+ -+#ifdef MODULE -+#define INIT static __init -+#define EXIT static __exit -+#else -+#define INIT -+#define EXIT -+#endif -+ -+INIT int toi_cluster_init(void) -+{ -+ int temp = toi_register_module(&toi_cluster_ops), i; -+ struct kobject *kobj = toi_cluster_ops.dir_kobj; -+ -+ for (i = 0; i < MAX_LOCAL_NODES; i++) { -+ node_array[i].current_message = 0; -+ INIT_LIST_HEAD(&node_array[i].member_list); -+ init_waitqueue_head(&node_array[i].member_events); -+ spin_lock_init(&node_array[i].member_list_lock); -+ spin_lock_init(&node_array[i].receive_lock); -+ -+ /* Set up sysfs entry */ -+ node_array[i].sysfs_data.attr.name = toi_kzalloc(8, -+ sizeof(node_array[i].sysfs_data.attr.name), -+ GFP_KERNEL); -+ sprintf((char *) node_array[i].sysfs_data.attr.name, "node_%d", -+ i); -+ node_array[i].sysfs_data.attr.mode = SYSFS_RW; -+ node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER; -+ node_array[i].sysfs_data.flags = 0; -+ node_array[i].sysfs_data.data.integer.variable = -+ (int *) &node_array[i].current_message; -+ node_array[i].sysfs_data.data.integer.minimum = 0; -+ node_array[i].sysfs_data.data.integer.maximum = INT_MAX; -+ node_array[i].sysfs_data.write_side_effect = -+ node_write_side_effect; -+ toi_register_sysfs_file(kobj, &node_array[i].sysfs_data); -+ } -+ -+ toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0); -+ -+ if (toi_cluster_ops.enabled) -+ toi_cluster_open_iface(); -+ -+ return temp; 
-+} -+ -+EXIT void toi_cluster_exit(void) -+{ -+ int i; -+ toi_cluster_close_iface(); -+ -+ for (i = 0; i < MAX_LOCAL_NODES; i++) -+ toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj, -+ &node_array[i].sysfs_data); -+ toi_unregister_module(&toi_cluster_ops); -+} -+ -+static int __init toi_cluster_iface_setup(char *iface) -+{ -+ toi_cluster_ops.enabled = (*iface && -+ strcmp(iface, "off")); -+ -+ if (toi_cluster_ops.enabled) -+ strncpy(toi_cluster_iface, iface, strlen(iface)); -+} -+ -+__setup("toi_cluster=", toi_cluster_iface_setup); -+ -+#ifdef MODULE -+MODULE_LICENSE("GPL"); -+module_init(toi_cluster_init); -+module_exit(toi_cluster_exit); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("Cluster Support for TuxOnIce"); -+#endif -diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h -new file mode 100644 -index 0000000..5c46acc ---- /dev/null -+++ b/kernel/power/tuxonice_cluster.h -@@ -0,0 +1,18 @@ -+/* -+ * kernel/power/tuxonice_cluster.h -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ */ -+ -+#ifdef CONFIG_TOI_CLUSTER -+extern int toi_cluster_init(void); -+extern void toi_cluster_exit(void); -+extern void toi_initiate_cluster_hibernate(void); -+#else -+static inline int toi_cluster_init(void) { return 0; } -+static inline void toi_cluster_exit(void) { } -+static inline void toi_initiate_cluster_hibernate(void) { } -+#endif -+ -diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c -new file mode 100644 -index 0000000..362f8fb ---- /dev/null -+++ b/kernel/power/tuxonice_compress.c -@@ -0,0 +1,465 @@ -+/* -+ * kernel/power/compression.c -+ * -+ * Copyright (C) 2003-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file contains data compression routines for TuxOnIce, -+ * using cryptoapi. 
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "tuxonice_builtin.h" -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_io.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_alloc.h" -+ -+static int toi_expected_compression; -+ -+static struct toi_module_ops toi_compression_ops; -+static struct toi_module_ops *next_driver; -+ -+static char toi_compressor_name[32] = "lzo"; -+ -+static DEFINE_MUTEX(stats_lock); -+ -+struct cpu_context { -+ u8 *page_buffer; -+ struct crypto_comp *transform; -+ unsigned int len; -+ u8 *buffer_start; -+ u8 *output_buffer; -+}; -+ -+#define OUT_BUF_SIZE (2 * PAGE_SIZE) -+ -+static DEFINE_PER_CPU(struct cpu_context, contexts); -+ -+/* -+ * toi_crypto_prepare -+ * -+ * Prepare to do some work by allocating buffers and transforms. -+ */ -+static int toi_compress_crypto_prepare(void) -+{ -+ int cpu; -+ -+ if (!*toi_compressor_name) { -+ printk(KERN_INFO "TuxOnIce: Compression enabled but no " -+ "compressor name set.\n"); -+ return 1; -+ } -+ -+ for_each_online_cpu(cpu) { -+ struct cpu_context *this = &per_cpu(contexts, cpu); -+ this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0); -+ if (IS_ERR(this->transform)) { -+ printk(KERN_INFO "TuxOnIce: Failed to initialise the " -+ "%s compression transform.\n", -+ toi_compressor_name); -+ this->transform = NULL; -+ return 1; -+ } -+ -+ this->page_buffer = -+ (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP); -+ -+ if (!this->page_buffer) { -+ printk(KERN_ERR -+ "Failed to allocate a page buffer for TuxOnIce " -+ "compression driver.\n"); -+ return -ENOMEM; -+ } -+ -+ this->output_buffer = -+ (char *) vmalloc_32(OUT_BUF_SIZE); -+ -+ if (!this->output_buffer) { -+ printk(KERN_ERR -+ "Failed to allocate a output buffer for TuxOnIce " -+ "compression driver.\n"); -+ return -ENOMEM; -+ } -+ } -+ -+ return 0; -+} -+ -+static int toi_compress_rw_cleanup(int writing) -+{ -+ int cpu; -+ -+ for_each_online_cpu(cpu) { -+ 
struct cpu_context *this = &per_cpu(contexts, cpu); -+ if (this->transform) { -+ crypto_free_comp(this->transform); -+ this->transform = NULL; -+ } -+ -+ if (this->page_buffer) -+ toi_free_page(16, (unsigned long) this->page_buffer); -+ -+ this->page_buffer = NULL; -+ -+ if (this->output_buffer) -+ vfree(this->output_buffer); -+ -+ this->output_buffer = NULL; -+ } -+ -+ return 0; -+} -+ -+/* -+ * toi_compress_init -+ */ -+ -+static int toi_compress_init(int toi_or_resume) -+{ -+ if (!toi_or_resume) -+ return 0; -+ -+ toi_compress_bytes_in = 0; -+ toi_compress_bytes_out = 0; -+ -+ next_driver = toi_get_next_filter(&toi_compression_ops); -+ -+ return next_driver ? 0 : -ECHILD; -+} -+ -+/* -+ * toi_compress_rw_init() -+ */ -+ -+static int toi_compress_rw_init(int rw, int stream_number) -+{ -+ if (toi_compress_crypto_prepare()) { -+ printk(KERN_ERR "Failed to initialise compression " -+ "algorithm.\n"); -+ if (rw == READ) { -+ printk(KERN_INFO "Unable to read the image.\n"); -+ return -ENODEV; -+ } else { -+ printk(KERN_INFO "Continuing without " -+ "compressing the image.\n"); -+ toi_compression_ops.enabled = 0; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * toi_compress_write_page() -+ * -+ * Compress a page of data, buffering output and passing on filled -+ * pages to the next module in the pipeline. -+ * -+ * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing -+ * data to be compressed. -+ * -+ * Returns: 0 on success. Otherwise the error is that returned by later -+ * modules, -ECHILD if we have a broken pipeline or -EIO if -+ * zlib errs. 
-+ */ -+static int toi_compress_write_page(unsigned long index, int buf_type, -+ void *buffer_page, unsigned int buf_size) -+{ -+ int ret = 0, cpu = smp_processor_id(); -+ struct cpu_context *ctx = &per_cpu(contexts, cpu); -+ u8* output_buffer = buffer_page; -+ int output_len = buf_size; -+ int out_buf_type = buf_type; -+ -+ if (ctx->transform) { -+ -+ ctx->buffer_start = TOI_MAP(buf_type, buffer_page); -+ ctx->len = OUT_BUF_SIZE; -+ -+ ret = crypto_comp_compress(ctx->transform, -+ ctx->buffer_start, buf_size, -+ ctx->output_buffer, &ctx->len); -+ -+ TOI_UNMAP(buf_type, buffer_page); -+ -+ toi_message(TOI_COMPRESS, TOI_VERBOSE, 0, -+ "CPU %d, index %lu: %d bytes", -+ cpu, index, ctx->len); -+ -+ if (!ret && ctx->len < buf_size) { /* some compression */ -+ output_buffer = ctx->output_buffer; -+ output_len = ctx->len; -+ out_buf_type = TOI_VIRT; -+ } -+ -+ } -+ -+ mutex_lock(&stats_lock); -+ -+ toi_compress_bytes_in += buf_size; -+ toi_compress_bytes_out += output_len; -+ -+ mutex_unlock(&stats_lock); -+ -+ if (!ret) -+ ret = next_driver->write_page(index, out_buf_type, -+ output_buffer, output_len); -+ -+ return ret; -+} -+ -+/* -+ * toi_compress_read_page() -+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE. -+ * -+ * Retrieve data from later modules and decompress it until the input buffer -+ * is filled. -+ * Zero if successful. Error condition from me or from downstream on failure. -+ */ -+static int toi_compress_read_page(unsigned long *index, int buf_type, -+ void *buffer_page, unsigned int *buf_size) -+{ -+ int ret, cpu = smp_processor_id(); -+ unsigned int len; -+ unsigned int outlen = PAGE_SIZE; -+ char *buffer_start; -+ struct cpu_context *ctx = &per_cpu(contexts, cpu); -+ -+ if (!ctx->transform) -+ return next_driver->read_page(index, TOI_PAGE, buffer_page, -+ buf_size); -+ -+ /* -+ * All our reads must be synchronous - we can't decompress -+ * data that hasn't been read yet. 
-+ */ -+ -+ ret = next_driver->read_page(index, TOI_VIRT, ctx->page_buffer, &len); -+ -+ buffer_start = kmap(buffer_page); -+ -+ /* Error or uncompressed data */ -+ if (ret || len == PAGE_SIZE) { -+ memcpy(buffer_start, ctx->page_buffer, len); -+ goto out; -+ } -+ -+ ret = crypto_comp_decompress( -+ ctx->transform, -+ ctx->page_buffer, -+ len, buffer_start, &outlen); -+ -+ toi_message(TOI_COMPRESS, TOI_VERBOSE, 0, -+ "CPU %d, index %lu: %d=>%d (%d).", -+ cpu, *index, len, outlen, ret); -+ -+ if (ret) -+ abort_hibernate(TOI_FAILED_IO, -+ "Compress_read returned %d.\n", ret); -+ else if (outlen != PAGE_SIZE) { -+ abort_hibernate(TOI_FAILED_IO, -+ "Decompression yielded %d bytes instead of %ld.\n", -+ outlen, PAGE_SIZE); -+ printk(KERN_ERR "Decompression yielded %d bytes instead of " -+ "%ld.\n", outlen, PAGE_SIZE); -+ ret = -EIO; -+ *buf_size = outlen; -+ } -+out: -+ TOI_UNMAP(buf_type, buffer_page); -+ return ret; -+} -+ -+/* -+ * toi_compress_print_debug_stats -+ * @buffer: Pointer to a buffer into which the debug info will be printed. -+ * @size: Size of the buffer. -+ * -+ * Print information to be recorded for debugging purposes into a buffer. -+ * Returns: Number of characters written to the buffer. -+ */ -+ -+static int toi_compress_print_debug_stats(char *buffer, int size) -+{ -+ unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT, -+ pages_out = toi_compress_bytes_out >> PAGE_SHIFT; -+ int len; -+ -+ /* Output the compression ratio achieved. 
*/ -+ if (*toi_compressor_name) -+ len = scnprintf(buffer, size, "- Compressor is '%s'.\n", -+ toi_compressor_name); -+ else -+ len = scnprintf(buffer, size, "- Compressor is not set.\n"); -+ -+ if (pages_in) -+ len += scnprintf(buffer+len, size - len, " Compressed " -+ "%lu bytes into %lu (%ld percent compression).\n", -+ toi_compress_bytes_in, -+ toi_compress_bytes_out, -+ (pages_in - pages_out) * 100 / pages_in); -+ return len; -+} -+ -+/* -+ * toi_compress_compression_memory_needed -+ * -+ * Tell the caller how much memory we need to operate during hibernate/resume. -+ * Returns: Unsigned long. Maximum number of bytes of memory required for -+ * operation. -+ */ -+static int toi_compress_memory_needed(void) -+{ -+ return 2 * PAGE_SIZE; -+} -+ -+static int toi_compress_storage_needed(void) -+{ -+ return 2 * sizeof(unsigned long) + 2 * sizeof(int) + -+ strlen(toi_compressor_name) + 1; -+} -+ -+/* -+ * toi_compress_save_config_info -+ * @buffer: Pointer to a buffer of size PAGE_SIZE. -+ * -+ * Save informaton needed when reloading the image at resume time. -+ * Returns: Number of bytes used for saving our data. -+ */ -+static int toi_compress_save_config_info(char *buffer) -+{ -+ int len = strlen(toi_compressor_name) + 1, offset = 0; -+ -+ *((unsigned long *) buffer) = toi_compress_bytes_in; -+ offset += sizeof(unsigned long); -+ *((unsigned long *) (buffer + offset)) = toi_compress_bytes_out; -+ offset += sizeof(unsigned long); -+ *((int *) (buffer + offset)) = toi_expected_compression; -+ offset += sizeof(int); -+ *((int *) (buffer + offset)) = len; -+ offset += sizeof(int); -+ strncpy(buffer + offset, toi_compressor_name, len); -+ return offset + len; -+} -+ -+/* toi_compress_load_config_info -+ * @buffer: Pointer to the start of the data. -+ * @size: Number of bytes that were saved. -+ * -+ * Description: Reload information needed for decompressing the image at -+ * resume time. 
-+ */ -+static void toi_compress_load_config_info(char *buffer, int size) -+{ -+ int len, offset = 0; -+ -+ toi_compress_bytes_in = *((unsigned long *) buffer); -+ offset += sizeof(unsigned long); -+ toi_compress_bytes_out = *((unsigned long *) (buffer + offset)); -+ offset += sizeof(unsigned long); -+ toi_expected_compression = *((int *) (buffer + offset)); -+ offset += sizeof(int); -+ len = *((int *) (buffer + offset)); -+ offset += sizeof(int); -+ strncpy(toi_compressor_name, buffer + offset, len); -+} -+ -+static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd) -+{ -+ bkd->compress_bytes_in = toi_compress_bytes_in; -+ bkd->compress_bytes_out = toi_compress_bytes_out; -+} -+ -+static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd) -+{ -+ toi_compress_bytes_in = bkd->compress_bytes_in; -+ toi_compress_bytes_out = bkd->compress_bytes_out; -+} -+ -+/* -+ * toi_expected_compression_ratio -+ * -+ * Description: Returns the expected ratio between data passed into this module -+ * and the amount of data output when writing. -+ * Returns: 100 if the module is disabled. Otherwise the value set by the -+ * user via our sysfs entry. -+ */ -+ -+static int toi_compress_expected_ratio(void) -+{ -+ if (!toi_compression_ops.enabled) -+ return 100; -+ else -+ return 100 - toi_expected_compression; -+} -+ -+/* -+ * data for our sysfs entries. -+ */ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression, -+ 0, 99, 0, NULL), -+ SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0, -+ NULL), -+ SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL), -+}; -+ -+/* -+ * Ops structure. 
-+ */ -+static struct toi_module_ops toi_compression_ops = { -+ .type = FILTER_MODULE, -+ .name = "compression", -+ .directory = "compression", -+ .module = THIS_MODULE, -+ .initialise = toi_compress_init, -+ .memory_needed = toi_compress_memory_needed, -+ .print_debug_info = toi_compress_print_debug_stats, -+ .save_config_info = toi_compress_save_config_info, -+ .load_config_info = toi_compress_load_config_info, -+ .storage_needed = toi_compress_storage_needed, -+ .expected_compression = toi_compress_expected_ratio, -+ -+ .pre_atomic_restore = toi_compress_pre_atomic_restore, -+ .post_atomic_restore = toi_compress_post_atomic_restore, -+ -+ .rw_init = toi_compress_rw_init, -+ .rw_cleanup = toi_compress_rw_cleanup, -+ -+ .write_page = toi_compress_write_page, -+ .read_page = toi_compress_read_page, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* ---- Registration ---- */ -+ -+static __init int toi_compress_load(void) -+{ -+ return toi_register_module(&toi_compression_ops); -+} -+ -+#ifdef MODULE -+static __exit void toi_compress_unload(void) -+{ -+ toi_unregister_module(&toi_compression_ops); -+} -+ -+module_init(toi_compress_load); -+module_exit(toi_compress_unload); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("Compression Support for TuxOnIce"); -+#else -+late_initcall(toi_compress_load); -+#endif -diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c -new file mode 100644 -index 0000000..cf111c1 ---- /dev/null -+++ b/kernel/power/tuxonice_extent.c -@@ -0,0 +1,123 @@ -+/* -+ * kernel/power/tuxonice_extent.c -+ * -+ * Copyright (C) 2003-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * These functions encapsulate the manipulation of storage metadata. 
-+ */ -+ -+#include -+#include "tuxonice_modules.h" -+#include "tuxonice_extent.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_ui.h" -+#include "tuxonice.h" -+ -+/** -+ * toi_get_extent - return a free extent -+ * -+ * May fail, returning NULL instead. -+ **/ -+static struct hibernate_extent *toi_get_extent(void) -+{ -+ return (struct hibernate_extent *) toi_kzalloc(2, -+ sizeof(struct hibernate_extent), TOI_ATOMIC_GFP); -+} -+ -+/** -+ * toi_put_extent_chain - free a whole chain of extents -+ * @chain: Chain to free. -+ **/ -+void toi_put_extent_chain(struct hibernate_extent_chain *chain) -+{ -+ struct hibernate_extent *this; -+ -+ this = chain->first; -+ -+ while (this) { -+ struct hibernate_extent *next = this->next; -+ toi_kfree(2, this, sizeof(*this)); -+ chain->num_extents--; -+ this = next; -+ } -+ -+ chain->first = NULL; -+ chain->last_touched = NULL; -+ chain->current_extent = NULL; -+ chain->size = 0; -+} -+EXPORT_SYMBOL_GPL(toi_put_extent_chain); -+ -+/** -+ * toi_add_to_extent_chain - add an extent to an existing chain -+ * @chain: Chain to which the extend should be added -+ * @start: Start of the extent (first physical block) -+ * @end: End of the extent (last physical block) -+ * -+ * The chain information is updated if the insertion is successful. 
-+ **/ -+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain, -+ unsigned long start, unsigned long end) -+{ -+ struct hibernate_extent *new_ext = NULL, *cur_ext = NULL; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, -+ "Adding extent %lu-%lu to chain %p.\n", start, end, chain); -+ -+ /* Find the right place in the chain */ -+ if (chain->last_touched && chain->last_touched->start < start) -+ cur_ext = chain->last_touched; -+ else if (chain->first && chain->first->start < start) -+ cur_ext = chain->first; -+ -+ if (cur_ext) { -+ while (cur_ext->next && cur_ext->next->start < start) -+ cur_ext = cur_ext->next; -+ -+ if (cur_ext->end == (start - 1)) { -+ struct hibernate_extent *next_ext = cur_ext->next; -+ cur_ext->end = end; -+ -+ /* Merge with the following one? */ -+ if (next_ext && cur_ext->end + 1 == next_ext->start) { -+ cur_ext->end = next_ext->end; -+ cur_ext->next = next_ext->next; -+ toi_kfree(2, next_ext, sizeof(*next_ext)); -+ chain->num_extents--; -+ } -+ -+ chain->last_touched = cur_ext; -+ chain->size += (end - start + 1); -+ -+ return 0; -+ } -+ } -+ -+ new_ext = toi_get_extent(); -+ if (!new_ext) { -+ printk(KERN_INFO "Error unable to append a new extent to the " -+ "chain.\n"); -+ return -ENOMEM; -+ } -+ -+ chain->num_extents++; -+ chain->size += (end - start + 1); -+ new_ext->start = start; -+ new_ext->end = end; -+ -+ chain->last_touched = new_ext; -+ -+ if (cur_ext) { -+ new_ext->next = cur_ext->next; -+ cur_ext->next = new_ext; -+ } else { -+ if (chain->first) -+ new_ext->next = chain->first; -+ chain->first = new_ext; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(toi_add_to_extent_chain); -diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h -new file mode 100644 -index 0000000..3c9a737 ---- /dev/null -+++ b/kernel/power/tuxonice_extent.h -@@ -0,0 +1,44 @@ -+/* -+ * kernel/power/tuxonice_extent.h -+ * -+ * Copyright (C) 2003-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the 
GPLv2. -+ * -+ * It contains declarations related to extents. Extents are -+ * TuxOnIce's method of storing some of the metadata for the image. -+ * See tuxonice_extent.c for more info. -+ * -+ */ -+ -+#include "tuxonice_modules.h" -+ -+#ifndef EXTENT_H -+#define EXTENT_H -+ -+struct hibernate_extent { -+ unsigned long start, end; -+ struct hibernate_extent *next; -+}; -+ -+struct hibernate_extent_chain { -+ unsigned long size; /* size of the chain ie sum (max-min+1) */ -+ int num_extents; -+ struct hibernate_extent *first, *last_touched; -+ struct hibernate_extent *current_extent; -+ unsigned long current_offset; -+}; -+ -+/* Simplify iterating through all the values in an extent chain */ -+#define toi_extent_for_each(extent_chain, extentpointer, value) \ -+if ((extent_chain)->first) \ -+ for ((extentpointer) = (extent_chain)->first, (value) = \ -+ (extentpointer)->start; \ -+ ((extentpointer) && ((extentpointer)->next || (value) <= \ -+ (extentpointer)->end)); \ -+ (((value) == (extentpointer)->end) ? \ -+ ((extentpointer) = (extentpointer)->next, (value) = \ -+ ((extentpointer) ? (extentpointer)->start : 0)) : \ -+ (value)++)) -+ -+#endif -diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c -new file mode 100644 -index 0000000..b425767 ---- /dev/null -+++ b/kernel/power/tuxonice_file.c -@@ -0,0 +1,497 @@ -+/* -+ * kernel/power/tuxonice_file.c -+ * -+ * Copyright (C) 2005-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * This file encapsulates functions for usage of a simple file as a -+ * backing store. It is based upon the swapallocator, and shares the -+ * same basic working. Here, though, we have nothing to do with -+ * swapspace, and only one device to worry about. 
-+ * -+ * The user can just -+ * -+ * echo TuxOnIce > /path/to/my_file -+ * -+ * dd if=/dev/zero bs=1M count= >> /path/to/my_file -+ * -+ * and -+ * -+ * echo /path/to/my_file > /sys/power/tuxonice/file/target -+ * -+ * then put what they find in /sys/power/tuxonice/resume -+ * as their resume= parameter in lilo.conf (and rerun lilo if using it). -+ * -+ * Having done this, they're ready to hibernate and resume. -+ * -+ * TODO: -+ * - File resizing. -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_bio.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_io.h" -+ -+#define target_is_normal_file() (S_ISREG(target_inode->i_mode)) -+ -+static struct toi_module_ops toi_fileops; -+ -+static struct file *target_file; -+static struct block_device *toi_file_target_bdev; -+static unsigned long pages_available, pages_allocated; -+static char toi_file_target[256]; -+static struct inode *target_inode; -+static int file_target_priority; -+static int used_devt; -+static int target_claim; -+static dev_t toi_file_dev_t; -+static int sig_page_index; -+ -+/* For test_toi_file_target */ -+static struct toi_bdev_info *file_chain; -+ -+static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num) -+{ -+ int j; -+ sector_t last = 0; -+ -+ for (j = 0; j < dev_info->blocks_per_page; j++) { -+ sector_t this = bmap(target_inode, -+ page_num * dev_info->blocks_per_page + j); -+ -+ if (!this || (last && (last + 1) != this)) -+ break; -+ -+ last = this; -+ } -+ -+ return j == dev_info->blocks_per_page; -+} -+ -+static unsigned long get_usable_pages(struct toi_bdev_info *dev_info) -+{ -+ unsigned long result = 0; -+ struct block_device *bdev = dev_info->bdev; -+ int i; -+ -+ switch (target_inode->i_mode & S_IFMT) { -+ case S_IFSOCK: -+ case S_IFCHR: -+ case S_IFIFO: /* Socket, Char, Fifo */ -+ return -1; -+ 
case S_IFREG: /* Regular file: current size - holes + free -+ space on part */ -+ for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) { -+ if (has_contiguous_blocks(dev_info, i)) -+ result++; -+ } -+ break; -+ case S_IFBLK: /* Block device */ -+ if (!bdev->bd_disk) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, -+ "bdev->bd_disk null."); -+ return 0; -+ } -+ -+ result = (bdev->bd_part ? -+ bdev->bd_part->nr_sects : -+ get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9); -+ } -+ -+ -+ return result; -+} -+ -+static int toi_file_register_storage(void) -+{ -+ struct toi_bdev_info *devinfo; -+ int result = 0; -+ struct fs_info *fs_info; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage."); -+ if (!strlen(toi_file_target)) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: " -+ "No target filename set."); -+ return 0; -+ } -+ -+ target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0); -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.", -+ toi_file_target, target_file); -+ -+ if (IS_ERR(target_file) || !target_file) { -+ target_file = NULL; -+ toi_file_dev_t = name_to_dev_t(toi_file_target); -+ if (!toi_file_dev_t) { -+ struct kstat stat; -+ int error = vfs_stat(toi_file_target, &stat); -+ printk(KERN_INFO "Open file %s returned %p and " -+ "name_to_devt failed.\n", -+ toi_file_target, target_file); -+ if (error) { -+ printk(KERN_INFO "Stating the file also failed." 
-+ " Nothing more we can do.\n"); -+ return 0; -+ } else -+ toi_file_dev_t = stat.rdev; -+ } -+ -+ toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t); -+ if (IS_ERR(toi_file_target_bdev)) { -+ printk(KERN_INFO "Got a dev_num (%lx) but failed to " -+ "open it.\n", -+ (unsigned long) toi_file_dev_t); -+ toi_file_target_bdev = NULL; -+ return 0; -+ } -+ used_devt = 1; -+ target_inode = toi_file_target_bdev->bd_inode; -+ } else -+ target_inode = target_file->f_mapping->host; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target."); -+ if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) || -+ S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) { -+ printk(KERN_INFO "File support works with regular files," -+ " character files and block devices.\n"); -+ /* Cleanup routine will undo the above */ -+ return 0; -+ } -+ -+ if (!used_devt) { -+ if (S_ISBLK(target_inode->i_mode)) { -+ toi_file_target_bdev = I_BDEV(target_inode); -+ if (!blkdev_get(toi_file_target_bdev, FMODE_WRITE | -+ FMODE_READ, NULL)) -+ target_claim = 1; -+ } else -+ toi_file_target_bdev = target_inode->i_sb->s_bdev; -+ if (!toi_file_target_bdev) { -+ printk(KERN_INFO "%s is not a valid file allocator " -+ "target.\n", toi_file_target); -+ return 0; -+ } -+ toi_file_dev_t = toi_file_target_bdev->bd_dev; -+ } -+ -+ devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC); -+ if (!devinfo) { -+ printk("Failed to allocate a toi_bdev_info struct for the file allocator.\n"); -+ return -ENOMEM; -+ } -+ -+ devinfo->bdev = toi_file_target_bdev; -+ devinfo->allocator = &toi_fileops; -+ devinfo->allocator_index = 0; -+ -+ fs_info = fs_info_from_block_dev(toi_file_target_bdev); -+ if (fs_info && !IS_ERR(fs_info)) { -+ memcpy(devinfo->uuid, &fs_info->uuid, 16); -+ free_fs_info(fs_info); -+ } else -+ result = (int) PTR_ERR(fs_info); -+ -+ /* Unlike swap code, only complain if fs_info_from_block_dev returned -+ * -ENOMEM. 
The 'file' might be a full partition, so might validly not -+ * have an identifiable type, UUID etc. -+ */ -+ if (result) -+ printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n", -+ result); -+ devinfo->dev_t = toi_file_dev_t; -+ devinfo->prio = file_target_priority; -+ devinfo->bmap_shift = target_inode->i_blkbits - 9; -+ devinfo->blocks_per_page = -+ (1 << (PAGE_SHIFT - target_inode->i_blkbits)); -+ sprintf(devinfo->name, "file %s", toi_file_target); -+ file_chain = devinfo; -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap " -+ "shift is %d. Blocks per page %d.", -+ devinfo->dev_t, devinfo->prio, devinfo->bmap_shift, -+ devinfo->blocks_per_page); -+ -+ /* Keep one aside for the signature */ -+ pages_available = get_usable_pages(devinfo) - 1; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu " -+ "pages.", pages_available); -+ -+ toi_bio_ops.register_storage(devinfo); -+ return 0; -+} -+ -+static unsigned long toi_file_storage_available(void) -+{ -+ return pages_available; -+} -+ -+static int toi_file_allocate_storage(struct toi_bdev_info *chain, -+ unsigned long request) -+{ -+ unsigned long available = pages_available - pages_allocated; -+ unsigned long to_add = min(available, request); -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated " -+ "is %lu. Allocating %lu pages from file.", -+ pages_available, pages_allocated, to_add); -+ pages_allocated += to_add; -+ -+ return to_add; -+} -+ -+/** -+ * __populate_block_list - add an extent to the chain -+ * @min: Start of the extent (first physical block = sector) -+ * @max: End of the extent (last physical block = sector) -+ * -+ * If TOI_TEST_BIO is set, print a debug message, outputting the min and max -+ * fs block numbers. 
-+ **/ -+static int __populate_block_list(struct toi_bdev_info *chain, int min, int max) -+{ -+ if (test_action_state(TOI_TEST_BIO)) -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.", -+ min << chain->bmap_shift, -+ ((max + 1) << chain->bmap_shift) - 1); -+ -+ return toi_add_to_extent_chain(&chain->blocks, min, max); -+} -+ -+static int get_main_pool_phys_params(struct toi_bdev_info *chain) -+{ -+ int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0; -+ unsigned long pages_mapped = 0; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks."); -+ -+ if (chain->blocks.first) -+ toi_put_extent_chain(&chain->blocks); -+ -+ if (!target_is_normal_file()) { -+ result = (pages_available > 0) ? -+ __populate_block_list(chain, chain->blocks_per_page, -+ (pages_allocated + 1) * -+ chain->blocks_per_page - 1) : 0; -+ return result; -+ } -+ -+ /* -+ * FIXME: We are assuming the first page is contiguous. Is that -+ * assumption always right? -+ */ -+ -+ for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) { -+ sector_t new_sector; -+ -+ if (!has_contiguous_blocks(chain, i)) -+ continue; -+ -+ if (!have_sig_page) { -+ have_sig_page = 1; -+ sig_page_index = i; -+ continue; -+ } -+ -+ pages_mapped++; -+ -+ /* Ignore first page - it has the header */ -+ if (pages_mapped == 1) -+ continue; -+ -+ new_sector = bmap(target_inode, (i * chain->blocks_per_page)); -+ -+ /* -+ * I'd love to be able to fill in holes and resize -+ * files, but not yet... 
-+ */ -+ -+ if (new_sector == extent_max + 1) -+ extent_max += chain->blocks_per_page; -+ else { -+ if (extent_min > -1) { -+ result = __populate_block_list(chain, -+ extent_min, extent_max); -+ if (result) -+ return result; -+ } -+ -+ extent_min = new_sector; -+ extent_max = extent_min + -+ chain->blocks_per_page - 1; -+ } -+ -+ if (pages_mapped == pages_allocated) -+ break; -+ } -+ -+ if (extent_min > -1) { -+ result = __populate_block_list(chain, extent_min, extent_max); -+ if (result) -+ return result; -+ } -+ -+ return 0; -+} -+ -+static void toi_file_free_storage(struct toi_bdev_info *chain) -+{ -+ pages_allocated = 0; -+ file_chain = NULL; -+} -+ -+/** -+ * toi_file_print_debug_stats - print debug info -+ * @buffer: Buffer to data to populate -+ * @size: Size of the buffer -+ **/ -+static int toi_file_print_debug_stats(char *buffer, int size) -+{ -+ int len = scnprintf(buffer, size, "- File Allocator active.\n"); -+ -+ len += scnprintf(buffer+len, size-len, " Storage available for " -+ "image: %lu pages.\n", pages_available); -+ -+ return len; -+} -+ -+static void toi_file_cleanup(int finishing_cycle) -+{ -+ if (toi_file_target_bdev) { -+ if (target_claim) { -+ blkdev_put(toi_file_target_bdev, FMODE_WRITE | FMODE_READ); -+ target_claim = 0; -+ } -+ -+ if (used_devt) { -+ blkdev_put(toi_file_target_bdev, -+ FMODE_READ | FMODE_NDELAY); -+ used_devt = 0; -+ } -+ toi_file_target_bdev = NULL; -+ target_inode = NULL; -+ } -+ -+ if (target_file) { -+ filp_close(target_file, NULL); -+ target_file = NULL; -+ } -+ -+ pages_available = 0; -+} -+ -+/** -+ * test_toi_file_target - sysfs callback for /sys/power/tuxonince/file/target -+ * -+ * Test wheter the target file is valid for hibernating. -+ **/ -+static void test_toi_file_target(void) -+{ -+ int result = toi_file_register_storage(); -+ sector_t sector; -+ char buf[50]; -+ struct fs_info *fs_info; -+ -+ if (result || !file_chain) -+ return; -+ -+ /* This doesn't mean we're in business. Is any storage available? 
*/ -+ if (!pages_available) -+ goto out; -+ -+ toi_file_allocate_storage(file_chain, 1); -+ result = get_main_pool_phys_params(file_chain); -+ if (result) -+ goto out; -+ -+ -+ sector = bmap(target_inode, sig_page_index * -+ file_chain->blocks_per_page) << file_chain->bmap_shift; -+ -+ /* Use the uuid, or the dev_t if that fails */ -+ fs_info = fs_info_from_block_dev(toi_file_target_bdev); -+ if (!fs_info || IS_ERR(fs_info)) { -+ bdevname(toi_file_target_bdev, buf); -+ sprintf(resume_file, "/dev/%s:%llu", buf, -+ (unsigned long long) sector); -+ } else { -+ int i; -+ hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0); -+ -+ /* Remove the spaces */ -+ for (i = 1; i < 16; i++) { -+ buf[2 * i] = buf[3 * i]; -+ buf[2 * i + 1] = buf[3 * i + 1]; -+ } -+ buf[32] = 0; -+ sprintf(resume_file, "UUID=%s:0x%llx", buf, -+ (unsigned long long) sector); -+ free_fs_info(fs_info); -+ } -+ -+ toi_attempt_to_parse_resume_device(0); -+out: -+ toi_file_free_storage(file_chain); -+ toi_bio_ops.free_storage(); -+} -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256, -+ SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target), -+ SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL), -+ SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095, -+ 4096, 0, NULL), -+}; -+ -+static struct toi_bio_allocator_ops toi_bio_fileops = { -+ .register_storage = toi_file_register_storage, -+ .storage_available = toi_file_storage_available, -+ .allocate_storage = toi_file_allocate_storage, -+ .bmap = get_main_pool_phys_params, -+ .free_storage = toi_file_free_storage, -+}; -+ -+static struct toi_module_ops toi_fileops = { -+ .type = BIO_ALLOCATOR_MODULE, -+ .name = "file storage", -+ .directory = "file", -+ .module = THIS_MODULE, -+ .print_debug_info = toi_file_print_debug_stats, -+ .cleanup = toi_file_cleanup, -+ .bio_allocator_ops = &toi_bio_fileops, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) 
/ -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* ---- Registration ---- */ -+static __init int toi_file_load(void) -+{ -+ return toi_register_module(&toi_fileops); -+} -+ -+#ifdef MODULE -+static __exit void toi_file_unload(void) -+{ -+ toi_unregister_module(&toi_fileops); -+} -+ -+module_init(toi_file_load); -+module_exit(toi_file_unload); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("TuxOnIce FileAllocator"); -+#else -+late_initcall(toi_file_load); -+#endif -diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c -new file mode 100644 -index 0000000..4f49e22 ---- /dev/null -+++ b/kernel/power/tuxonice_highlevel.c -@@ -0,0 +1,1351 @@ -+/* -+ * kernel/power/tuxonice_highlevel.c -+ */ -+/** \mainpage TuxOnIce. -+ * -+ * TuxOnIce provides support for saving and restoring an image of -+ * system memory to an arbitrary storage device, either on the local computer, -+ * or across some network. The support is entirely OS based, so TuxOnIce -+ * works without requiring BIOS, APM or ACPI support. The vast majority of the -+ * code is also architecture independant, so it should be very easy to port -+ * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem -+ * and preemption. Initramfses and initrds are also supported. -+ * -+ * TuxOnIce uses a modular design, in which the method of storing the image is -+ * completely abstracted from the core code, as are transformations on the data -+ * such as compression and/or encryption (multiple 'modules' can be used to -+ * provide arbitrary combinations of functionality). The user interface is also -+ * modular, so that arbitrarily simple or complex interfaces can be used to -+ * provide anything from debugging information through to eye candy. -+ * -+ * \section Copyright -+ * -+ * TuxOnIce is released under the GPLv2. -+ * -+ * Copyright (C) 1998-2001 Gabor Kuti
-+ * Copyright (C) 1998,2001,2002 Pavel Machek
-+ * Copyright (C) 2002-2003 Florent Chabaud
-+ * Copyright (C) 2002-2014 Nigel Cunningham (nigel at tuxonice net)
-+ * -+ * \section Credits -+ * -+ * Nigel would like to thank the following people for their work: -+ * -+ * Bernard Blackham
-+ * Web page & Wiki administration, some coding. A person without whom -+ * TuxOnIce would not be where it is. -+ * -+ * Michael Frank
-+ * Extensive testing and help with improving stability. I was constantly -+ * amazed by the quality and quantity of Michael's help. -+ * -+ * Pavel Machek
-+ * Modifications, defectiveness pointing, being with Gabor at the very -+ * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and -+ * 2.5.17. Even though Pavel and I disagree on the direction suspend to -+ * disk should take, I appreciate the valuable work he did in helping Gabor -+ * get the concept working. -+ * -+ * ..and of course the myriads of TuxOnIce users who have helped diagnose -+ * and fix bugs, made suggestions on how to improve the code, proofread -+ * documentation, and donated time and money. -+ * -+ * Thanks also to corporate sponsors: -+ * -+ * Redhat.Sometime employer from May 2006 (my fault, not Redhat's!). -+ * -+ * Cyclades.com. Nigel's employers from Dec 2004 until May 2006, who -+ * allowed him to work on TuxOnIce and PM related issues on company time. -+ * -+ * LinuxFund.org. Sponsored Nigel's work on TuxOnIce for four months Oct -+ * 2003 to Jan 2004. -+ * -+ * LAC Linux. Donated P4 hardware that enabled development and ongoing -+ * maintenance of SMP and Highmem support. -+ * -+ * OSDL. Provided access to various hardware configurations, make -+ * occasional small donations to the project. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include /* for get/set_fs & KERNEL_DS on i386 */ -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice_io.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_power_off.h" -+#include "tuxonice_storage.h" -+#include "tuxonice_checksum.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_atomic_copy.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_cluster.h" -+ -+/*! Pageset metadata. 
*/ -+struct pagedir pagedir2 = {2}; -+EXPORT_SYMBOL_GPL(pagedir2); -+ -+static mm_segment_t oldfs; -+static DEFINE_MUTEX(tuxonice_in_use); -+static int block_dump_save; -+ -+/* Binary signature if an image is present */ -+char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c"; -+EXPORT_SYMBOL_GPL(tuxonice_signature); -+ -+unsigned long boot_kernel_data_buffer; -+ -+static char *result_strings[] = { -+ "Hibernation was aborted", -+ "The user requested that we cancel the hibernation", -+ "No storage was available", -+ "Insufficient storage was available", -+ "Freezing filesystems and/or tasks failed", -+ "A pre-existing image was used", -+ "We would free memory, but image size limit doesn't allow this", -+ "Unable to free enough memory to hibernate", -+ "Unable to obtain the Power Management Semaphore", -+ "A device suspend/resume returned an error", -+ "A system device suspend/resume returned an error", -+ "The extra pages allowance is too small", -+ "We were unable to successfully prepare an image", -+ "TuxOnIce module initialisation failed", -+ "TuxOnIce module cleanup failed", -+ "I/O errors were encountered", -+ "Ran out of memory", -+ "An error was encountered while reading the image", -+ "Platform preparation failed", -+ "CPU Hotplugging failed", -+ "Architecture specific preparation failed", -+ "Pages needed resaving, but we were told to abort if this happens", -+ "We can't hibernate at the moment (invalid resume= or filewriter " -+ "target?)", -+ "A hibernation preparation notifier chain member cancelled the " -+ "hibernation", -+ "Pre-snapshot preparation failed", -+ "Pre-restore preparation failed", -+ "Failed to disable usermode helpers", -+ "Can't resume from alternate image", -+ "Header reservation too small", -+ "Device Power Management Preparation failed", -+}; -+ -+/** -+ * toi_finish_anything - cleanup after doing anything -+ * @hibernate_or_resume: Whether finishing a cycle or attempt at -+ * resuming. 
-+ * -+ * This is our basic clean-up routine, matching start_anything below. We -+ * call cleanup routines, drop module references and restore process fs and -+ * cpus allowed masks, together with the global block_dump variable's value. -+ **/ -+void toi_finish_anything(int hibernate_or_resume) -+{ -+ toi_cleanup_modules(hibernate_or_resume); -+ toi_put_modules(); -+ if (hibernate_or_resume) { -+ block_dump = block_dump_save; -+ set_cpus_allowed_ptr(current, cpu_all_mask); -+ toi_alloc_print_debug_stats(); -+ atomic_inc(&snapshot_device_available); -+ unlock_system_sleep(); -+ } -+ -+ set_fs(oldfs); -+ mutex_unlock(&tuxonice_in_use); -+} -+ -+/** -+ * toi_start_anything - basic initialisation for TuxOnIce -+ * @toi_or_resume: Whether starting a cycle or attempt at resuming. -+ * -+ * Our basic initialisation routine. Take references on modules, use the -+ * kernel segment, recheck resume= if no active allocator is set, initialise -+ * modules, save and reset block_dump and ensure we're running on CPU0. 
-+ **/ -+int toi_start_anything(int hibernate_or_resume) -+{ -+ mutex_lock(&tuxonice_in_use); -+ -+ oldfs = get_fs(); -+ set_fs(KERNEL_DS); -+ -+ if (hibernate_or_resume) { -+ lock_system_sleep(); -+ -+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) -+ goto snapshotdevice_unavailable; -+ } -+ -+ if (hibernate_or_resume == SYSFS_HIBERNATE) -+ toi_print_modules(); -+ -+ if (toi_get_modules()) { -+ printk(KERN_INFO "TuxOnIce: Get modules failed!\n"); -+ goto prehibernate_err; -+ } -+ -+ if (hibernate_or_resume) { -+ block_dump_save = block_dump; -+ block_dump = 0; -+ set_cpus_allowed_ptr(current, -+ cpumask_of(cpumask_first(cpu_online_mask))); -+ } -+ -+ if (toi_initialise_modules_early(hibernate_or_resume)) -+ goto early_init_err; -+ -+ if (!toiActiveAllocator) -+ toi_attempt_to_parse_resume_device(!hibernate_or_resume); -+ -+ if (!toi_initialise_modules_late(hibernate_or_resume)) -+ return 0; -+ -+ toi_cleanup_modules(hibernate_or_resume); -+early_init_err: -+ if (hibernate_or_resume) { -+ block_dump_save = block_dump; -+ set_cpus_allowed_ptr(current, cpu_all_mask); -+ } -+ toi_put_modules(); -+prehibernate_err: -+ if (hibernate_or_resume) -+ atomic_inc(&snapshot_device_available); -+snapshotdevice_unavailable: -+ if (hibernate_or_resume) -+ mutex_unlock(&pm_mutex); -+ set_fs(oldfs); -+ mutex_unlock(&tuxonice_in_use); -+ return -EBUSY; -+} -+ -+/* -+ * Nosave page tracking. -+ * -+ * Here rather than in prepare_image because we want to do it once only at the -+ * start of a cycle. -+ */ -+ -+/** -+ * mark_nosave_pages - set up our Nosave bitmap -+ * -+ * Build a bitmap of Nosave pages from the list. The bitmap allows faster -+ * use when preparing the image. 
-+ **/ -+static void mark_nosave_pages(void) -+{ -+ struct nosave_region *region; -+ -+ list_for_each_entry(region, &nosave_regions, list) { -+ unsigned long pfn; -+ -+ for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) -+ if (pfn_valid(pfn)) -+ SetPageNosave(pfn_to_page(pfn)); -+ } -+} -+ -+static int toi_alloc_bitmap(struct memory_bitmap **bm) -+{ -+ int result = 0; -+ -+ *bm = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); -+ if (!*bm) { -+ printk(KERN_ERR "Failed to kzalloc memory for a bitmap.\n"); -+ return -ENOMEM; -+ } -+ -+ result = memory_bm_create(*bm, GFP_KERNEL, 0); -+ -+ if (result) { -+ printk(KERN_ERR "Failed to create a bitmap.\n"); -+ kfree(*bm); -+ *bm = NULL; -+ } -+ -+ return result; -+} -+ -+/** -+ * allocate_bitmaps - allocate bitmaps used to record page states -+ * -+ * Allocate the bitmaps we use to record the various TuxOnIce related -+ * page states. -+ **/ -+static int allocate_bitmaps(void) -+{ -+ if (toi_alloc_bitmap(&pageset1_map) || -+ toi_alloc_bitmap(&pageset1_copy_map) || -+ toi_alloc_bitmap(&pageset2_map) || -+ toi_alloc_bitmap(&io_map) || -+ toi_alloc_bitmap(&nosave_map) || -+ toi_alloc_bitmap(&free_map) || -+ toi_alloc_bitmap(&compare_map) || -+ toi_alloc_bitmap(&page_resave_map)) -+ return 1; -+ -+ return 0; -+} -+ -+static void toi_free_bitmap(struct memory_bitmap **bm) -+{ -+ if (!*bm) -+ return; -+ -+ memory_bm_free(*bm, 0); -+ kfree(*bm); -+ *bm = NULL; -+} -+ -+/** -+ * free_bitmaps - free the bitmaps used to record page states -+ * -+ * Free the bitmaps allocated above. It is not an error to call -+ * memory_bm_free on a bitmap that isn't currently allocated. 
-+ **/ -+static void free_bitmaps(void) -+{ -+ toi_free_bitmap(&pageset1_map); -+ toi_free_bitmap(&pageset1_copy_map); -+ toi_free_bitmap(&pageset2_map); -+ toi_free_bitmap(&io_map); -+ toi_free_bitmap(&nosave_map); -+ toi_free_bitmap(&free_map); -+ toi_free_bitmap(&compare_map); -+ toi_free_bitmap(&page_resave_map); -+} -+ -+/** -+ * io_MB_per_second - return the number of MB/s read or written -+ * @write: Whether to return the speed at which we wrote. -+ * -+ * Calculate the number of megabytes per second that were read or written. -+ **/ -+static int io_MB_per_second(int write) -+{ -+ return (toi_bkd.toi_io_time[write][1]) ? -+ MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ / -+ toi_bkd.toi_io_time[write][1] : 0; -+} -+ -+#define SNPRINTF(a...) do { len += scnprintf(((char *) buffer) + len, \ -+ count - len - 1, ## a); } while (0) -+ -+/** -+ * get_debug_info - fill a buffer with debugging information -+ * @buffer: The buffer to be filled. -+ * @count: The size of the buffer, in bytes. -+ * -+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will -+ * either printk or return via sysfs. -+ **/ -+static int get_toi_debug_info(const char *buffer, int count) -+{ -+ int len = 0, i, first_result = 1; -+ -+ SNPRINTF("TuxOnIce debugging info:\n"); -+ SNPRINTF("- TuxOnIce core : " TOI_CORE_VERSION "\n"); -+ SNPRINTF("- Kernel Version : " UTS_RELEASE "\n"); -+ SNPRINTF("- Compiler vers. 
: %d.%d\n", __GNUC__, __GNUC_MINOR__); -+ SNPRINTF("- Attempt number : %d\n", nr_hibernates); -+ SNPRINTF("- Parameters : %ld %ld %ld %d %ld %ld\n", -+ toi_result, -+ toi_bkd.toi_action, -+ toi_bkd.toi_debug_state, -+ toi_bkd.toi_default_console_level, -+ image_size_limit, -+ toi_poweroff_method); -+ SNPRINTF("- Overall expected compression percentage: %d.\n", -+ 100 - toi_expected_compression_ratio()); -+ len += toi_print_module_debug_info(((char *) buffer) + len, -+ count - len - 1); -+ if (toi_bkd.toi_io_time[0][1]) { -+ if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) { -+ SNPRINTF("- I/O speed: Write %ld KB/s", -+ (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / -+ toi_bkd.toi_io_time[0][1])); -+ if (toi_bkd.toi_io_time[1][1]) -+ SNPRINTF(", Read %ld KB/s", -+ (KB((unsigned long) -+ toi_bkd.toi_io_time[1][0]) * HZ / -+ toi_bkd.toi_io_time[1][1])); -+ } else { -+ SNPRINTF("- I/O speed: Write %ld MB/s", -+ (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ / -+ toi_bkd.toi_io_time[0][1])); -+ if (toi_bkd.toi_io_time[1][1]) -+ SNPRINTF(", Read %ld MB/s", -+ (MB((unsigned long) -+ toi_bkd.toi_io_time[1][0]) * HZ / -+ toi_bkd.toi_io_time[1][1])); -+ } -+ SNPRINTF(".\n"); -+ } else -+ SNPRINTF("- No I/O speed stats available.\n"); -+ SNPRINTF("- Extra pages : %lu used/%lu.\n", -+ extra_pd1_pages_used, extra_pd1_pages_allowance); -+ -+ for (i = 0; i < TOI_NUM_RESULT_STATES; i++) -+ if (test_result_state(i)) { -+ SNPRINTF("%s: %s.\n", first_result ? -+ "- Result " : -+ " ", -+ result_strings[i]); -+ first_result = 0; -+ } -+ if (first_result) -+ SNPRINTF("- Result : %s.\n", nr_hibernates ? -+ "Succeeded" : -+ "No hibernation attempts so far"); -+ return len; -+} -+ -+/** -+ * do_cleanup - cleanup after attempting to hibernate or resume -+ * @get_debug_info: Whether to allocate and return debugging info. -+ * -+ * Cleanup after attempting to hibernate or resume, possibly getting -+ * debugging info as we do so. 
-+ **/ -+static void do_cleanup(int get_debug_info, int restarting) -+{ -+ int i = 0; -+ char *buffer = NULL; -+ -+ trap_non_toi_io = 0; -+ -+ if (get_debug_info) -+ toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up..."); -+ -+ free_checksum_pages(); -+ -+ if (get_debug_info) -+ buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP); -+ -+ if (buffer) -+ i = get_toi_debug_info(buffer, PAGE_SIZE); -+ -+ toi_free_extra_pagedir_memory(); -+ -+ pagedir1.size = 0; -+ pagedir2.size = 0; -+ set_highmem_size(pagedir1, 0); -+ set_highmem_size(pagedir2, 0); -+ -+ if (boot_kernel_data_buffer) { -+ if (!test_toi_state(TOI_BOOT_KERNEL)) -+ toi_free_page(37, boot_kernel_data_buffer); -+ boot_kernel_data_buffer = 0; -+ } -+ -+ if (test_toi_state(TOI_DEVICE_HOTPLUG_LOCKED)) { -+ unlock_device_hotplug(); -+ clear_toi_state(TOI_DEVICE_HOTPLUG_LOCKED); -+ } -+ -+ clear_toi_state(TOI_BOOT_KERNEL); -+ if (current->flags & PF_SUSPEND_TASK) -+ thaw_processes(); -+ -+ if (!restarting) -+ toi_stop_other_threads(); -+ -+ if (test_action_state(TOI_KEEP_IMAGE) && -+ !test_result_state(TOI_ABORTED)) { -+ toi_message(TOI_ANY_SECTION, TOI_LOW, 1, -+ "TuxOnIce: Not invalidating the image due " -+ "to Keep Image being enabled."); -+ set_result_state(TOI_KEPT_IMAGE); -+ } else -+ if (toiActiveAllocator) -+ toiActiveAllocator->remove_image(); -+ -+ free_bitmaps(); -+ usermodehelper_enable(); -+ -+ if (test_toi_state(TOI_NOTIFIERS_PREPARE)) { -+ pm_notifier_call_chain(PM_POST_HIBERNATION); -+ clear_toi_state(TOI_NOTIFIERS_PREPARE); -+ } -+ -+ if (buffer && i) { -+ /* Printk can only handle 1023 bytes, including -+ * its level mangling. 
*/ -+ for (i = 0; i < 3; i++) -+ printk(KERN_ERR "%s", buffer + (1023 * i)); -+ toi_free_page(20, (unsigned long) buffer); -+ } -+ -+ if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) -+ enable_nonboot_cpus(); -+ -+ if (!restarting) -+ toi_cleanup_console(); -+ -+ free_attention_list(); -+ -+ if (!restarting) -+ toi_deactivate_storage(0); -+ -+ clear_toi_state(TOI_IGNORE_LOGLEVEL); -+ clear_toi_state(TOI_TRYING_TO_RESUME); -+ clear_toi_state(TOI_NOW_RESUMING); -+} -+ -+/** -+ * check_still_keeping_image - we kept an image; check whether to reuse it. -+ * -+ * We enter this routine when we have kept an image. If the user has said they -+ * want to still keep it, all we need to do is powerdown. If powering down -+ * means hibernating to ram and the power doesn't run out, we'll return 1. -+ * If we do power off properly or the battery runs out, we'll resume via the -+ * normal paths. -+ * -+ * If the user has said they want to remove the previously kept image, we -+ * remove it, and return 0. We'll then store a new image. -+ **/ -+static int check_still_keeping_image(void) -+{ -+ if (test_action_state(TOI_KEEP_IMAGE)) { -+ printk(KERN_INFO "Image already stored: powering down " -+ "immediately."); -+ do_toi_step(STEP_HIBERNATE_POWERDOWN); -+ return 1; /* Just in case we're using S3 */ -+ } -+ -+ printk(KERN_INFO "Invalidating previous image.\n"); -+ toiActiveAllocator->remove_image(); -+ -+ return 0; -+} -+ -+/** -+ * toi_init - prepare to hibernate to disk -+ * -+ * Initialise variables & data structures, in preparation for -+ * hibernating to disk. 
-+ **/ -+static int toi_init(int restarting) -+{ -+ int result, i, j; -+ -+ toi_result = 0; -+ -+ printk(KERN_INFO "Initiating a hibernation cycle.\n"); -+ -+ nr_hibernates++; -+ -+ for (i = 0; i < 2; i++) -+ for (j = 0; j < 2; j++) -+ toi_bkd.toi_io_time[i][j] = 0; -+ -+ if (!test_toi_state(TOI_CAN_HIBERNATE) || -+ allocate_bitmaps()) -+ return 1; -+ -+ mark_nosave_pages(); -+ -+ if (!restarting) -+ toi_prepare_console(); -+ -+ result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); -+ if (result) { -+ set_result_state(TOI_NOTIFIERS_PREPARE_FAILED); -+ return 1; -+ } -+ set_toi_state(TOI_NOTIFIERS_PREPARE); -+ -+ if (!restarting) { -+ printk(KERN_ERR "Starting other threads."); -+ toi_start_other_threads(); -+ } -+ -+ result = usermodehelper_disable(); -+ if (result) { -+ printk(KERN_ERR "TuxOnIce: Failed to disable usermode " -+ "helpers\n"); -+ set_result_state(TOI_USERMODE_HELPERS_ERR); -+ return 1; -+ } -+ -+ boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP); -+ if (!boot_kernel_data_buffer) { -+ printk(KERN_ERR "TuxOnIce: Failed to allocate " -+ "boot_kernel_data_buffer.\n"); -+ set_result_state(TOI_OUT_OF_MEMORY); -+ return 1; -+ } -+ -+ if (!test_action_state(TOI_LATE_CPU_HOTPLUG) && -+ disable_nonboot_cpus()) { -+ set_abort_result(TOI_CPU_HOTPLUG_FAILED); -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/** -+ * can_hibernate - perform basic 'Can we hibernate?' tests -+ * -+ * Perform basic tests that must pass if we're going to be able to hibernate: -+ * Can we get the pm_mutex? Is resume= valid (we need to know where to write -+ * the image header). -+ **/ -+static int can_hibernate(void) -+{ -+ if (!test_toi_state(TOI_CAN_HIBERNATE)) -+ toi_attempt_to_parse_resume_device(0); -+ -+ if (!test_toi_state(TOI_CAN_HIBERNATE)) { -+ printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n" -+ "This may be because you haven't put something along " -+ "the lines of\n\nresume=swap:/dev/hda1\n\n" -+ "in lilo.conf or equivalent. 
(Where /dev/hda1 is your " -+ "swap partition).\n"); -+ set_abort_result(TOI_CANT_SUSPEND); -+ return 0; -+ } -+ -+ if (strlen(alt_resume_param)) { -+ attempt_to_parse_alt_resume_param(); -+ -+ if (!strlen(alt_resume_param)) { -+ printk(KERN_INFO "Alternate resume parameter now " -+ "invalid. Aborting.\n"); -+ set_abort_result(TOI_CANT_USE_ALT_RESUME); -+ return 0; -+ } -+ } -+ -+ return 1; -+} -+ -+/** -+ * do_post_image_write - having written an image, figure out what to do next -+ * -+ * After writing an image, we might load an alternate image or power down. -+ * Powering down might involve hibernating to ram, in which case we also -+ * need to handle reloading pageset2. -+ **/ -+static int do_post_image_write(void) -+{ -+ /* If switching images fails, do normal powerdown */ -+ if (alt_resume_param[0]) -+ do_toi_step(STEP_RESUME_ALT_IMAGE); -+ -+ toi_power_down(); -+ -+ barrier(); -+ mb(); -+ return 0; -+} -+ -+/** -+ * __save_image - do the hard work of saving the image -+ * -+ * High level routine for getting the image saved. The key assumptions made -+ * are that processes have been frozen and sufficient memory is available. -+ * -+ * We also exit through here at resume time, coming back from toi_hibernate -+ * after the atomic restore. This is the reason for the toi_in_hibernate -+ * test. 
-+ **/ -+static int __save_image(void) -+{ -+ int temp_result, did_copy = 0; -+ -+ toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image.."); -+ -+ toi_message(TOI_ANY_SECTION, TOI_LOW, 1, -+ " - Final values: %d and %d.", -+ pagedir1.size, pagedir2.size); -+ -+ toi_cond_pause(1, "About to write pagedir2."); -+ -+ temp_result = write_pageset(&pagedir2); -+ -+ if (temp_result == -1 || test_result_state(TOI_ABORTED)) -+ return 1; -+ -+ toi_cond_pause(1, "About to copy pageset 1."); -+ -+ if (test_result_state(TOI_ABORTED)) -+ return 1; -+ -+ toi_deactivate_storage(1); -+ -+ toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore."); -+ -+ toi_in_hibernate = 1; -+ -+ if (toi_go_atomic(PMSG_FREEZE, 1)) -+ goto Failed; -+ -+ temp_result = toi_hibernate(); -+ -+#ifdef CONFIG_KGDB -+ if (test_action_state(TOI_POST_RESUME_BREAKPOINT)) -+ kgdb_breakpoint(); -+#endif -+ -+ if (!temp_result) -+ did_copy = 1; -+ -+ /* We return here at resume time too! */ -+ toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result); -+ -+Failed: -+ if (toi_activate_storage(1)) -+ panic("Failed to reactivate our storage."); -+ -+ /* Resume time? */ -+ if (!toi_in_hibernate) { -+ copyback_post(); -+ return 0; -+ } -+ -+ /* Nope. Hibernating. So, see if we can save the image... */ -+ -+ if (temp_result || test_result_state(TOI_ABORTED)) { -+ if (did_copy) -+ goto abort_reloading_pagedir_two; -+ else -+ return 1; -+ } -+ -+ toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size, -+ NULL); -+ -+ if (test_result_state(TOI_ABORTED)) -+ goto abort_reloading_pagedir_two; -+ -+ toi_cond_pause(1, "About to write pageset1."); -+ -+ toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1"); -+ -+ temp_result = write_pageset(&pagedir1); -+ -+ /* We didn't overwrite any memory, so no reread needs to be done. 
*/ -+ if (test_action_state(TOI_TEST_FILTER_SPEED) || -+ test_action_state(TOI_TEST_BIO)) -+ return 1; -+ -+ if (temp_result == 1 || test_result_state(TOI_ABORTED)) -+ goto abort_reloading_pagedir_two; -+ -+ toi_cond_pause(1, "About to write header."); -+ -+ if (test_result_state(TOI_ABORTED)) -+ goto abort_reloading_pagedir_two; -+ -+ temp_result = write_image_header(); -+ -+ if (!temp_result && !test_result_state(TOI_ABORTED)) -+ return 0; -+ -+abort_reloading_pagedir_two: -+ temp_result = read_pageset2(1); -+ -+ /* If that failed, we're sunk. Panic! */ -+ if (temp_result) -+ panic("Attempt to reload pagedir 2 while aborting " -+ "a hibernate failed."); -+ -+ return 1; -+} -+ -+static void map_ps2_pages(int enable) -+{ -+ unsigned long pfn = 0; -+ -+ pfn = memory_bm_next_pfn(pageset2_map); -+ -+ while (pfn != BM_END_OF_MAP) { -+ struct page *page = pfn_to_page(pfn); -+ kernel_map_pages(page, 1, enable); -+ pfn = memory_bm_next_pfn(pageset2_map); -+ } -+} -+ -+/** -+ * do_save_image - save the image and handle the result -+ * -+ * Save the prepared image. If we fail or we're in the path returning -+ * from the atomic restore, cleanup. -+ **/ -+static int do_save_image(void) -+{ -+ int result; -+ map_ps2_pages(0); -+ result = __save_image(); -+ map_ps2_pages(1); -+ return result; -+} -+ -+/** -+ * do_prepare_image - try to prepare an image -+ * -+ * Seek to initialise and prepare an image to be saved. On failure, -+ * cleanup. -+ **/ -+static int do_prepare_image(void) -+{ -+ int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL); -+ -+ if (!restarting && toi_activate_storage(0)) -+ return 1; -+ -+ /* -+ * If kept image and still keeping image and hibernating to RAM, we will -+ * return 1 after hibernating and resuming (provided the power doesn't -+ * run out. In that case, we skip directly to cleaning up and exiting. 
-+ */ -+ -+ if (!can_hibernate() || -+ (test_result_state(TOI_KEPT_IMAGE) && -+ check_still_keeping_image())) -+ return 1; -+ -+ if (toi_init(restarting) || toi_prepare_image() || -+ test_result_state(TOI_ABORTED)) -+ return 1; -+ -+ trap_non_toi_io = 1; -+ -+ return 0; -+} -+ -+/** -+ * do_check_can_resume - find out whether an image has been stored -+ * -+ * Read whether an image exists. We use the same routine as the -+ * image_exists sysfs entry, and just look to see whether the -+ * first character in the resulting buffer is a '1'. -+ **/ -+int do_check_can_resume(void) -+{ -+ int result = -1; -+ -+ if (toi_activate_storage(0)) -+ return -1; -+ -+ if (!test_toi_state(TOI_RESUME_DEVICE_OK)) -+ toi_attempt_to_parse_resume_device(1); -+ -+ if (toiActiveAllocator) -+ result = toiActiveAllocator->image_exists(1); -+ -+ toi_deactivate_storage(0); -+ return result; -+} -+EXPORT_SYMBOL_GPL(do_check_can_resume); -+ -+/** -+ * do_load_atomic_copy - load the first part of an image, if it exists -+ * -+ * Check whether we have an image. If one exists, do sanity checking -+ * (possibly invalidating the image or even rebooting if the user -+ * requests that) before loading it into memory in preparation for the -+ * atomic restore. -+ * -+ * If and only if we have an image loaded and ready to restore, we return 1. -+ **/ -+static int do_load_atomic_copy(void) -+{ -+ int read_image_result = 0; -+ -+ if (sizeof(swp_entry_t) != sizeof(long)) { -+ printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size" -+ " of long. 
Please report this!\n"); -+ return 1; -+ } -+ -+ if (!resume_file[0]) -+ printk(KERN_WARNING "TuxOnIce: " -+ "You need to use a resume= command line parameter to " -+ "tell TuxOnIce where to look for an image.\n"); -+ -+ toi_activate_storage(0); -+ -+ if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) && -+ !toi_attempt_to_parse_resume_device(0)) { -+ /* -+ * Without a usable storage device we can do nothing - -+ * even if noresume is given -+ */ -+ -+ if (!toiNumAllocators) -+ printk(KERN_ALERT "TuxOnIce: " -+ "No storage allocators have been registered.\n"); -+ else -+ printk(KERN_ALERT "TuxOnIce: " -+ "Missing or invalid storage location " -+ "(resume= parameter). Please correct and " -+ "rerun lilo (or equivalent) before " -+ "hibernating.\n"); -+ toi_deactivate_storage(0); -+ return 1; -+ } -+ -+ if (allocate_bitmaps()) -+ return 1; -+ -+ read_image_result = read_pageset1(); /* non fatal error ignored */ -+ -+ if (test_toi_state(TOI_NORESUME_SPECIFIED)) -+ clear_toi_state(TOI_NORESUME_SPECIFIED); -+ -+ toi_deactivate_storage(0); -+ -+ if (read_image_result) -+ return 1; -+ -+ return 0; -+} -+ -+/** -+ * prepare_restore_load_alt_image - save & restore alt image variables -+ * -+ * Save and restore the pageset1 maps, when loading an alternate image. 
-+ **/ -+static void prepare_restore_load_alt_image(int prepare) -+{ -+ static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save; -+ -+ if (prepare) { -+ pageset1_map_save = pageset1_map; -+ pageset1_map = NULL; -+ pageset1_copy_map_save = pageset1_copy_map; -+ pageset1_copy_map = NULL; -+ set_toi_state(TOI_LOADING_ALT_IMAGE); -+ toi_reset_alt_image_pageset2_pfn(); -+ } else { -+ memory_bm_free(pageset1_map, 0); -+ pageset1_map = pageset1_map_save; -+ memory_bm_free(pageset1_copy_map, 0); -+ pageset1_copy_map = pageset1_copy_map_save; -+ clear_toi_state(TOI_NOW_RESUMING); -+ clear_toi_state(TOI_LOADING_ALT_IMAGE); -+ } -+} -+ -+/** -+ * do_toi_step - perform a step in hibernating or resuming -+ * -+ * Perform a step in hibernating or resuming an image. This abstraction -+ * is in preparation for implementing cluster support, and perhaps replacing -+ * uswsusp too (haven't looked whether that's possible yet). -+ **/ -+int do_toi_step(int step) -+{ -+ switch (step) { -+ case STEP_HIBERNATE_PREPARE_IMAGE: -+ return do_prepare_image(); -+ case STEP_HIBERNATE_SAVE_IMAGE: -+ return do_save_image(); -+ case STEP_HIBERNATE_POWERDOWN: -+ return do_post_image_write(); -+ case STEP_RESUME_CAN_RESUME: -+ return do_check_can_resume(); -+ case STEP_RESUME_LOAD_PS1: -+ return do_load_atomic_copy(); -+ case STEP_RESUME_DO_RESTORE: -+ /* -+ * If we succeed, this doesn't return. -+ * Instead, we return from do_save_image() in the -+ * hibernated kernel. 
-+ */ -+ return toi_atomic_restore(); -+ case STEP_RESUME_ALT_IMAGE: -+ printk(KERN_INFO "Trying to resume alternate image.\n"); -+ toi_in_hibernate = 0; -+ save_restore_alt_param(SAVE, NOQUIET); -+ prepare_restore_load_alt_image(1); -+ if (!do_check_can_resume()) { -+ printk(KERN_INFO "Nothing to resume from.\n"); -+ goto out; -+ } -+ if (!do_load_atomic_copy()) -+ toi_atomic_restore(); -+ -+ printk(KERN_INFO "Failed to load image.\n"); -+out: -+ prepare_restore_load_alt_image(0); -+ save_restore_alt_param(RESTORE, NOQUIET); -+ break; -+ case STEP_CLEANUP: -+ do_cleanup(1, 0); -+ break; -+ case STEP_QUIET_CLEANUP: -+ do_cleanup(0, 0); -+ break; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(do_toi_step); -+ -+/* -- Functions for kickstarting a hibernate or resume --- */ -+ -+/** -+ * toi_try_resume - try to do the steps in resuming -+ * -+ * Check if we have an image and if so try to resume. Clear the status -+ * flags too. -+ **/ -+void toi_try_resume(void) -+{ -+ set_toi_state(TOI_TRYING_TO_RESUME); -+ resume_attempted = 1; -+ -+ current->flags |= PF_MEMALLOC; -+ toi_start_other_threads(); -+ -+ if (do_toi_step(STEP_RESUME_CAN_RESUME) && -+ !do_toi_step(STEP_RESUME_LOAD_PS1)) -+ do_toi_step(STEP_RESUME_DO_RESTORE); -+ -+ toi_stop_other_threads(); -+ do_cleanup(0, 0); -+ -+ current->flags &= ~PF_MEMALLOC; -+ -+ clear_toi_state(TOI_IGNORE_LOGLEVEL); -+ clear_toi_state(TOI_TRYING_TO_RESUME); -+ clear_toi_state(TOI_NOW_RESUMING); -+} -+ -+/** -+ * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume -+ * -+ * Wrapper for when __toi_try_resume is called from swsusp resume path, -+ * rather than from echo > /sys/power/tuxonice/do_resume. -+ **/ -+static void toi_sys_power_disk_try_resume(void) -+{ -+ resume_attempted = 1; -+ -+ /* -+ * There's a comment in kernel/power/disk.c that indicates -+ * we should be able to use mutex_lock_nested below. That -+ * doesn't seem to cut it, though, so let's just turn lockdep -+ * off for now. 
-+ */ -+ lockdep_off(); -+ -+ if (toi_start_anything(SYSFS_RESUMING)) -+ goto out; -+ -+ toi_try_resume(); -+ -+ /* -+ * For initramfs, we have to clear the boot time -+ * flag after trying to resume -+ */ -+ clear_toi_state(TOI_BOOT_TIME); -+ -+ toi_finish_anything(SYSFS_RESUMING); -+out: -+ lockdep_on(); -+} -+ -+/** -+ * toi_try_hibernate - try to start a hibernation cycle -+ * -+ * Start a hibernation cycle, coming in from either -+ * echo > /sys/power/tuxonice/do_suspend -+ * -+ * or -+ * -+ * echo disk > /sys/power/state -+ * -+ * In the later case, we come in without pm_sem taken; in the -+ * former, it has been taken. -+ **/ -+int toi_try_hibernate(void) -+{ -+ int result = 0, sys_power_disk = 0, retries = 0; -+ -+ if (!mutex_is_locked(&tuxonice_in_use)) { -+ /* Came in via /sys/power/disk */ -+ if (toi_start_anything(SYSFS_HIBERNATING)) -+ return -EBUSY; -+ sys_power_disk = 1; -+ } -+ -+ current->flags |= PF_MEMALLOC; -+ -+ if (test_toi_state(TOI_CLUSTER_MODE)) { -+ toi_initiate_cluster_hibernate(); -+ goto out; -+ } -+ -+prepare: -+ result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE); -+ -+ if (result) -+ goto out; -+ -+ if (test_action_state(TOI_FREEZER_TEST)) -+ goto out_restore_gfp_mask; -+ -+ result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE); -+ -+ if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) { -+ if (retries < 2) { -+ do_cleanup(0, 1); -+ retries++; -+ clear_result_state(TOI_ABORTED); -+ extra_pd1_pages_allowance = extra_pd1_pages_used + 500; -+ printk(KERN_INFO "Automatically adjusting the extra" -+ " pages allowance to %ld and restarting.\n", -+ extra_pd1_pages_allowance); -+ pm_restore_gfp_mask(); -+ goto prepare; -+ } -+ -+ printk(KERN_INFO "Adjusted extra pages allowance twice and " -+ "still couldn't hibernate successfully. Giving up."); -+ } -+ -+ /* This code runs at resume time too! 
*/ -+ if (!result && toi_in_hibernate) -+ result = do_toi_step(STEP_HIBERNATE_POWERDOWN); -+ -+out_restore_gfp_mask: -+ pm_restore_gfp_mask(); -+out: -+ do_cleanup(1, 0); -+ current->flags &= ~PF_MEMALLOC; -+ -+ if (sys_power_disk) -+ toi_finish_anything(SYSFS_HIBERNATING); -+ -+ return result; -+} -+ -+/* -+ * channel_no: If !0, -c is added to args (userui). -+ */ -+int toi_launch_userspace_program(char *command, int channel_no, -+ int wait, int debug) -+{ -+ int retval; -+ static char *envp[] = { -+ "HOME=/", -+ "TERM=linux", -+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", -+ NULL }; -+ static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL -+ }; -+ char *channel = NULL; -+ int arg = 0, size; -+ char test_read[255]; -+ char *orig_posn = command; -+ -+ if (!strlen(orig_posn)) -+ return 1; -+ -+ if (channel_no) { -+ channel = toi_kzalloc(4, 6, GFP_KERNEL); -+ if (!channel) { -+ printk(KERN_INFO "Failed to allocate memory in " -+ "preparing to launch userspace program.\n"); -+ return 1; -+ } -+ } -+ -+ /* Up to 6 args supported */ -+ while (arg < 6) { -+ sscanf(orig_posn, "%s", test_read); -+ size = strlen(test_read); -+ if (!(size)) -+ break; -+ argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP); -+ strcpy(argv[arg], test_read); -+ orig_posn += size + 1; -+ *test_read = 0; -+ arg++; -+ } -+ -+ if (channel_no) { -+ sprintf(channel, "-c%d", channel_no); -+ argv[arg] = channel; -+ } else -+ arg--; -+ -+ if (debug) { -+ argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP); -+ strcpy(argv[arg], "--debug"); -+ } -+ -+ retval = call_usermodehelper(argv[0], argv, envp, wait); -+ -+ /* -+ * If the program reports an error, retval = 256. Don't complain -+ * about that here. 
-+ */ -+ if (retval && retval != 256) -+ printk(KERN_ERR "Failed to launch userspace program '%s': " -+ "Error %d\n", command, retval); -+ -+ { -+ int i; -+ for (i = 0; i < arg; i++) -+ if (argv[i] && argv[i] != channel) -+ toi_kfree(5, argv[i], sizeof(*argv[i])); -+ } -+ -+ toi_kfree(4, channel, sizeof(*channel)); -+ -+ return retval; -+} -+ -+/* -+ * This array contains entries that are automatically registered at -+ * boot. Modules and the console code register their own entries separately. -+ */ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_LONG("extra_pages_allowance", SYSFS_RW, -+ &extra_pd1_pages_allowance, 0, LONG_MAX, 0), -+ SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read, -+ image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL), -+ SYSFS_STRING("resume", SYSFS_RW, resume_file, 255, -+ SYSFS_NEEDS_SM_FOR_WRITE, -+ attempt_to_parse_resume_device2), -+ SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255, -+ SYSFS_NEEDS_SM_FOR_WRITE, -+ attempt_to_parse_alt_resume_param), -+ SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0, -+ NULL), -+ SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_IGNORE_ROOTFS, 0), -+ SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2, -+ INT_MAX, 0), -+ SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0), -+ SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_NO_MULTITHREADED_IO, 0), -+ SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_NO_FLUSHER_THREAD, 0), -+ SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_PAGESET2_FULL, 0), -+ SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0), -+ SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_REPLACE_SWSUSP, 0), -+ SYSFS_STRING("resume_commandline", SYSFS_RW, -+ toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0, -+ NULL), -+ SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL), -+ SYSFS_BIT("freezer_test", 
SYSFS_RW, &toi_bkd.toi_action, -+ TOI_FREEZER_TEST, 0), -+ SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0), -+ SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_TEST_FILTER_SPEED, 0), -+ SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_NO_PAGESET2, 0), -+ SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_NO_PS2_IF_UNNEEDED, 0), -+ SYSFS_BIT("late_cpu_hotplug", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_LATE_CPU_HOTPLUG, 0), -+ SYSFS_STRING("binary_signature", SYSFS_READONLY, -+ tuxonice_signature, 9, 0, NULL), -+ SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0, -+ NULL), -+#ifdef CONFIG_KGDB -+ SYSFS_BIT("post_resume_breakpoint", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_POST_RESUME_BREAKPOINT, 0), -+#endif -+ SYSFS_BIT("no_readahead", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_NO_READAHEAD, 0), -+#ifdef CONFIG_TOI_KEEP_IMAGE -+ SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE, -+ 0), -+#endif -+}; -+ -+static struct toi_core_fns my_fns = { -+ .get_nonconflicting_page = __toi_get_nonconflicting_page, -+ .post_context_save = __toi_post_context_save, -+ .try_hibernate = toi_try_hibernate, -+ .try_resume = toi_sys_power_disk_try_resume, -+}; -+ -+/** -+ * core_load - initialisation of TuxOnIce core -+ * -+ * Initialise the core, beginning with sysfs. Checksum and so on are part of -+ * the core, but have their own initialisation routines because they either -+ * aren't compiled in all the time or have their own subdirectories. 
-+ **/ -+static __init int core_load(void) -+{ -+ int i, -+ numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); -+ -+ printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION -+ " (http://tuxonice.net)\n"); -+ -+ if (toi_sysfs_init()) -+ return 1; -+ -+ for (i = 0; i < numfiles; i++) -+ toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); -+ -+ toi_core_fns = &my_fns; -+ -+ if (toi_alloc_init()) -+ return 1; -+ if (toi_checksum_init()) -+ return 1; -+ if (toi_usm_init()) -+ return 1; -+ if (toi_ui_init()) -+ return 1; -+ if (toi_poweroff_init()) -+ return 1; -+ if (toi_cluster_init()) -+ return 1; -+ -+ return 0; -+} -+ -+#ifdef MODULE -+/** -+ * core_unload: Prepare to unload the core code. -+ **/ -+static __exit void core_unload(void) -+{ -+ int i, -+ numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); -+ -+ toi_alloc_exit(); -+ toi_checksum_exit(); -+ toi_poweroff_exit(); -+ toi_ui_exit(); -+ toi_usm_exit(); -+ toi_cluster_exit(); -+ -+ for (i = 0; i < numfiles; i++) -+ toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); -+ -+ toi_core_fns = NULL; -+ -+ toi_sysfs_exit(); -+} -+MODULE_LICENSE("GPL"); -+module_init(core_load); -+module_exit(core_unload); -+#else -+late_initcall(core_load); -+#endif -diff --git a/kernel/power/tuxonice_incremental.c b/kernel/power/tuxonice_incremental.c -new file mode 100644 -index 0000000..5870fdd ---- /dev/null -+++ b/kernel/power/tuxonice_incremental.c -@@ -0,0 +1,12 @@ -+/* -+ * kernel/power/tuxonice_incremental.c -+ * -+ * Copyright (C) 2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file contains routines related to storing incremental images - that -+ * is, retaining an image after an initial cycle and then storing incremental -+ * changes on subsequent hibernations. 
-+ */ -+ -diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c -new file mode 100644 -index 0000000..00577e1 ---- /dev/null -+++ b/kernel/power/tuxonice_io.c -@@ -0,0 +1,1936 @@ -+/* -+ * kernel/power/tuxonice_io.c -+ * -+ * Copyright (C) 1998-2001 Gabor Kuti -+ * Copyright (C) 1998,2001,2002 Pavel Machek -+ * Copyright (C) 2002-2003 Florent Chabaud -+ * Copyright (C) 2002-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * It contains high level IO routines for hibernating. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_pageflags.h" -+#include "tuxonice_io.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_storage.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice_extent.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_checksum.h" -+#include "tuxonice_alloc.h" -+char alt_resume_param[256]; -+ -+/* Version read from image header at resume */ -+static int toi_image_header_version; -+ -+#define read_if_version(VERS, VAR, DESC, ERR_ACT) do { \ -+ if (likely(toi_image_header_version >= VERS)) \ -+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, \ -+ (char *) &VAR, sizeof(VAR))) { \ -+ abort_hibernate(TOI_FAILED_IO, "Failed to read DESC."); \ -+ ERR_ACT; \ -+ } \ -+} while(0) \ -+ -+/* Variables shared between threads and updated under the mutex */ -+static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result; -+static int io_index, io_nextupdate, io_pc, io_pc_step; -+static DEFINE_MUTEX(io_mutex); -+static DEFINE_PER_CPU(struct page *, last_sought); -+static DEFINE_PER_CPU(struct page *, last_high_page); -+static DEFINE_PER_CPU(char *, checksum_locn); -+static DEFINE_PER_CPU(struct pbe *, last_low_page); -+static atomic_t io_count; -+atomic_t toi_io_workers; 
-+EXPORT_SYMBOL_GPL(toi_io_workers); -+ -+static int using_flusher; -+ -+DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher); -+EXPORT_SYMBOL_GPL(toi_io_queue_flusher); -+ -+int toi_bio_queue_flusher_should_finish; -+EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish); -+ -+int toi_max_workers; -+ -+static char *image_version_error = "The image header version is newer than " \ -+ "this kernel supports."; -+ -+struct toi_module_ops *first_filter; -+ -+static atomic_t toi_num_other_threads; -+static DECLARE_WAIT_QUEUE_HEAD(toi_worker_wait_queue); -+enum toi_worker_commands { -+ TOI_IO_WORKER_STOP, -+ TOI_IO_WORKER_RUN, -+ TOI_IO_WORKER_EXIT -+}; -+static enum toi_worker_commands toi_worker_command; -+ -+/** -+ * toi_attempt_to_parse_resume_device - determine if we can hibernate -+ * -+ * Can we hibernate, using the current resume= parameter? -+ **/ -+int toi_attempt_to_parse_resume_device(int quiet) -+{ -+ struct list_head *Allocator; -+ struct toi_module_ops *thisAllocator; -+ int result, returning = 0; -+ -+ if (toi_activate_storage(0)) -+ return 0; -+ -+ toiActiveAllocator = NULL; -+ clear_toi_state(TOI_RESUME_DEVICE_OK); -+ clear_toi_state(TOI_CAN_RESUME); -+ clear_result_state(TOI_ABORTED); -+ -+ if (!toiNumAllocators) { -+ if (!quiet) -+ printk(KERN_INFO "TuxOnIce: No storage allocators have " -+ "been registered. Hibernating will be " -+ "disabled.\n"); -+ goto cleanup; -+ } -+ -+ list_for_each(Allocator, &toiAllocators) { -+ thisAllocator = list_entry(Allocator, struct toi_module_ops, -+ type_list); -+ -+ /* -+ * Not sure why you'd want to disable an allocator, but -+ * we should honour the flag if we're providing it -+ */ -+ if (!thisAllocator->enabled) -+ continue; -+ -+ result = thisAllocator->parse_sig_location( -+ resume_file, (toiNumAllocators == 1), -+ quiet); -+ -+ switch (result) { -+ case -EINVAL: -+ /* For this allocator, but not a valid -+ * configuration. Error already printed. */ -+ goto cleanup; -+ -+ case 0: -+ /* For this allocator and valid. 
*/ -+ toiActiveAllocator = thisAllocator; -+ -+ set_toi_state(TOI_RESUME_DEVICE_OK); -+ set_toi_state(TOI_CAN_RESUME); -+ returning = 1; -+ goto cleanup; -+ } -+ } -+ if (!quiet) -+ printk(KERN_INFO "TuxOnIce: No matching enabled allocator " -+ "found. Resuming disabled.\n"); -+cleanup: -+ toi_deactivate_storage(0); -+ return returning; -+} -+EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device); -+ -+void attempt_to_parse_resume_device2(void) -+{ -+ toi_prepare_usm(); -+ toi_attempt_to_parse_resume_device(0); -+ toi_cleanup_usm(); -+} -+EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2); -+ -+void save_restore_alt_param(int replace, int quiet) -+{ -+ static char resume_param_save[255]; -+ static unsigned long toi_state_save; -+ -+ if (replace) { -+ toi_state_save = toi_state; -+ strcpy(resume_param_save, resume_file); -+ strcpy(resume_file, alt_resume_param); -+ } else { -+ strcpy(resume_file, resume_param_save); -+ toi_state = toi_state_save; -+ } -+ toi_attempt_to_parse_resume_device(quiet); -+} -+ -+void attempt_to_parse_alt_resume_param(void) -+{ -+ int ok = 0; -+ -+ /* Temporarily set resume_param to the poweroff value */ -+ if (!strlen(alt_resume_param)) -+ return; -+ -+ printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n"); -+ save_restore_alt_param(SAVE, NOQUIET); -+ if (test_toi_state(TOI_CAN_RESUME)) -+ ok = 1; -+ -+ printk(KERN_INFO "=== Done ===\n"); -+ save_restore_alt_param(RESTORE, QUIET); -+ -+ /* If not ok, clear the string */ -+ if (ok) -+ return; -+ -+ printk(KERN_INFO "Can't resume from that location; clearing " -+ "alt_resume_param.\n"); -+ alt_resume_param[0] = '\0'; -+} -+ -+/** -+ * noresume_reset_modules - reset data structures in case of non resuming -+ * -+ * When we read the start of an image, modules (and especially the -+ * active allocator) might need to reset data structures if we -+ * decide to remove the image rather than resuming from it. 
-+ **/ -+static void noresume_reset_modules(void) -+{ -+ struct toi_module_ops *this_filter; -+ -+ list_for_each_entry(this_filter, &toi_filters, type_list) -+ if (this_filter->noresume_reset) -+ this_filter->noresume_reset(); -+ -+ if (toiActiveAllocator && toiActiveAllocator->noresume_reset) -+ toiActiveAllocator->noresume_reset(); -+} -+ -+/** -+ * fill_toi_header - fill the hibernate header structure -+ * @struct toi_header: Header data structure to be filled. -+ **/ -+static int fill_toi_header(struct toi_header *sh) -+{ -+ int i, error; -+ -+ error = init_header((struct swsusp_info *) sh); -+ if (error) -+ return error; -+ -+ sh->pagedir = pagedir1; -+ sh->pageset_2_size = pagedir2.size; -+ sh->param0 = toi_result; -+ sh->param1 = toi_bkd.toi_action; -+ sh->param2 = toi_bkd.toi_debug_state; -+ sh->param3 = toi_bkd.toi_default_console_level; -+ sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev; -+ for (i = 0; i < 4; i++) -+ sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2]; -+ sh->bkd = boot_kernel_data_buffer; -+ return 0; -+} -+ -+/** -+ * rw_init_modules - initialize modules -+ * @rw: Whether we are reading of writing an image. -+ * @which: Section of the image being processed. -+ * -+ * Iterate over modules, preparing the ones that will be used to read or write -+ * data. 
-+ **/ -+static int rw_init_modules(int rw, int which) -+{ -+ struct toi_module_ops *this_module; -+ /* Initialise page transformers */ -+ list_for_each_entry(this_module, &toi_filters, type_list) { -+ if (!this_module->enabled) -+ continue; -+ if (this_module->rw_init && this_module->rw_init(rw, which)) { -+ abort_hibernate(TOI_FAILED_MODULE_INIT, -+ "Failed to initialize the %s filter.", -+ this_module->name); -+ return 1; -+ } -+ } -+ -+ /* Initialise allocator */ -+ if (toiActiveAllocator->rw_init(rw, which)) { -+ abort_hibernate(TOI_FAILED_MODULE_INIT, -+ "Failed to initialise the allocator."); -+ return 1; -+ } -+ -+ /* Initialise other modules */ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || -+ this_module->type == FILTER_MODULE || -+ this_module->type == WRITER_MODULE) -+ continue; -+ if (this_module->rw_init && this_module->rw_init(rw, which)) { -+ set_abort_result(TOI_FAILED_MODULE_INIT); -+ printk(KERN_INFO "Setting aborted flag due to module " -+ "init failure.\n"); -+ return 1; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * rw_cleanup_modules - cleanup modules -+ * @rw: Whether we are reading of writing an image. -+ * -+ * Cleanup components after reading or writing a set of pages. -+ * Only the allocator may fail. 
-+ **/ -+static int rw_cleanup_modules(int rw) -+{ -+ struct toi_module_ops *this_module; -+ int result = 0; -+ -+ /* Cleanup other modules */ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || -+ this_module->type == FILTER_MODULE || -+ this_module->type == WRITER_MODULE) -+ continue; -+ if (this_module->rw_cleanup) -+ result |= this_module->rw_cleanup(rw); -+ } -+ -+ /* Flush data and cleanup */ -+ list_for_each_entry(this_module, &toi_filters, type_list) { -+ if (!this_module->enabled) -+ continue; -+ if (this_module->rw_cleanup) -+ result |= this_module->rw_cleanup(rw); -+ } -+ -+ result |= toiActiveAllocator->rw_cleanup(rw); -+ -+ return result; -+} -+ -+static struct page *copy_page_from_orig_page(struct page *orig_page, int is_high) -+{ -+ int index, min, max; -+ struct page *high_page = NULL, -+ **my_last_high_page = &__get_cpu_var(last_high_page), -+ **my_last_sought = &__get_cpu_var(last_sought); -+ struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page); -+ void *compare; -+ -+ if (is_high) { -+ if (*my_last_sought && *my_last_high_page && -+ *my_last_sought < orig_page) -+ high_page = *my_last_high_page; -+ else -+ high_page = (struct page *) restore_highmem_pblist; -+ this = (struct pbe *) kmap(high_page); -+ compare = orig_page; -+ } else { -+ if (*my_last_sought && *my_last_low_page && -+ *my_last_sought < orig_page) -+ this = *my_last_low_page; -+ else -+ this = restore_pblist; -+ compare = page_address(orig_page); -+ } -+ -+ *my_last_sought = orig_page; -+ -+ /* Locate page containing pbe */ -+ while (this[PBES_PER_PAGE - 1].next && -+ this[PBES_PER_PAGE - 1].orig_address < compare) { -+ if (is_high) { -+ struct page *next_high_page = (struct page *) -+ this[PBES_PER_PAGE - 1].next; -+ kunmap(high_page); -+ this = kmap(next_high_page); -+ high_page = next_high_page; -+ } else -+ this = this[PBES_PER_PAGE - 1].next; -+ } -+ -+ /* Do a binary search within the page */ -+ min = 0; -+ max = 
PBES_PER_PAGE; -+ index = PBES_PER_PAGE / 2; -+ while (max - min) { -+ if (!this[index].orig_address || -+ this[index].orig_address > compare) -+ max = index; -+ else if (this[index].orig_address == compare) { -+ if (is_high) { -+ struct page *page = this[index].address; -+ *my_last_high_page = high_page; -+ kunmap(high_page); -+ return page; -+ } -+ *my_last_low_page = this; -+ return virt_to_page(this[index].address); -+ } else -+ min = index; -+ index = ((max + min) / 2); -+ }; -+ -+ if (is_high) -+ kunmap(high_page); -+ -+ abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for" -+ " orig page %p. This[min].orig_address=%p.\n", orig_page, -+ this[index].orig_address); -+ return NULL; -+} -+ -+/** -+ * write_next_page - write the next page in a pageset -+ * @data_pfn: The pfn where the next data to write is located. -+ * @my_io_index: The index of the page in the pageset. -+ * @write_pfn: The pfn number to write in the image (where the data belongs). -+ * -+ * Get the pfn of the next page to write, map the page if necessary and do the -+ * write. -+ **/ -+static int write_next_page(unsigned long *data_pfn, int *my_io_index, -+ unsigned long *write_pfn) -+{ -+ struct page *page; -+ char **my_checksum_locn = &__get_cpu_var(checksum_locn); -+ int result = 0, was_present; -+ -+ *data_pfn = memory_bm_next_pfn(io_map); -+ -+ /* Another thread could have beaten us to it. 
*/ -+ if (*data_pfn == BM_END_OF_MAP) { -+ if (atomic_read(&io_count)) { -+ printk(KERN_INFO "Ran out of pfns but io_count is " -+ "still %d.\n", atomic_read(&io_count)); -+ BUG(); -+ } -+ mutex_unlock(&io_mutex); -+ return -ENODATA; -+ } -+ -+ *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); -+ -+ memory_bm_clear_bit(io_map, *data_pfn); -+ page = pfn_to_page(*data_pfn); -+ -+ was_present = kernel_page_present(page); -+ if (!was_present) -+ kernel_map_pages(page, 1, 1); -+ -+ if (io_pageset == 1) -+ *write_pfn = memory_bm_next_pfn(pageset1_map); -+ else { -+ *write_pfn = *data_pfn; -+ *my_checksum_locn = tuxonice_get_next_checksum(); -+ } -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Write %d:%ld.", *my_io_index, *write_pfn); -+ -+ mutex_unlock(&io_mutex); -+ -+ if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn)) -+ return 1; -+ -+ result = first_filter->write_page(*write_pfn, TOI_PAGE, page, -+ PAGE_SIZE); -+ -+ if (!was_present) -+ kernel_map_pages(page, 1, 0); -+ -+ return result; -+} -+ -+/** -+ * read_next_page - read the next page in a pageset -+ * @my_io_index: The index of the page in the pageset. -+ * @write_pfn: The pfn in which the data belongs. -+ * -+ * Read a page of the image into our buffer. It can happen (here and in the -+ * write routine) that threads don't get run until after other CPUs have done -+ * all the work. This was the cause of the long standing issue with -+ * occasionally getting -ENODATA errors at the end of reading the image. We -+ * therefore need to check there's actually a page to read before trying to -+ * retrieve one. 
-+ **/ -+ -+static int read_next_page(int *my_io_index, unsigned long *write_pfn, -+ struct page *buffer) -+{ -+ unsigned int buf_size = PAGE_SIZE; -+ unsigned long left = atomic_read(&io_count); -+ -+ if (!left) -+ return -ENODATA; -+ -+ /* Start off assuming the page we read isn't resaved */ -+ *my_io_index = io_finish_at - atomic_sub_return(1, &io_count); -+ -+ mutex_unlock(&io_mutex); -+ -+ /* -+ * Are we aborting? If so, don't submit any more I/O as -+ * resetting the resume_attempted flag (from ui.c) will -+ * clear the bdev flags, making this thread oops. -+ */ -+ if (unlikely(test_toi_state(TOI_STOP_RESUME))) { -+ atomic_dec(&toi_io_workers); -+ if (!atomic_read(&toi_io_workers)) { -+ /* -+ * So we can be sure we'll have memory for -+ * marking that we haven't resumed. -+ */ -+ rw_cleanup_modules(READ); -+ set_toi_state(TOI_IO_STOPPED); -+ } -+ while (1) -+ schedule(); -+ } -+ -+ /* -+ * See toi_bio_read_page in tuxonice_bio.c: -+ * read the next page in the image. -+ */ -+ return first_filter->read_page(write_pfn, TOI_PAGE, buffer, &buf_size); -+} -+ -+static void use_read_page(unsigned long write_pfn, struct page *buffer) -+{ -+ struct page *final_page = pfn_to_page(write_pfn), -+ *copy_page = final_page; -+ char *virt, *buffer_virt; -+ int was_present, cpu = smp_processor_id(); -+ unsigned long idx = 0; -+ -+ if (io_pageset == 1 && (!pageset1_copy_map || -+ !memory_bm_test_bit_index(pageset1_copy_map, write_pfn, cpu))) { -+ int is_high = PageHighMem(final_page); -+ copy_page = copy_page_from_orig_page(is_high ? 
(void *) write_pfn : final_page, is_high); -+ } -+ -+ if (!memory_bm_test_bit_index(io_map, write_pfn, cpu)) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Discard %ld.", write_pfn); -+ mutex_lock(&io_mutex); -+ idx = atomic_add_return(1, &io_count); -+ mutex_unlock(&io_mutex); -+ return; -+ } -+ -+ virt = kmap(copy_page); -+ buffer_virt = kmap(buffer); -+ was_present = kernel_page_present(copy_page); -+ if (!was_present) -+ kernel_map_pages(copy_page, 1, 1); -+ memcpy(virt, buffer_virt, PAGE_SIZE); -+ if (!was_present) -+ kernel_map_pages(copy_page, 1, 0); -+ kunmap(copy_page); -+ kunmap(buffer); -+ memory_bm_clear_bit_index(io_map, write_pfn, cpu); -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Read %d:%ld", idx, write_pfn); -+} -+ -+static unsigned long status_update(int writing, unsigned long done, -+ unsigned long ticks) -+{ -+ int cs_index = writing ? 0 : 1; -+ unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks; -+ unsigned long msec = jiffies_to_msecs(abs(ticks_so_far)); -+ unsigned long pgs_per_s, estimate = 0, pages_left; -+ -+ if (msec) { -+ pages_left = io_barmax - done; -+ pgs_per_s = 1000 * done / msec; -+ if (pgs_per_s) -+ estimate = DIV_ROUND_UP(pages_left, pgs_per_s); -+ } -+ -+ if (estimate && ticks > HZ / 2) -+ return toi_update_status(done, io_barmax, -+ " %d/%d MB (%lu sec left)", -+ MB(done+1), MB(io_barmax), estimate); -+ -+ return toi_update_status(done, io_barmax, " %d/%d MB", -+ MB(done+1), MB(io_barmax)); -+} -+ -+/** -+ * worker_rw_loop - main loop to read/write pages -+ * -+ * The main I/O loop for reading or writing pages. The io_map bitmap is used to -+ * track the pages to read/write. -+ * If we are reading, the pages are loaded to their final (mapped) pfn. -+ * Data is non zero iff this is a thread started via start_other_threads. -+ * In that case, we stay in here until told to quit. 
-+ **/ -+static int worker_rw_loop(void *data) -+{ -+ unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4, -+ jif_index = 1, start_time = jiffies, thread_num; -+ int result = 0, my_io_index = 0, last_worker; -+ struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP); -+ cpumask_var_t orig_mask; -+ -+ if (!alloc_cpumask_var(&orig_mask, GFP_KERNEL)) { -+ printk(KERN_EMERG "Failed to allocate cpumask for TuxOnIce I/O thread %ld.\n", (unsigned long) data); -+ return -ENOMEM; -+ } -+ -+ cpumask_copy(orig_mask, tsk_cpus_allowed(current)); -+ -+ current->flags |= PF_NOFREEZE; -+ -+top: -+ mutex_lock(&io_mutex); -+ thread_num = atomic_read(&toi_io_workers); -+ -+ cpumask_copy(tsk_cpus_allowed(current), orig_mask); -+ schedule(); -+ -+ atomic_inc(&toi_io_workers); -+ -+ while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) && -+ !(io_write && test_result_state(TOI_ABORTED)) && -+ toi_worker_command == TOI_IO_WORKER_RUN) { -+ if (!thread_num && jiffies > next_jiffies) { -+ next_jiffies += HZ / 4; -+ if (toiActiveAllocator->update_throughput_throttle) -+ toiActiveAllocator->update_throughput_throttle( -+ jif_index); -+ jif_index++; -+ } -+ -+ /* -+ * What page to use? If reading, don't know yet which page's -+ * data will be read, so always use the buffer. If writing, -+ * use the copy (Pageset1) or original page (Pageset2), but -+ * always write the pfn of the original page. -+ */ -+ if (io_write) -+ result = write_next_page(&data_pfn, &my_io_index, -+ &write_pfn); -+ else /* Reading */ -+ result = read_next_page(&my_io_index, &write_pfn, -+ buffer); -+ -+ if (result) { -+ mutex_lock(&io_mutex); -+ /* Nothing to do? 
*/ -+ if (result == -ENODATA) { -+ toi_message(TOI_IO, TOI_VERBOSE, 0, -+ "Thread %d has no more work.", -+ smp_processor_id()); -+ break; -+ } -+ -+ io_result = result; -+ -+ if (io_write) { -+ printk(KERN_INFO "Write chunk returned %d.\n", -+ result); -+ abort_hibernate(TOI_FAILED_IO, -+ "Failed to write a chunk of the " -+ "image."); -+ break; -+ } -+ -+ if (io_pageset == 1) { -+ printk(KERN_ERR "\nBreaking out of I/O loop " -+ "because of result code %d.\n", result); -+ break; -+ } -+ panic("Read chunk returned (%d)", result); -+ } -+ -+ /* -+ * Discard reads of resaved pages while reading ps2 -+ * and unwanted pages while rereading ps2 when aborting. -+ */ -+ if (!io_write) { -+ if (!PageResave(pfn_to_page(write_pfn))) -+ use_read_page(write_pfn, buffer); -+ else { -+ mutex_lock(&io_mutex); -+ toi_message(TOI_IO, TOI_VERBOSE, 0, -+ "Resaved %ld.", write_pfn); -+ atomic_inc(&io_count); -+ mutex_unlock(&io_mutex); -+ } -+ } -+ -+ if (!thread_num) { -+ if(my_io_index + io_base > io_nextupdate) -+ io_nextupdate = status_update(io_write, -+ my_io_index + io_base, -+ jiffies - start_time); -+ -+ if (my_io_index > io_pc) { -+ printk(KERN_CONT "...%d%%", 20 * io_pc_step); -+ io_pc_step++; -+ io_pc = io_finish_at * io_pc_step / 5; -+ } -+ } -+ -+ toi_cond_pause(0, NULL); -+ -+ /* -+ * Subtle: If there's less I/O still to be done than threads -+ * running, quit. This stops us doing I/O beyond the end of -+ * the image when reading. -+ * -+ * Possible race condition. Two threads could do the test at -+ * the same time; one should exit and one should continue. -+ * Therefore we take the mutex before comparing and exiting. 
-+ */ -+ -+ mutex_lock(&io_mutex); -+ } -+ -+ last_worker = atomic_dec_and_test(&toi_io_workers); -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "%d workers left.", atomic_read(&toi_io_workers)); -+ mutex_unlock(&io_mutex); -+ -+ if ((unsigned long) data && toi_worker_command != TOI_IO_WORKER_EXIT) { -+ /* Were we the last thread and we're using a flusher thread? */ -+ if (last_worker && using_flusher) { -+ toiActiveAllocator->finish_all_io(); -+ } -+ /* First, if we're doing I/O, wait for it to finish */ -+ wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_RUN); -+ /* Then wait to be told what to do next */ -+ wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_STOP); -+ if (toi_worker_command == TOI_IO_WORKER_RUN) -+ goto top; -+ } -+ -+ if (thread_num) -+ atomic_dec(&toi_num_other_threads); -+ -+ toi_message(TOI_IO, TOI_LOW, 0, "Thread %d exiting.", thread_num); -+ toi__free_page(28, buffer); -+ free_cpumask_var(orig_mask); -+ -+ return result; -+} -+ -+int toi_start_other_threads(void) -+{ -+ int cpu; -+ struct task_struct *p; -+ int to_start = (toi_max_workers ? 
toi_max_workers : num_online_cpus()) - 1; -+ unsigned long num_started = 0; -+ -+ if (test_action_state(TOI_NO_MULTITHREADED_IO)) -+ return 0; -+ -+ toi_worker_command = TOI_IO_WORKER_STOP; -+ -+ for_each_online_cpu(cpu) { -+ if (num_started == to_start) -+ break; -+ -+ if (cpu == smp_processor_id()) -+ continue; -+ -+ p = kthread_create_on_node(worker_rw_loop, (void *) num_started + 1, -+ cpu_to_node(cpu), "ktoi_io/%d", cpu); -+ if (IS_ERR(p)) { -+ printk(KERN_ERR "ktoi_io for %i failed\n", cpu); -+ continue; -+ } -+ kthread_bind(p, cpu); -+ p->flags |= PF_MEMALLOC; -+ wake_up_process(p); -+ num_started++; -+ atomic_inc(&toi_num_other_threads); -+ } -+ -+ toi_message(TOI_IO, TOI_LOW, 0, "Started %d threads.", num_started); -+ return num_started; -+} -+ -+void toi_stop_other_threads(void) -+{ -+ toi_message(TOI_IO, TOI_LOW, 0, "Stopping other threads."); -+ toi_worker_command = TOI_IO_WORKER_EXIT; -+ wake_up(&toi_worker_wait_queue); -+} -+ -+/** -+ * do_rw_loop - main highlevel function for reading or writing pages -+ * -+ * Create the io_map bitmap and call worker_rw_loop to perform I/O operations. 
-+ **/ -+static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags, -+ int base, int barmax, int pageset) -+{ -+ int index = 0, cpu, result = 0, workers_started; -+ unsigned long pfn; -+ -+ first_filter = toi_get_next_filter(NULL); -+ -+ if (!finish_at) -+ return 0; -+ -+ io_write = write; -+ io_finish_at = finish_at; -+ io_base = base; -+ io_barmax = barmax; -+ io_pageset = pageset; -+ io_index = 0; -+ io_pc = io_finish_at / 5; -+ io_pc_step = 1; -+ io_result = 0; -+ io_nextupdate = base + 1; -+ toi_bio_queue_flusher_should_finish = 0; -+ -+ for_each_online_cpu(cpu) { -+ per_cpu(last_sought, cpu) = NULL; -+ per_cpu(last_low_page, cpu) = NULL; -+ per_cpu(last_high_page, cpu) = NULL; -+ } -+ -+ /* Ensure all bits clear */ -+ memory_bm_clear(io_map); -+ -+ /* Set the bits for the pages to write */ -+ memory_bm_position_reset(pageflags); -+ -+ pfn = memory_bm_next_pfn(pageflags); -+ -+ while (pfn != BM_END_OF_MAP && index < finish_at) { -+ memory_bm_set_bit(io_map, pfn); -+ pfn = memory_bm_next_pfn(pageflags); -+ index++; -+ } -+ -+ BUG_ON(index < finish_at); -+ -+ atomic_set(&io_count, finish_at); -+ -+ memory_bm_position_reset(pageset1_map); -+ -+ mutex_lock(&io_mutex); -+ -+ clear_toi_state(TOI_IO_STOPPED); -+ -+ using_flusher = (atomic_read(&toi_num_other_threads) && -+ toiActiveAllocator->io_flusher && -+ !test_action_state(TOI_NO_FLUSHER_THREAD)); -+ -+ workers_started = atomic_read(&toi_num_other_threads); -+ -+ memory_bm_set_iterators(io_map, atomic_read(&toi_num_other_threads) + 1); -+ memory_bm_position_reset(io_map); -+ -+ memory_bm_set_iterators(pageset1_copy_map, atomic_read(&toi_num_other_threads) + 1); -+ memory_bm_position_reset(pageset1_copy_map); -+ -+ toi_worker_command = TOI_IO_WORKER_RUN; -+ wake_up(&toi_worker_wait_queue); -+ -+ mutex_unlock(&io_mutex); -+ -+ if (using_flusher) -+ result = toiActiveAllocator->io_flusher(write); -+ else -+ worker_rw_loop(NULL); -+ -+ while (atomic_read(&toi_io_workers)) -+ schedule(); -+ -+ 
printk(KERN_CONT "\n"); -+ -+ toi_worker_command = TOI_IO_WORKER_STOP; -+ wake_up(&toi_worker_wait_queue); -+ -+ if (unlikely(test_toi_state(TOI_STOP_RESUME))) { -+ if (!atomic_read(&toi_io_workers)) { -+ rw_cleanup_modules(READ); -+ set_toi_state(TOI_IO_STOPPED); -+ } -+ while (1) -+ schedule(); -+ } -+ set_toi_state(TOI_IO_STOPPED); -+ -+ if (!io_result && !result && !test_result_state(TOI_ABORTED)) { -+ unsigned long next; -+ -+ toi_update_status(io_base + io_finish_at, io_barmax, -+ " %d/%d MB ", -+ MB(io_base + io_finish_at), MB(io_barmax)); -+ -+ memory_bm_position_reset(io_map); -+ next = memory_bm_next_pfn(io_map); -+ if (next != BM_END_OF_MAP) { -+ printk(KERN_INFO "Finished I/O loop but still work to " -+ "do?\nFinish at = %d. io_count = %d.\n", -+ finish_at, atomic_read(&io_count)); -+ printk(KERN_INFO "I/O bitmap still records work to do." -+ "%ld.\n", next); -+ BUG(); -+ do { -+ cpu_relax(); -+ } while (0); -+ } -+ } -+ -+ return io_result ? io_result : result; -+} -+ -+/** -+ * write_pageset - write a pageset to disk. -+ * @pagedir: Which pagedir to write. -+ * -+ * Returns: -+ * Zero on success or -1 on failure. -+ **/ -+int write_pageset(struct pagedir *pagedir) -+{ -+ int finish_at, base = 0; -+ int barmax = pagedir1.size + pagedir2.size; -+ long error = 0; -+ struct memory_bitmap *pageflags; -+ unsigned long start_time, end_time; -+ -+ /* -+ * Even if there is nothing to read or write, the allocator -+ * may need the init/cleanup for it's housekeeping. (eg: -+ * Pageset1 may start where pageset2 ends when writing). 
-+ */ -+ finish_at = pagedir->size; -+ -+ if (pagedir->id == 1) { -+ toi_prepare_status(DONT_CLEAR_BAR, -+ "Writing kernel & process data..."); -+ base = pagedir2.size; -+ if (test_action_state(TOI_TEST_FILTER_SPEED) || -+ test_action_state(TOI_TEST_BIO)) -+ pageflags = pageset1_map; -+ else -+ pageflags = pageset1_copy_map; -+ } else { -+ toi_prepare_status(DONT_CLEAR_BAR, "Writing caches..."); -+ pageflags = pageset2_map; -+ } -+ -+ start_time = jiffies; -+ -+ if (rw_init_modules(WRITE, pagedir->id)) { -+ abort_hibernate(TOI_FAILED_MODULE_INIT, -+ "Failed to initialise modules for writing."); -+ error = 1; -+ } -+ -+ if (!error) -+ error = do_rw_loop(WRITE, finish_at, pageflags, base, barmax, -+ pagedir->id); -+ -+ if (rw_cleanup_modules(WRITE) && !error) { -+ abort_hibernate(TOI_FAILED_MODULE_CLEANUP, -+ "Failed to cleanup after writing."); -+ error = 1; -+ } -+ -+ end_time = jiffies; -+ -+ if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { -+ toi_bkd.toi_io_time[0][0] += finish_at, -+ toi_bkd.toi_io_time[0][1] += (end_time - start_time); -+ } -+ -+ return error; -+} -+ -+/** -+ * read_pageset - highlevel function to read a pageset from disk -+ * @pagedir: pageset to read -+ * @overwrittenpagesonly: Whether to read the whole pageset or -+ * only part of it. -+ * -+ * Returns: -+ * Zero on success or -1 on failure. 
-+ **/ -+static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly) -+{ -+ int result = 0, base = 0; -+ int finish_at = pagedir->size; -+ int barmax = pagedir1.size + pagedir2.size; -+ struct memory_bitmap *pageflags; -+ unsigned long start_time, end_time; -+ -+ if (pagedir->id == 1) { -+ toi_prepare_status(DONT_CLEAR_BAR, -+ "Reading kernel & process data..."); -+ pageflags = pageset1_map; -+ } else { -+ toi_prepare_status(DONT_CLEAR_BAR, "Reading caches..."); -+ if (overwrittenpagesonly) { -+ barmax = min(pagedir1.size, pagedir2.size); -+ finish_at = min(pagedir1.size, pagedir2.size); -+ } else -+ base = pagedir1.size; -+ pageflags = pageset2_map; -+ } -+ -+ start_time = jiffies; -+ -+ if (rw_init_modules(READ, pagedir->id)) { -+ toiActiveAllocator->remove_image(); -+ result = 1; -+ } else -+ result = do_rw_loop(READ, finish_at, pageflags, base, barmax, -+ pagedir->id); -+ -+ if (rw_cleanup_modules(READ) && !result) { -+ abort_hibernate(TOI_FAILED_MODULE_CLEANUP, -+ "Failed to cleanup after reading."); -+ result = 1; -+ } -+ -+ /* Statistics */ -+ end_time = jiffies; -+ -+ if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) { -+ toi_bkd.toi_io_time[1][0] += finish_at, -+ toi_bkd.toi_io_time[1][1] += (end_time - start_time); -+ } -+ -+ return result; -+} -+ -+/** -+ * write_module_configs - store the modules configuration -+ * -+ * The configuration for each module is stored in the image header. -+ * Returns: Int -+ * Zero on success, Error value otherwise. 
-+ **/ -+static int write_module_configs(void) -+{ -+ struct toi_module_ops *this_module; -+ char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP); -+ int len, index = 1; -+ struct toi_module_header toi_module_header; -+ -+ if (!buffer) { -+ printk(KERN_INFO "Failed to allocate a buffer for saving " -+ "module configuration info.\n"); -+ return -ENOMEM; -+ } -+ -+ /* -+ * We have to know which data goes with which module, so we at -+ * least write a length of zero for a module. Note that we are -+ * also assuming every module's config data takes <= PAGE_SIZE. -+ */ -+ -+ /* For each module (in registration order) */ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || !this_module->storage_needed || -+ (this_module->type == WRITER_MODULE && -+ toiActiveAllocator != this_module)) -+ continue; -+ -+ /* Get the data from the module */ -+ len = 0; -+ if (this_module->save_config_info) -+ len = this_module->save_config_info(buffer); -+ -+ /* Save the details of the module */ -+ toi_module_header.enabled = this_module->enabled; -+ toi_module_header.type = this_module->type; -+ toi_module_header.index = index++; -+ strncpy(toi_module_header.name, this_module->name, -+ sizeof(toi_module_header.name)); -+ toiActiveAllocator->rw_header_chunk(WRITE, -+ this_module, -+ (char *) &toi_module_header, -+ sizeof(toi_module_header)); -+ -+ /* Save the size of the data and any data returned */ -+ toiActiveAllocator->rw_header_chunk(WRITE, -+ this_module, -+ (char *) &len, sizeof(int)); -+ if (len) -+ toiActiveAllocator->rw_header_chunk( -+ WRITE, this_module, buffer, len); -+ } -+ -+ /* Write a blank header to terminate the list */ -+ toi_module_header.name[0] = '\0'; -+ toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ (char *) &toi_module_header, sizeof(toi_module_header)); -+ -+ toi_free_page(22, (unsigned long) buffer); -+ return 0; -+} -+ -+/** -+ * read_one_module_config - read and configure one module -+ * -+ * Read the 
configuration for one module, and configure the module -+ * to match if it is loaded. -+ * -+ * Returns: Int -+ * Zero on success, Error value otherwise. -+ **/ -+static int read_one_module_config(struct toi_module_header *header) -+{ -+ struct toi_module_ops *this_module; -+ int result, len; -+ char *buffer; -+ -+ /* Find the module */ -+ this_module = toi_find_module_given_name(header->name); -+ -+ if (!this_module) { -+ if (header->enabled) { -+ toi_early_boot_message(1, TOI_CONTINUE_REQ, -+ "It looks like we need module %s for reading " -+ "the image but it hasn't been registered.\n", -+ header->name); -+ if (!(test_toi_state(TOI_CONTINUE_REQ))) -+ return -EINVAL; -+ } else -+ printk(KERN_INFO "Module %s configuration data found, " -+ "but the module hasn't registered. Looks like " -+ "it was disabled, so we're ignoring its data.", -+ header->name); -+ } -+ -+ /* Get the length of the data (if any) */ -+ result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len, -+ sizeof(int)); -+ if (result) { -+ printk(KERN_ERR "Failed to read the length of the module %s's" -+ " configuration data.\n", -+ header->name); -+ return -EINVAL; -+ } -+ -+ /* Read any data and pass to the module (if we found one) */ -+ if (!len) -+ return 0; -+ -+ buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP); -+ -+ if (!buffer) { -+ printk(KERN_ERR "Failed to allocate a buffer for reloading " -+ "module configuration info.\n"); -+ return -ENOMEM; -+ } -+ -+ toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len); -+ -+ if (!this_module) -+ goto out; -+ -+ if (!this_module->save_config_info) -+ printk(KERN_ERR "Huh? Module %s appears to have a " -+ "save_config_info, but not a load_config_info " -+ "function!\n", this_module->name); -+ else -+ this_module->load_config_info(buffer, len); -+ -+ /* -+ * Now move this module to the tail of its lists. This will put it in -+ * order. Any new modules will end up at the top of the lists. 
They -+ * should have been set to disabled when loaded (people will -+ * normally not edit an initrd to load a new module and then hibernate -+ * without using it!). -+ */ -+ -+ toi_move_module_tail(this_module); -+ -+ this_module->enabled = header->enabled; -+ -+out: -+ toi_free_page(23, (unsigned long) buffer); -+ return 0; -+} -+ -+/** -+ * read_module_configs - reload module configurations from the image header. -+ * -+ * Returns: Int -+ * Zero on success or an error code. -+ **/ -+static int read_module_configs(void) -+{ -+ int result = 0; -+ struct toi_module_header toi_module_header; -+ struct toi_module_ops *this_module; -+ -+ /* All modules are initially disabled. That way, if we have a module -+ * loaded now that wasn't loaded when we hibernated, it won't be used -+ * in trying to read the data. -+ */ -+ list_for_each_entry(this_module, &toi_modules, module_list) -+ this_module->enabled = 0; -+ -+ /* Get the first module header */ -+ result = toiActiveAllocator->rw_header_chunk(READ, NULL, -+ (char *) &toi_module_header, -+ sizeof(toi_module_header)); -+ if (result) { -+ printk(KERN_ERR "Failed to read the next module header.\n"); -+ return -EINVAL; -+ } -+ -+ /* For each module (in registration order) */ -+ while (toi_module_header.name[0]) { -+ result = read_one_module_config(&toi_module_header); -+ -+ if (result) -+ return -EINVAL; -+ -+ /* Get the next module header */ -+ result = toiActiveAllocator->rw_header_chunk(READ, NULL, -+ (char *) &toi_module_header, -+ sizeof(toi_module_header)); -+ -+ if (result) { -+ printk(KERN_ERR "Failed to read the next module " -+ "header.\n"); -+ return -EINVAL; -+ } -+ } -+ -+ return 0; -+} -+ -+static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev) -+{ -+ return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 
0 : 1; -+} -+ -+int fs_info_space_needed(void) -+{ -+ const struct super_block *sb; -+ int result = sizeof(int); -+ -+ list_for_each_entry(sb, &super_blocks, s_list) { -+ struct fs_info *fs; -+ -+ if (!sb->s_bdev) -+ continue; -+ -+ fs = fs_info_from_block_dev(sb->s_bdev); -+ if (save_fs_info(fs, sb->s_bdev)) -+ result += 16 + sizeof(dev_t) + sizeof(int) + -+ fs->last_mount_size; -+ free_fs_info(fs); -+ } -+ return result; -+} -+ -+static int fs_info_num_to_save(void) -+{ -+ const struct super_block *sb; -+ int to_save = 0; -+ -+ list_for_each_entry(sb, &super_blocks, s_list) { -+ struct fs_info *fs; -+ -+ if (!sb->s_bdev) -+ continue; -+ -+ fs = fs_info_from_block_dev(sb->s_bdev); -+ if (save_fs_info(fs, sb->s_bdev)) -+ to_save++; -+ free_fs_info(fs); -+ } -+ -+ return to_save; -+} -+ -+static int fs_info_save(void) -+{ -+ const struct super_block *sb; -+ int to_save = fs_info_num_to_save(); -+ -+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save, -+ sizeof(int))) { -+ abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info" -+ " to save."); -+ return -EIO; -+ } -+ -+ list_for_each_entry(sb, &super_blocks, s_list) { -+ struct fs_info *fs; -+ -+ if (!sb->s_bdev) -+ continue; -+ -+ fs = fs_info_from_block_dev(sb->s_bdev); -+ if (save_fs_info(fs, sb->s_bdev)) { -+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ &fs->uuid[0], 16)) { -+ abort_hibernate(TOI_FAILED_IO, "Failed to " -+ "write uuid."); -+ return -EIO; -+ } -+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ (char *) &fs->dev_t, sizeof(dev_t))) { -+ abort_hibernate(TOI_FAILED_IO, "Failed to " -+ "write dev_t."); -+ return -EIO; -+ } -+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ (char *) &fs->last_mount_size, sizeof(int))) { -+ abort_hibernate(TOI_FAILED_IO, "Failed to " -+ "write last mount length."); -+ return -EIO; -+ } -+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ fs->last_mount, fs->last_mount_size)) { -+ abort_hibernate(TOI_FAILED_IO, 
"Failed to " -+ "write uuid."); -+ return -EIO; -+ } -+ } -+ free_fs_info(fs); -+ } -+ return 0; -+} -+ -+static int fs_info_load_and_check_one(void) -+{ -+ char uuid[16], *last_mount; -+ int result = 0, ln; -+ dev_t dev_t; -+ struct block_device *dev; -+ struct fs_info *fs_info, seek; -+ -+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) { -+ abort_hibernate(TOI_FAILED_IO, "Failed to read uuid."); -+ return -EIO; -+ } -+ -+ read_if_version(3, dev_t, "uuid dev_t field", return -EIO); -+ -+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln, -+ sizeof(int))) { -+ abort_hibernate(TOI_FAILED_IO, -+ "Failed to read last mount size."); -+ return -EIO; -+ } -+ -+ last_mount = kzalloc(ln, GFP_KERNEL); -+ -+ if (!last_mount) -+ return -ENOMEM; -+ -+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) { -+ abort_hibernate(TOI_FAILED_IO, -+ "Failed to read last mount timestamp."); -+ result = -EIO; -+ goto out_lmt; -+ } -+ -+ strncpy((char *) &seek.uuid, uuid, 16); -+ seek.dev_t = dev_t; -+ seek.last_mount_size = ln; -+ seek.last_mount = last_mount; -+ dev_t = blk_lookup_fs_info(&seek); -+ if (!dev_t) -+ goto out_lmt; -+ -+ dev = toi_open_by_devnum(dev_t); -+ -+ fs_info = fs_info_from_block_dev(dev); -+ if (fs_info && !IS_ERR(fs_info)) { -+ if (ln != fs_info->last_mount_size) { -+ printk(KERN_EMERG "Found matching uuid but last mount " -+ "time lengths differ?! 
" -+ "(%d vs %d).\n", ln, -+ fs_info->last_mount_size); -+ result = -EINVAL; -+ } else { -+ char buf[BDEVNAME_SIZE]; -+ result = !!memcmp(fs_info->last_mount, last_mount, ln); -+ if (result) -+ printk(KERN_EMERG "Last mount time for %s has " -+ "changed!\n", bdevname(dev, buf)); -+ } -+ } -+ toi_close_bdev(dev); -+ free_fs_info(fs_info); -+out_lmt: -+ kfree(last_mount); -+ return result; -+} -+ -+static int fs_info_load_and_check(void) -+{ -+ int to_do, result = 0; -+ -+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do, -+ sizeof(int))) { -+ abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info " -+ "to load."); -+ return -EIO; -+ } -+ -+ while(to_do--) -+ result |= fs_info_load_and_check_one(); -+ -+ return result; -+} -+ -+/** -+ * write_image_header - write the image header after write the image proper -+ * -+ * Returns: Int -+ * Zero on success, error value otherwise. -+ **/ -+int write_image_header(void) -+{ -+ int ret; -+ int total = pagedir1.size + pagedir2.size+2; -+ char *header_buffer = NULL; -+ -+ /* Now prepare to write the header */ -+ ret = toiActiveAllocator->write_header_init(); -+ if (ret) { -+ abort_hibernate(TOI_FAILED_MODULE_INIT, -+ "Active allocator's write_header_init" -+ " function failed."); -+ goto write_image_header_abort; -+ } -+ -+ /* Get a buffer */ -+ header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP); -+ if (!header_buffer) { -+ abort_hibernate(TOI_OUT_OF_MEMORY, -+ "Out of memory when trying to get page for header!"); -+ goto write_image_header_abort; -+ } -+ -+ /* Write hibernate header */ -+ if (fill_toi_header((struct toi_header *) header_buffer)) { -+ abort_hibernate(TOI_OUT_OF_MEMORY, -+ "Failure to fill header information!"); -+ goto write_image_header_abort; -+ } -+ -+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ header_buffer, sizeof(struct toi_header))) { -+ abort_hibernate(TOI_OUT_OF_MEMORY, -+ "Failure to write header info."); -+ goto write_image_header_abort; -+ } -+ -+ if 
(toiActiveAllocator->rw_header_chunk(WRITE, NULL, -+ (char *) &toi_max_workers, sizeof(toi_max_workers))) { -+ abort_hibernate(TOI_OUT_OF_MEMORY, -+ "Failure to number of workers to use."); -+ goto write_image_header_abort; -+ } -+ -+ /* Write filesystem info */ -+ if (fs_info_save()) -+ goto write_image_header_abort; -+ -+ /* Write module configurations */ -+ ret = write_module_configs(); -+ if (ret) { -+ abort_hibernate(TOI_FAILED_IO, -+ "Failed to write module configs."); -+ goto write_image_header_abort; -+ } -+ -+ if (memory_bm_write(pageset1_map, -+ toiActiveAllocator->rw_header_chunk)) { -+ abort_hibernate(TOI_FAILED_IO, -+ "Failed to write bitmaps."); -+ goto write_image_header_abort; -+ } -+ -+ /* Flush data and let allocator cleanup */ -+ if (toiActiveAllocator->write_header_cleanup()) { -+ abort_hibernate(TOI_FAILED_IO, -+ "Failed to cleanup writing header."); -+ goto write_image_header_abort_no_cleanup; -+ } -+ -+ if (test_result_state(TOI_ABORTED)) -+ goto write_image_header_abort_no_cleanup; -+ -+ toi_update_status(total, total, NULL); -+ -+out: -+ if (header_buffer) -+ toi_free_page(24, (unsigned long) header_buffer); -+ return ret; -+ -+write_image_header_abort: -+ toiActiveAllocator->write_header_cleanup(); -+write_image_header_abort_no_cleanup: -+ ret = -1; -+ goto out; -+} -+ -+/** -+ * sanity_check - check the header -+ * @sh: the header which was saved at hibernate time. -+ * -+ * Perform a few checks, seeking to ensure that the kernel being -+ * booted matches the one hibernated. They need to match so we can -+ * be _sure_ things will work. It is not absolutely impossible for -+ * resuming from a different kernel to work, just not assured. 
-+ **/ -+static char *sanity_check(struct toi_header *sh) -+{ -+ char *reason = check_image_kernel((struct swsusp_info *) sh); -+ -+ if (reason) -+ return reason; -+ -+ if (!test_action_state(TOI_IGNORE_ROOTFS)) { -+ const struct super_block *sb; -+ list_for_each_entry(sb, &super_blocks, s_list) { -+ if ((!(sb->s_flags & MS_RDONLY)) && -+ (sb->s_type->fs_flags & FS_REQUIRES_DEV)) -+ return "Device backed fs has been mounted " -+ "rw prior to resume or initrd/ramfs " -+ "is mounted rw."; -+ } -+ } -+ -+ return NULL; -+} -+ -+static DECLARE_WAIT_QUEUE_HEAD(freeze_wait); -+ -+#define FREEZE_IN_PROGRESS (~0) -+ -+static int freeze_result; -+ -+static void do_freeze(struct work_struct *dummy) -+{ -+ freeze_result = freeze_processes(); -+ wake_up(&freeze_wait); -+ trap_non_toi_io = 1; -+} -+ -+static DECLARE_WORK(freeze_work, do_freeze); -+ -+/** -+ * __read_pageset1 - test for the existence of an image and attempt to load it -+ * -+ * Returns: Int -+ * Zero if image found and pageset1 successfully loaded. -+ * Error if no image found or loaded. -+ **/ -+static int __read_pageset1(void) -+{ -+ int i, result = 0; -+ char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP), -+ *sanity_error = NULL; -+ struct toi_header *toi_header; -+ -+ if (!header_buffer) { -+ printk(KERN_INFO "Unable to allocate a page for reading the " -+ "signature.\n"); -+ return -ENOMEM; -+ } -+ -+ /* Check for an image */ -+ result = toiActiveAllocator->image_exists(1); -+ if (result == 3) { -+ result = -ENODATA; -+ toi_early_boot_message(1, 0, "The signature from an older " -+ "version of TuxOnIce has been detected."); -+ goto out_remove_image; -+ } -+ -+ if (result != 1) { -+ result = -ENODATA; -+ noresume_reset_modules(); -+ printk(KERN_INFO "TuxOnIce: No image found.\n"); -+ goto out; -+ } -+ -+ /* -+ * Prepare the active allocator for reading the image header. The -+ * activate allocator might read its own configuration. 
-+ * -+ * NB: This call may never return because there might be a signature -+ * for a different image such that we warn the user and they choose -+ * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the -+ * location of the image might be unavailable if it was stored on a -+ * network connection). -+ */ -+ -+ result = toiActiveAllocator->read_header_init(); -+ if (result) { -+ printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the " -+ "image header.\n"); -+ goto out_remove_image; -+ } -+ -+ /* Check for noresume command line option */ -+ if (test_toi_state(TOI_NORESUME_SPECIFIED)) { -+ printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed " -+ "image.\n"); -+ goto out_remove_image; -+ } -+ -+ /* Check whether we've resumed before */ -+ if (test_toi_state(TOI_RESUMED_BEFORE)) { -+ toi_early_boot_message(1, 0, NULL); -+ if (!(test_toi_state(TOI_CONTINUE_REQ))) { -+ printk(KERN_INFO "TuxOnIce: Tried to resume before: " -+ "Invalidated image.\n"); -+ goto out_remove_image; -+ } -+ } -+ -+ clear_toi_state(TOI_CONTINUE_REQ); -+ -+ toi_image_header_version = toiActiveAllocator->get_header_version(); -+ -+ if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) { -+ toi_early_boot_message(1, 0, image_version_error); -+ if (!(test_toi_state(TOI_CONTINUE_REQ))) { -+ printk(KERN_INFO "TuxOnIce: Header version too new: " -+ "Invalidated image.\n"); -+ goto out_remove_image; -+ } -+ } -+ -+ /* Read hibernate header */ -+ result = toiActiveAllocator->rw_header_chunk(READ, NULL, -+ header_buffer, sizeof(struct toi_header)); -+ if (result < 0) { -+ printk(KERN_ERR "TuxOnIce: Failed to read the image " -+ "signature.\n"); -+ goto out_remove_image; -+ } -+ -+ toi_header = (struct toi_header *) header_buffer; -+ -+ /* -+ * NB: This call may also result in a reboot rather than returning. 
-+ */ -+ -+ sanity_error = sanity_check(toi_header); -+ if (sanity_error) { -+ toi_early_boot_message(1, TOI_CONTINUE_REQ, -+ sanity_error); -+ printk(KERN_INFO "TuxOnIce: Sanity check failed.\n"); -+ goto out_remove_image; -+ } -+ -+ /* -+ * We have an image and it looks like it will load okay. -+ * -+ * Get metadata from header. Don't override commandline parameters. -+ * -+ * We don't need to save the image size limit because it's not used -+ * during resume and will be restored with the image anyway. -+ */ -+ -+ memcpy((char *) &pagedir1, -+ (char *) &toi_header->pagedir, sizeof(pagedir1)); -+ toi_result = toi_header->param0; -+ if (!toi_bkd.toi_debug_state) { -+ toi_bkd.toi_action = -+ (toi_header->param1 & ~toi_bootflags_mask) | -+ (toi_bkd.toi_action & toi_bootflags_mask); -+ toi_bkd.toi_debug_state = toi_header->param2; -+ toi_bkd.toi_default_console_level = toi_header->param3; -+ } -+ clear_toi_state(TOI_IGNORE_LOGLEVEL); -+ pagedir2.size = toi_header->pageset_2_size; -+ for (i = 0; i < 4; i++) -+ toi_bkd.toi_io_time[i/2][i%2] = -+ toi_header->io_time[i/2][i%2]; -+ -+ set_toi_state(TOI_BOOT_KERNEL); -+ boot_kernel_data_buffer = toi_header->bkd; -+ -+ read_if_version(1, toi_max_workers, "TuxOnIce max workers", -+ goto out_remove_image); -+ -+ /* Read filesystem info */ -+ if (fs_info_load_and_check()) { -+ printk(KERN_EMERG "TuxOnIce: File system mount time checks " -+ "failed. 
Refusing to corrupt your filesystems!\n"); -+ goto out_remove_image; -+ } -+ -+ /* Read module configurations */ -+ result = read_module_configs(); -+ if (result) { -+ pagedir1.size = 0; -+ pagedir2.size = 0; -+ printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module " -+ "configurations.\n"); -+ clear_action_state(TOI_KEEP_IMAGE); -+ goto out_remove_image; -+ } -+ -+ toi_prepare_console(); -+ -+ set_toi_state(TOI_NOW_RESUMING); -+ -+ if (!test_action_state(TOI_LATE_CPU_HOTPLUG)) { -+ toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus."); -+ if (disable_nonboot_cpus()) { -+ set_abort_result(TOI_CPU_HOTPLUG_FAILED); -+ goto out_reset_console; -+ } -+ } -+ -+ result = pm_notifier_call_chain(PM_RESTORE_PREPARE); -+ if (result) -+ goto out_notifier_call_chain;; -+ -+ if (usermodehelper_disable()) -+ goto out_enable_nonboot_cpus; -+ -+ current->flags |= PF_NOFREEZE; -+ freeze_result = FREEZE_IN_PROGRESS; -+ -+ schedule_work_on(cpumask_first(cpu_online_mask), &freeze_work); -+ -+ toi_cond_pause(1, "About to read original pageset1 locations."); -+ -+ /* -+ * See _toi_rw_header_chunk in tuxonice_bio.c: -+ * Initialize pageset1_map by reading the map from the image. -+ */ -+ if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk)) -+ goto out_thaw; -+ -+ /* -+ * See toi_rw_cleanup in tuxonice_bio.c: -+ * Clean up after reading the header. -+ */ -+ result = toiActiveAllocator->read_header_cleanup(); -+ if (result) { -+ printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the " -+ "image header.\n"); -+ goto out_thaw; -+ } -+ -+ toi_cond_pause(1, "About to read pagedir."); -+ -+ /* -+ * Get the addresses of pages into which we will load the kernel to -+ * be copied back and check if they conflict with the ones we are using. 
-+ */ -+ if (toi_get_pageset1_load_addresses()) { -+ printk(KERN_INFO "TuxOnIce: Failed to get load addresses for " -+ "pageset1.\n"); -+ goto out_thaw; -+ } -+ -+ /* Read the original kernel back */ -+ toi_cond_pause(1, "About to read pageset 1."); -+ -+ /* Given the pagemap, read back the data from disk */ -+ if (read_pageset(&pagedir1, 0)) { -+ toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1."); -+ result = -EIO; -+ goto out_thaw; -+ } -+ -+ toi_cond_pause(1, "About to restore original kernel."); -+ result = 0; -+ -+ if (!test_action_state(TOI_KEEP_IMAGE) && -+ toiActiveAllocator->mark_resume_attempted) -+ toiActiveAllocator->mark_resume_attempted(1); -+ -+ wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); -+out: -+ current->flags &= ~PF_NOFREEZE; -+ toi_free_page(25, (unsigned long) header_buffer); -+ return result; -+ -+out_thaw: -+ wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS); -+ trap_non_toi_io = 0; -+ thaw_processes(); -+ usermodehelper_enable(); -+out_enable_nonboot_cpus: -+ enable_nonboot_cpus(); -+out_notifier_call_chain: -+ pm_notifier_call_chain(PM_POST_RESTORE); -+out_reset_console: -+ toi_cleanup_console(); -+out_remove_image: -+ result = -EINVAL; -+ if (!test_action_state(TOI_KEEP_IMAGE)) -+ toiActiveAllocator->remove_image(); -+ toiActiveAllocator->read_header_cleanup(); -+ noresume_reset_modules(); -+ goto out; -+} -+ -+/** -+ * read_pageset1 - highlevel function to read the saved pages -+ * -+ * Attempt to read the header and pageset1 of a hibernate image. -+ * Handle the outcome, complaining where appropriate. 
-+ **/ -+int read_pageset1(void) -+{ -+ int error; -+ -+ error = __read_pageset1(); -+ -+ if (error && error != -ENODATA && error != -EINVAL && -+ !test_result_state(TOI_ABORTED)) -+ abort_hibernate(TOI_IMAGE_ERROR, -+ "TuxOnIce: Error %d resuming\n", error); -+ -+ return error; -+} -+ -+/** -+ * get_have_image_data - check the image header -+ **/ -+static char *get_have_image_data(void) -+{ -+ char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP); -+ struct toi_header *toi_header; -+ -+ if (!output_buffer) { -+ printk(KERN_INFO "Output buffer null.\n"); -+ return NULL; -+ } -+ -+ /* Check for an image */ -+ if (!toiActiveAllocator->image_exists(1) || -+ toiActiveAllocator->read_header_init() || -+ toiActiveAllocator->rw_header_chunk(READ, NULL, -+ output_buffer, sizeof(struct toi_header))) { -+ sprintf(output_buffer, "0\n"); -+ /* -+ * From an initrd/ramfs, catting have_image and -+ * getting a result of 0 is sufficient. -+ */ -+ clear_toi_state(TOI_BOOT_TIME); -+ goto out; -+ } -+ -+ toi_header = (struct toi_header *) output_buffer; -+ -+ sprintf(output_buffer, "1\n%s\n%s\n", -+ toi_header->uts.machine, -+ toi_header->uts.version); -+ -+ /* Check whether we've resumed before */ -+ if (test_toi_state(TOI_RESUMED_BEFORE)) -+ strcat(output_buffer, "Resumed before.\n"); -+ -+out: -+ noresume_reset_modules(); -+ return output_buffer; -+} -+ -+/** -+ * read_pageset2 - read second part of the image -+ * @overwrittenpagesonly: Read only pages which would have been -+ * verwritten by pageset1? -+ * -+ * Read in part or all of pageset2 of an image, depending upon -+ * whether we are hibernating and have only overwritten a portion -+ * with pageset1 pages, or are resuming and need to read them -+ * all. -+ * -+ * Returns: Int -+ * Zero if no error, otherwise the error value. 
-+ **/ -+int read_pageset2(int overwrittenpagesonly) -+{ -+ int result = 0; -+ -+ if (!pagedir2.size) -+ return 0; -+ -+ result = read_pageset(&pagedir2, overwrittenpagesonly); -+ -+ toi_cond_pause(1, "Pagedir 2 read."); -+ -+ return result; -+} -+ -+/** -+ * image_exists_read - has an image been found? -+ * @page: Output buffer -+ * -+ * Store 0 or 1 in page, depending on whether an image is found. -+ * Incoming buffer is PAGE_SIZE and result is guaranteed -+ * to be far less than that, so we don't worry about -+ * overflow. -+ **/ -+int image_exists_read(const char *page, int count) -+{ -+ int len = 0; -+ char *result; -+ -+ if (toi_activate_storage(0)) -+ return count; -+ -+ if (!test_toi_state(TOI_RESUME_DEVICE_OK)) -+ toi_attempt_to_parse_resume_device(0); -+ -+ if (!toiActiveAllocator) { -+ len = sprintf((char *) page, "-1\n"); -+ } else { -+ result = get_have_image_data(); -+ if (result) { -+ len = sprintf((char *) page, "%s", result); -+ toi_free_page(26, (unsigned long) result); -+ } -+ } -+ -+ toi_deactivate_storage(0); -+ -+ return len; -+} -+ -+/** -+ * image_exists_write - invalidate an image if one exists -+ **/ -+int image_exists_write(const char *buffer, int count) -+{ -+ if (toi_activate_storage(0)) -+ return count; -+ -+ if (toiActiveAllocator && toiActiveAllocator->image_exists(1)) -+ toiActiveAllocator->remove_image(); -+ -+ toi_deactivate_storage(0); -+ -+ clear_result_state(TOI_KEPT_IMAGE); -+ -+ return count; -+} -diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h -new file mode 100644 -index 0000000..6f740ca ---- /dev/null -+++ b/kernel/power/tuxonice_io.h -@@ -0,0 +1,74 @@ -+/* -+ * kernel/power/tuxonice_io.h -+ * -+ * Copyright (C) 2005-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * It contains high level IO routines for hibernating. 
-+ * -+ */ -+ -+#include -+#include "tuxonice_pagedir.h" -+ -+/* Non-module data saved in our image header */ -+struct toi_header { -+ /* -+ * Mirror struct swsusp_info, but without -+ * the page aligned attribute -+ */ -+ struct new_utsname uts; -+ u32 version_code; -+ unsigned long num_physpages; -+ int cpus; -+ unsigned long image_pages; -+ unsigned long pages; -+ unsigned long size; -+ -+ /* Our own data */ -+ unsigned long orig_mem_free; -+ int page_size; -+ int pageset_2_size; -+ int param0; -+ int param1; -+ int param2; -+ int param3; -+ int progress0; -+ int progress1; -+ int progress2; -+ int progress3; -+ int io_time[2][2]; -+ struct pagedir pagedir; -+ dev_t root_fs; -+ unsigned long bkd; /* Boot kernel data locn */ -+}; -+ -+extern int write_pageset(struct pagedir *pagedir); -+extern int write_image_header(void); -+extern int read_pageset1(void); -+extern int read_pageset2(int overwrittenpagesonly); -+ -+extern int toi_attempt_to_parse_resume_device(int quiet); -+extern void attempt_to_parse_resume_device2(void); -+extern void attempt_to_parse_alt_resume_param(void); -+int image_exists_read(const char *page, int count); -+int image_exists_write(const char *buffer, int count); -+extern void save_restore_alt_param(int replace, int quiet); -+extern atomic_t toi_io_workers; -+ -+/* Args to save_restore_alt_param */ -+#define RESTORE 0 -+#define SAVE 1 -+ -+#define NOQUIET 0 -+#define QUIET 1 -+ -+extern dev_t name_to_dev_t(char *line); -+ -+extern wait_queue_head_t toi_io_queue_flusher; -+extern int toi_bio_queue_flusher_should_finish; -+ -+int fs_info_space_needed(void); -+ -+extern int toi_max_workers; -diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c -new file mode 100644 -index 0000000..9e794cb ---- /dev/null -+++ b/kernel/power/tuxonice_modules.c -@@ -0,0 +1,522 @@ -+/* -+ * kernel/power/tuxonice_modules.c -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ */ -+ -+#include -+#include 
"tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_ui.h" -+ -+LIST_HEAD(toi_filters); -+LIST_HEAD(toiAllocators); -+ -+LIST_HEAD(toi_modules); -+EXPORT_SYMBOL_GPL(toi_modules); -+ -+struct toi_module_ops *toiActiveAllocator; -+EXPORT_SYMBOL_GPL(toiActiveAllocator); -+ -+static int toi_num_filters; -+int toiNumAllocators, toi_num_modules; -+ -+/* -+ * toi_header_storage_for_modules -+ * -+ * Returns the amount of space needed to store configuration -+ * data needed by the modules prior to copying back the original -+ * kernel. We can exclude data for pageset2 because it will be -+ * available anyway once the kernel is copied back. -+ */ -+long toi_header_storage_for_modules(void) -+{ -+ struct toi_module_ops *this_module; -+ int bytes = 0; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || -+ (this_module->type == WRITER_MODULE && -+ toiActiveAllocator != this_module)) -+ continue; -+ if (this_module->storage_needed) { -+ int this = this_module->storage_needed() + -+ sizeof(struct toi_module_header) + -+ sizeof(int); -+ this_module->header_requested = this; -+ bytes += this; -+ } -+ } -+ -+ /* One more for the empty terminator */ -+ return bytes + sizeof(struct toi_module_header); -+} -+ -+void print_toi_header_storage_for_modules(void) -+{ -+ struct toi_module_ops *this_module; -+ int bytes = 0; -+ -+ printk(KERN_DEBUG "Header storage:\n"); -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || -+ (this_module->type == WRITER_MODULE && -+ toiActiveAllocator != this_module)) -+ continue; -+ if (this_module->storage_needed) { -+ int this = this_module->storage_needed() + -+ sizeof(struct toi_module_header) + -+ sizeof(int); -+ this_module->header_requested = this; -+ bytes += this; -+ printk(KERN_DEBUG "+ %16s : %-4d/%d.\n", -+ this_module->name, -+ this_module->header_used, this); -+ } -+ } -+ -+ printk(KERN_DEBUG "+ empty 
terminator : %zu.\n", -+ sizeof(struct toi_module_header)); -+ printk(KERN_DEBUG " ====\n"); -+ printk(KERN_DEBUG " %zu\n", -+ bytes + sizeof(struct toi_module_header)); -+} -+EXPORT_SYMBOL_GPL(print_toi_header_storage_for_modules); -+ -+/* -+ * toi_memory_for_modules -+ * -+ * Returns the amount of memory requested by modules for -+ * doing their work during the cycle. -+ */ -+ -+long toi_memory_for_modules(int print_parts) -+{ -+ long bytes = 0, result; -+ struct toi_module_ops *this_module; -+ -+ if (print_parts) -+ printk(KERN_INFO "Memory for modules:\n===================\n"); -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ int this; -+ if (!this_module->enabled) -+ continue; -+ if (this_module->memory_needed) { -+ this = this_module->memory_needed(); -+ if (print_parts) -+ printk(KERN_INFO "%10d bytes (%5ld pages) for " -+ "module '%s'.\n", this, -+ DIV_ROUND_UP(this, PAGE_SIZE), -+ this_module->name); -+ bytes += this; -+ } -+ } -+ -+ result = DIV_ROUND_UP(bytes, PAGE_SIZE); -+ if (print_parts) -+ printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result); -+ -+ return result; -+} -+ -+/* -+ * toi_expected_compression_ratio -+ * -+ * Returns the compression ratio expected when saving the image. 
-+ */ -+ -+int toi_expected_compression_ratio(void) -+{ -+ int ratio = 100; -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled) -+ continue; -+ if (this_module->expected_compression) -+ ratio = ratio * this_module->expected_compression() -+ / 100; -+ } -+ -+ return ratio; -+} -+ -+/* toi_find_module_given_dir -+ * Functionality : Return a module (if found), given a pointer -+ * to its directory name -+ */ -+ -+static struct toi_module_ops *toi_find_module_given_dir(char *name) -+{ -+ struct toi_module_ops *this_module, *found_module = NULL; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!strcmp(name, this_module->directory)) { -+ found_module = this_module; -+ break; -+ } -+ } -+ -+ return found_module; -+} -+ -+/* toi_find_module_given_name -+ * Functionality : Return a module (if found), given a pointer -+ * to its name -+ */ -+ -+struct toi_module_ops *toi_find_module_given_name(char *name) -+{ -+ struct toi_module_ops *this_module, *found_module = NULL; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!strcmp(name, this_module->name)) { -+ found_module = this_module; -+ break; -+ } -+ } -+ -+ return found_module; -+} -+ -+/* -+ * toi_print_module_debug_info -+ * Functionality : Get debugging info from modules into a buffer. -+ */ -+int toi_print_module_debug_info(char *buffer, int buffer_size) -+{ -+ struct toi_module_ops *this_module; -+ int len = 0; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled) -+ continue; -+ if (this_module->print_debug_info) { -+ int result; -+ result = this_module->print_debug_info(buffer + len, -+ buffer_size - len); -+ len += result; -+ } -+ } -+ -+ /* Ensure null terminated */ -+ buffer[buffer_size] = 0; -+ -+ return len; -+} -+ -+/* -+ * toi_register_module -+ * -+ * Register a module. 
-+ */ -+int toi_register_module(struct toi_module_ops *module) -+{ -+ int i; -+ struct kobject *kobj; -+ -+ module->enabled = 1; -+ -+ if (toi_find_module_given_name(module->name)) { -+ printk(KERN_INFO "TuxOnIce: Trying to load module %s," -+ " which is already registered.\n", -+ module->name); -+ return -EBUSY; -+ } -+ -+ switch (module->type) { -+ case FILTER_MODULE: -+ list_add_tail(&module->type_list, &toi_filters); -+ toi_num_filters++; -+ break; -+ case WRITER_MODULE: -+ list_add_tail(&module->type_list, &toiAllocators); -+ toiNumAllocators++; -+ break; -+ case MISC_MODULE: -+ case MISC_HIDDEN_MODULE: -+ case BIO_ALLOCATOR_MODULE: -+ break; -+ default: -+ printk(KERN_ERR "Hmmm. Module '%s' has an invalid type." -+ " It has been ignored.\n", module->name); -+ return -EINVAL; -+ } -+ list_add_tail(&module->module_list, &toi_modules); -+ toi_num_modules++; -+ -+ if ((!module->directory && !module->shared_directory) || -+ !module->sysfs_data || !module->num_sysfs_entries) -+ return 0; -+ -+ /* -+ * Modules may share a directory, but those with shared_dir -+ * set must be loaded (via symbol dependencies) after parents -+ * and unloaded beforehand. 
-+ */ -+ if (module->shared_directory) { -+ struct toi_module_ops *shared = -+ toi_find_module_given_dir(module->shared_directory); -+ if (!shared) { -+ printk(KERN_ERR "TuxOnIce: Module %s wants to share " -+ "%s's directory but %s isn't loaded.\n", -+ module->name, module->shared_directory, -+ module->shared_directory); -+ toi_unregister_module(module); -+ return -ENODEV; -+ } -+ kobj = shared->dir_kobj; -+ } else { -+ if (!strncmp(module->directory, "[ROOT]", 6)) -+ kobj = tuxonice_kobj; -+ else -+ kobj = make_toi_sysdir(module->directory); -+ } -+ module->dir_kobj = kobj; -+ for (i = 0; i < module->num_sysfs_entries; i++) { -+ int result = toi_register_sysfs_file(kobj, -+ &module->sysfs_data[i]); -+ if (result) -+ return result; -+ } -+ return 0; -+} -+EXPORT_SYMBOL_GPL(toi_register_module); -+ -+/* -+ * toi_unregister_module -+ * -+ * Remove a module. -+ */ -+void toi_unregister_module(struct toi_module_ops *module) -+{ -+ int i; -+ -+ if (module->dir_kobj) -+ for (i = 0; i < module->num_sysfs_entries; i++) -+ toi_unregister_sysfs_file(module->dir_kobj, -+ &module->sysfs_data[i]); -+ -+ if (!module->shared_directory && module->directory && -+ strncmp(module->directory, "[ROOT]", 6)) -+ remove_toi_sysdir(module->dir_kobj); -+ -+ switch (module->type) { -+ case FILTER_MODULE: -+ list_del(&module->type_list); -+ toi_num_filters--; -+ break; -+ case WRITER_MODULE: -+ list_del(&module->type_list); -+ toiNumAllocators--; -+ if (toiActiveAllocator == module) { -+ toiActiveAllocator = NULL; -+ clear_toi_state(TOI_CAN_RESUME); -+ clear_toi_state(TOI_CAN_HIBERNATE); -+ } -+ break; -+ case MISC_MODULE: -+ case MISC_HIDDEN_MODULE: -+ case BIO_ALLOCATOR_MODULE: -+ break; -+ default: -+ printk(KERN_ERR "Module '%s' has an invalid type." 
-+ " It has been ignored.\n", module->name); -+ return; -+ } -+ list_del(&module->module_list); -+ toi_num_modules--; -+} -+EXPORT_SYMBOL_GPL(toi_unregister_module); -+ -+/* -+ * toi_move_module_tail -+ * -+ * Rearrange modules when reloading the config. -+ */ -+void toi_move_module_tail(struct toi_module_ops *module) -+{ -+ switch (module->type) { -+ case FILTER_MODULE: -+ if (toi_num_filters > 1) -+ list_move_tail(&module->type_list, &toi_filters); -+ break; -+ case WRITER_MODULE: -+ if (toiNumAllocators > 1) -+ list_move_tail(&module->type_list, &toiAllocators); -+ break; -+ case MISC_MODULE: -+ case MISC_HIDDEN_MODULE: -+ case BIO_ALLOCATOR_MODULE: -+ break; -+ default: -+ printk(KERN_ERR "Module '%s' has an invalid type." -+ " It has been ignored.\n", module->name); -+ return; -+ } -+ if ((toi_num_filters + toiNumAllocators) > 1) -+ list_move_tail(&module->module_list, &toi_modules); -+} -+ -+/* -+ * toi_initialise_modules -+ * -+ * Get ready to do some work! -+ */ -+int toi_initialise_modules(int starting_cycle, int early) -+{ -+ struct toi_module_ops *this_module; -+ int result; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ this_module->header_requested = 0; -+ this_module->header_used = 0; -+ if (!this_module->enabled) -+ continue; -+ if (this_module->early != early) -+ continue; -+ if (this_module->initialise) { -+ result = this_module->initialise(starting_cycle); -+ if (result) { -+ toi_cleanup_modules(starting_cycle); -+ return result; -+ } -+ this_module->initialised = 1; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * toi_cleanup_modules -+ * -+ * Tell modules the work is done. 
-+ */ -+void toi_cleanup_modules(int finishing_cycle) -+{ -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (!this_module->enabled || !this_module->initialised) -+ continue; -+ if (this_module->cleanup) -+ this_module->cleanup(finishing_cycle); -+ this_module->initialised = 0; -+ } -+} -+ -+/* -+ * toi_pre_atomic_restore_modules -+ * -+ * Get ready to do some work! -+ */ -+void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd) -+{ -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (this_module->enabled && this_module->pre_atomic_restore) -+ this_module->pre_atomic_restore(bkd); -+ } -+} -+ -+/* -+ * toi_post_atomic_restore_modules -+ * -+ * Get ready to do some work! -+ */ -+void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd) -+{ -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (this_module->enabled && this_module->post_atomic_restore) -+ this_module->post_atomic_restore(bkd); -+ } -+} -+ -+/* -+ * toi_get_next_filter -+ * -+ * Get the next filter in the pipeline. -+ */ -+struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought) -+{ -+ struct toi_module_ops *last_filter = NULL, *this_filter = NULL; -+ -+ list_for_each_entry(this_filter, &toi_filters, type_list) { -+ if (!this_filter->enabled) -+ continue; -+ if ((last_filter == filter_sought) || (!filter_sought)) -+ return this_filter; -+ last_filter = this_filter; -+ } -+ -+ return toiActiveAllocator; -+} -+EXPORT_SYMBOL_GPL(toi_get_next_filter); -+ -+/** -+ * toi_show_modules: Printk what support is loaded. 
-+ */ -+void toi_print_modules(void) -+{ -+ struct toi_module_ops *this_module; -+ int prev = 0; -+ -+ printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for"); -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ if (this_module->type == MISC_HIDDEN_MODULE) -+ continue; -+ printk("%s %s%s%s", prev ? "," : "", -+ this_module->enabled ? "" : "[", -+ this_module->name, -+ this_module->enabled ? "" : "]"); -+ prev = 1; -+ } -+ -+ printk(".\n"); -+} -+ -+/* toi_get_modules -+ * -+ * Take a reference to modules so they can't go away under us. -+ */ -+ -+int toi_get_modules(void) -+{ -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) { -+ struct toi_module_ops *this_module2; -+ -+ if (try_module_get(this_module->module)) -+ continue; -+ -+ /* Failed! Reverse gets and return error */ -+ list_for_each_entry(this_module2, &toi_modules, -+ module_list) { -+ if (this_module == this_module2) -+ return -EINVAL; -+ module_put(this_module2->module); -+ } -+ } -+ return 0; -+} -+ -+/* toi_put_modules -+ * -+ * Release our references to modules we used. -+ */ -+ -+void toi_put_modules(void) -+{ -+ struct toi_module_ops *this_module; -+ -+ list_for_each_entry(this_module, &toi_modules, module_list) -+ module_put(this_module->module); -+} -diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h -new file mode 100644 -index 0000000..d488572 ---- /dev/null -+++ b/kernel/power/tuxonice_modules.h -@@ -0,0 +1,211 @@ -+/* -+ * kernel/power/tuxonice_modules.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * It contains declarations for modules. Modules are additions to -+ * TuxOnIce that provide facilities such as image compression or -+ * encryption, backends for storage of the image and user interfaces. 
-+ * -+ */ -+ -+#ifndef TOI_MODULES_H -+#define TOI_MODULES_H -+ -+/* This is the maximum size we store in the image header for a module name */ -+#define TOI_MAX_MODULE_NAME_LENGTH 30 -+ -+struct toi_boot_kernel_data; -+ -+/* Per-module metadata */ -+struct toi_module_header { -+ char name[TOI_MAX_MODULE_NAME_LENGTH]; -+ int enabled; -+ int type; -+ int index; -+ int data_length; -+ unsigned long signature; -+}; -+ -+enum { -+ FILTER_MODULE, -+ WRITER_MODULE, -+ BIO_ALLOCATOR_MODULE, -+ MISC_MODULE, -+ MISC_HIDDEN_MODULE, -+}; -+ -+enum { -+ TOI_ASYNC, -+ TOI_SYNC -+}; -+ -+enum { -+ TOI_VIRT, -+ TOI_PAGE, -+}; -+ -+#define TOI_MAP(type, addr) \ -+ (type == TOI_PAGE ? kmap(addr) : addr) -+ -+#define TOI_UNMAP(type, addr) \ -+ do { \ -+ if (type == TOI_PAGE) \ -+ kunmap(addr); \ -+ } while(0) -+ -+struct toi_module_ops { -+ /* Functions common to all modules */ -+ int type; -+ char *name; -+ char *directory; -+ char *shared_directory; -+ struct kobject *dir_kobj; -+ struct module *module; -+ int enabled, early, initialised; -+ struct list_head module_list; -+ -+ /* List of filters or allocators */ -+ struct list_head list, type_list; -+ -+ /* -+ * Requirements for memory and storage in -+ * the image header.. -+ */ -+ int (*memory_needed) (void); -+ int (*storage_needed) (void); -+ -+ int header_requested, header_used; -+ -+ int (*expected_compression) (void); -+ -+ /* -+ * Debug info -+ */ -+ int (*print_debug_info) (char *buffer, int size); -+ int (*save_config_info) (char *buffer); -+ void (*load_config_info) (char *buffer, int len); -+ -+ /* -+ * Initialise & cleanup - general routines called -+ * at the start and end of a cycle. -+ */ -+ int (*initialise) (int starting_cycle); -+ void (*cleanup) (int finishing_cycle); -+ -+ void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd); -+ void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd); -+ -+ /* -+ * Calls for allocating storage (allocators only). 
-+ * -+ * Header space is requested separately and cannot fail, but the -+ * reservation is only applied when main storage is allocated. -+ * The header space reservation is thus always set prior to -+ * requesting the allocation of storage - and prior to querying -+ * how much storage is available. -+ */ -+ -+ unsigned long (*storage_available) (void); -+ void (*reserve_header_space) (unsigned long space_requested); -+ int (*register_storage) (void); -+ int (*allocate_storage) (unsigned long space_requested); -+ unsigned long (*storage_allocated) (void); -+ -+ /* -+ * Routines used in image I/O. -+ */ -+ int (*rw_init) (int rw, int stream_number); -+ int (*rw_cleanup) (int rw); -+ int (*write_page) (unsigned long index, int buf_type, void *buf, -+ unsigned int buf_size); -+ int (*read_page) (unsigned long *index, int buf_type, void *buf, -+ unsigned int *buf_size); -+ int (*io_flusher) (int rw); -+ -+ /* Reset module if image exists but reading aborted */ -+ void (*noresume_reset) (void); -+ -+ /* Read and write the metadata */ -+ int (*write_header_init) (void); -+ int (*write_header_cleanup) (void); -+ -+ int (*read_header_init) (void); -+ int (*read_header_cleanup) (void); -+ -+ /* To be called after read_header_init */ -+ int (*get_header_version) (void); -+ -+ int (*rw_header_chunk) (int rw, struct toi_module_ops *owner, -+ char *buffer_start, int buffer_size); -+ -+ int (*rw_header_chunk_noreadahead) (int rw, -+ struct toi_module_ops *owner, char *buffer_start, -+ int buffer_size); -+ -+ /* Attempt to parse an image location */ -+ int (*parse_sig_location) (char *buffer, int only_writer, int quiet); -+ -+ /* Throttle I/O according to throughput */ -+ void (*update_throughput_throttle) (int jif_index); -+ -+ /* Flush outstanding I/O */ -+ int (*finish_all_io) (void); -+ -+ /* Determine whether image exists that we can restore */ -+ int (*image_exists) (int quiet); -+ -+ /* Mark the image as having tried to resume */ -+ int (*mark_resume_attempted) (int); -+ 
-+ /* Destroy image if one exists */ -+ int (*remove_image) (void); -+ -+ /* Sysfs Data */ -+ struct toi_sysfs_data *sysfs_data; -+ int num_sysfs_entries; -+ -+ /* Block I/O allocator */ -+ struct toi_bio_allocator_ops *bio_allocator_ops; -+}; -+ -+extern int toi_num_modules, toiNumAllocators; -+ -+extern struct toi_module_ops *toiActiveAllocator; -+extern struct list_head toi_filters, toiAllocators, toi_modules; -+ -+extern void toi_prepare_console_modules(void); -+extern void toi_cleanup_console_modules(void); -+ -+extern struct toi_module_ops *toi_find_module_given_name(char *name); -+extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *); -+ -+extern int toi_register_module(struct toi_module_ops *module); -+extern void toi_move_module_tail(struct toi_module_ops *module); -+ -+extern long toi_header_storage_for_modules(void); -+extern long toi_memory_for_modules(int print_parts); -+extern void print_toi_header_storage_for_modules(void); -+extern int toi_expected_compression_ratio(void); -+ -+extern int toi_print_module_debug_info(char *buffer, int buffer_size); -+extern int toi_register_module(struct toi_module_ops *module); -+extern void toi_unregister_module(struct toi_module_ops *module); -+ -+extern int toi_initialise_modules(int starting_cycle, int early); -+#define toi_initialise_modules_early(starting) \ -+ toi_initialise_modules(starting, 1) -+#define toi_initialise_modules_late(starting) \ -+ toi_initialise_modules(starting, 0) -+extern void toi_cleanup_modules(int finishing_cycle); -+ -+extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd); -+extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd); -+ -+extern void toi_print_modules(void); -+ -+int toi_get_modules(void); -+void toi_put_modules(void); -+#endif -diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c -new file mode 100644 -index 0000000..0a40aa8 ---- /dev/null -+++ b/kernel/power/tuxonice_netlink.c 
-@@ -0,0 +1,329 @@ -+/* -+ * kernel/power/tuxonice_netlink.c -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Functions for communicating with a userspace helper via netlink. -+ */ -+ -+ -+#include -+#include -+#include "tuxonice_netlink.h" -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_builtin.h" -+ -+static struct user_helper_data *uhd_list; -+ -+/* -+ * Refill our pool of SKBs for use in emergencies (eg, when eating memory and -+ * none can be allocated). -+ */ -+static void toi_fill_skb_pool(struct user_helper_data *uhd) -+{ -+ while (uhd->pool_level < uhd->pool_limit) { -+ struct sk_buff *new_skb = -+ alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); -+ -+ if (!new_skb) -+ break; -+ -+ new_skb->next = uhd->emerg_skbs; -+ uhd->emerg_skbs = new_skb; -+ uhd->pool_level++; -+ } -+} -+ -+/* -+ * Try to allocate a single skb. If we can't get one, try to use one from -+ * our pool. 
-+ */ -+static struct sk_buff *toi_get_skb(struct user_helper_data *uhd) -+{ -+ struct sk_buff *skb = -+ alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP); -+ -+ if (skb) -+ return skb; -+ -+ skb = uhd->emerg_skbs; -+ if (skb) { -+ uhd->pool_level--; -+ uhd->emerg_skbs = skb->next; -+ skb->next = NULL; -+ } -+ -+ return skb; -+} -+ -+void toi_send_netlink_message(struct user_helper_data *uhd, -+ int type, void *params, size_t len) -+{ -+ struct sk_buff *skb; -+ struct nlmsghdr *nlh; -+ void *dest; -+ struct task_struct *t; -+ -+ if (uhd->pid == -1) -+ return; -+ -+ if (uhd->debug) -+ printk(KERN_ERR "toi_send_netlink_message: Send " -+ "message type %d.\n", type); -+ -+ skb = toi_get_skb(uhd); -+ if (!skb) { -+ printk(KERN_INFO "toi_netlink: Can't allocate skb!\n"); -+ return; -+ } -+ -+ nlh = nlmsg_put(skb, 0, uhd->sock_seq, type, len, 0); -+ uhd->sock_seq++; -+ -+ dest = NLMSG_DATA(nlh); -+ if (params && len > 0) -+ memcpy(dest, params, len); -+ -+ netlink_unicast(uhd->nl, skb, uhd->pid, 0); -+ -+ toi_read_lock_tasklist(); -+ t = find_task_by_pid_ns(uhd->pid, &init_pid_ns); -+ if (!t) { -+ toi_read_unlock_tasklist(); -+ if (uhd->pid > -1) -+ printk(KERN_INFO "Hmm. Can't find the userspace task" -+ " %d.\n", uhd->pid); -+ return; -+ } -+ wake_up_process(t); -+ toi_read_unlock_tasklist(); -+ -+ yield(); -+} -+EXPORT_SYMBOL_GPL(toi_send_netlink_message); -+ -+static void send_whether_debugging(struct user_helper_data *uhd) -+{ -+ static u8 is_debugging = 1; -+ -+ toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING, -+ &is_debugging, sizeof(u8)); -+} -+ -+/* -+ * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we -+ * are hibernating. 
-+ */ -+static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid) -+{ -+ struct task_struct *t; -+ -+ if (uhd->debug) -+ printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid); -+ -+ toi_read_lock_tasklist(); -+ t = find_task_by_pid_ns(pid, &init_pid_ns); -+ if (!t) { -+ toi_read_unlock_tasklist(); -+ printk(KERN_INFO "Strange. Can't find the userspace task %d.\n", -+ pid); -+ return -EINVAL; -+ } -+ -+ t->flags |= PF_NOFREEZE; -+ -+ toi_read_unlock_tasklist(); -+ uhd->pid = pid; -+ -+ toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0); -+ -+ return 0; -+} -+ -+/* -+ * Called when the userspace process has informed us that it's ready to roll. -+ */ -+static int nl_ready(struct user_helper_data *uhd, u32 version) -+{ -+ if (version != uhd->interface_version) { -+ printk(KERN_INFO "%s userspace process using invalid interface" -+ " version (%d - kernel wants %d). Trying to " -+ "continue without it.\n", -+ uhd->name, version, uhd->interface_version); -+ if (uhd->not_ready) -+ uhd->not_ready(); -+ return -EINVAL; -+ } -+ -+ complete(&uhd->wait_for_process); -+ -+ return 0; -+} -+ -+void toi_netlink_close_complete(struct user_helper_data *uhd) -+{ -+ if (uhd->nl) { -+ netlink_kernel_release(uhd->nl); -+ uhd->nl = NULL; -+ } -+ -+ while (uhd->emerg_skbs) { -+ struct sk_buff *next = uhd->emerg_skbs->next; -+ kfree_skb(uhd->emerg_skbs); -+ uhd->emerg_skbs = next; -+ } -+ -+ uhd->pid = -1; -+} -+EXPORT_SYMBOL_GPL(toi_netlink_close_complete); -+ -+static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd, -+ struct sk_buff *skb, struct nlmsghdr *nlh) -+{ -+ int type = nlh->nlmsg_type; -+ int *data; -+ int err; -+ -+ if (uhd->debug) -+ printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n", -+ type); -+ -+ /* Let the more specific handler go first. It returns -+ * 1 for valid messages that it doesn't know. 
*/ -+ err = uhd->rcv_msg(skb, nlh); -+ if (err != 1) -+ return err; -+ -+ /* Only allow one task to receive NOFREEZE privileges */ -+ if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) { -+ printk(KERN_INFO "Received extra nofreeze me requests.\n"); -+ return -EBUSY; -+ } -+ -+ data = NLMSG_DATA(nlh); -+ -+ switch (type) { -+ case NETLINK_MSG_NOFREEZE_ME: -+ return nl_set_nofreeze(uhd, nlh->nlmsg_pid); -+ case NETLINK_MSG_GET_DEBUGGING: -+ send_whether_debugging(uhd); -+ return 0; -+ case NETLINK_MSG_READY: -+ if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) { -+ printk(KERN_INFO "Invalid ready mesage.\n"); -+ if (uhd->not_ready) -+ uhd->not_ready(); -+ return -EINVAL; -+ } -+ return nl_ready(uhd, (u32) *data); -+ case NETLINK_MSG_CLEANUP: -+ toi_netlink_close_complete(uhd); -+ return 0; -+ } -+ -+ return -EINVAL; -+} -+ -+static void toi_user_rcv_skb(struct sk_buff *skb) -+{ -+ int err; -+ struct nlmsghdr *nlh; -+ struct user_helper_data *uhd = uhd_list; -+ -+ while (uhd && uhd->netlink_id != skb->sk->sk_protocol) -+ uhd = uhd->next; -+ -+ if (!uhd) -+ return; -+ -+ while (skb->len >= NLMSG_SPACE(0)) { -+ u32 rlen; -+ -+ nlh = (struct nlmsghdr *) skb->data; -+ if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) -+ return; -+ -+ rlen = NLMSG_ALIGN(nlh->nlmsg_len); -+ if (rlen > skb->len) -+ rlen = skb->len; -+ -+ err = toi_nl_gen_rcv_msg(uhd, skb, nlh); -+ if (err) -+ netlink_ack(skb, nlh, err); -+ else if (nlh->nlmsg_flags & NLM_F_ACK) -+ netlink_ack(skb, nlh, 0); -+ skb_pull(skb, rlen); -+ } -+} -+ -+static int netlink_prepare(struct user_helper_data *uhd) -+{ -+ struct netlink_kernel_cfg cfg = { -+ .groups = 0, -+ .input = toi_user_rcv_skb, -+ }; -+ -+ uhd->next = uhd_list; -+ uhd_list = uhd; -+ -+ uhd->sock_seq = 0x42c0ffee; -+ uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, &cfg); -+ if (!uhd->nl) { -+ printk(KERN_INFO "Failed to allocate netlink socket for %s.\n", -+ uhd->name); -+ return -ENOMEM; -+ } -+ -+ toi_fill_skb_pool(uhd); 
-+ -+ return 0; -+} -+ -+void toi_netlink_close(struct user_helper_data *uhd) -+{ -+ struct task_struct *t; -+ -+ toi_read_lock_tasklist(); -+ t = find_task_by_pid_ns(uhd->pid, &init_pid_ns); -+ if (t) -+ t->flags &= ~PF_NOFREEZE; -+ toi_read_unlock_tasklist(); -+ -+ toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0); -+} -+EXPORT_SYMBOL_GPL(toi_netlink_close); -+ -+int toi_netlink_setup(struct user_helper_data *uhd) -+{ -+ /* In case userui didn't cleanup properly on us */ -+ toi_netlink_close_complete(uhd); -+ -+ if (netlink_prepare(uhd) < 0) { -+ printk(KERN_INFO "Netlink prepare failed.\n"); -+ return 1; -+ } -+ -+ if (toi_launch_userspace_program(uhd->program, uhd->netlink_id, -+ UMH_WAIT_EXEC, uhd->debug) < 0) { -+ printk(KERN_INFO "Launch userspace program failed.\n"); -+ toi_netlink_close_complete(uhd); -+ return 1; -+ } -+ -+ /* Wait 2 seconds for the userspace process to make contact */ -+ wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ); -+ -+ if (uhd->pid == -1) { -+ printk(KERN_INFO "%s: Failed to contact userspace process.\n", -+ uhd->name); -+ toi_netlink_close_complete(uhd); -+ return 1; -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(toi_netlink_setup); -diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h -new file mode 100644 -index 0000000..952f67b ---- /dev/null -+++ b/kernel/power/tuxonice_netlink.h -@@ -0,0 +1,62 @@ -+/* -+ * kernel/power/tuxonice_netlink.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Declarations for functions for communicating with a userspace helper -+ * via netlink. 
-+ */ -+ -+#include -+#include -+ -+#define NETLINK_MSG_BASE 0x10 -+ -+#define NETLINK_MSG_READY 0x10 -+#define NETLINK_MSG_NOFREEZE_ME 0x16 -+#define NETLINK_MSG_GET_DEBUGGING 0x19 -+#define NETLINK_MSG_CLEANUP 0x24 -+#define NETLINK_MSG_NOFREEZE_ACK 0x27 -+#define NETLINK_MSG_IS_DEBUGGING 0x28 -+ -+struct user_helper_data { -+ int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh); -+ void (*not_ready) (void); -+ struct sock *nl; -+ u32 sock_seq; -+ pid_t pid; -+ char *comm; -+ char program[256]; -+ int pool_level; -+ int pool_limit; -+ struct sk_buff *emerg_skbs; -+ int skb_size; -+ int netlink_id; -+ char *name; -+ struct user_helper_data *next; -+ struct completion wait_for_process; -+ u32 interface_version; -+ int must_init; -+ int debug; -+}; -+ -+#ifdef CONFIG_NET -+int toi_netlink_setup(struct user_helper_data *uhd); -+void toi_netlink_close(struct user_helper_data *uhd); -+void toi_send_netlink_message(struct user_helper_data *uhd, -+ int type, void *params, size_t len); -+void toi_netlink_close_complete(struct user_helper_data *uhd); -+#else -+static inline int toi_netlink_setup(struct user_helper_data *uhd) -+{ -+ return 0; -+} -+ -+static inline void toi_netlink_close(struct user_helper_data *uhd) { }; -+static inline void toi_send_netlink_message(struct user_helper_data *uhd, -+ int type, void *params, size_t len) { }; -+static inline void toi_netlink_close_complete(struct user_helper_data *uhd) -+ { }; -+#endif -diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c -new file mode 100644 -index 0000000..6934114 ---- /dev/null -+++ b/kernel/power/tuxonice_pagedir.c -@@ -0,0 +1,346 @@ -+/* -+ * kernel/power/tuxonice_pagedir.c -+ * -+ * Copyright (C) 1998-2001 Gabor Kuti -+ * Copyright (C) 1998,2001,2002 Pavel Machek -+ * Copyright (C) 2002-2003 Florent Chabaud -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Routines for handling pagesets. 
-+ * Note that pbes aren't actually stored as such. They're stored as -+ * bitmaps and extents. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice_pageflags.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_pagedir.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice.h" -+#include "tuxonice_builtin.h" -+#include "tuxonice_alloc.h" -+ -+static int ptoi_pfn; -+static struct pbe *this_low_pbe; -+static struct pbe **last_low_pbe_ptr; -+ -+void toi_reset_alt_image_pageset2_pfn(void) -+{ -+ memory_bm_position_reset(pageset2_map); -+} -+ -+static struct page *first_conflicting_page; -+ -+/* -+ * free_conflicting_pages -+ */ -+ -+static void free_conflicting_pages(void) -+{ -+ while (first_conflicting_page) { -+ struct page *next = -+ *((struct page **) kmap(first_conflicting_page)); -+ kunmap(first_conflicting_page); -+ toi__free_page(29, first_conflicting_page); -+ first_conflicting_page = next; -+ } -+} -+ -+/* __toi_get_nonconflicting_page -+ * -+ * Description: Gets order zero pages that won't be overwritten -+ * while copying the original pages. 
-+ */ -+ -+struct page *___toi_get_nonconflicting_page(int can_be_highmem) -+{ -+ struct page *page; -+ gfp_t flags = TOI_ATOMIC_GFP; -+ if (can_be_highmem) -+ flags |= __GFP_HIGHMEM; -+ -+ -+ if (test_toi_state(TOI_LOADING_ALT_IMAGE) && -+ pageset2_map && -+ (ptoi_pfn != BM_END_OF_MAP)) { -+ do { -+ ptoi_pfn = memory_bm_next_pfn(pageset2_map); -+ if (ptoi_pfn != BM_END_OF_MAP) { -+ page = pfn_to_page(ptoi_pfn); -+ if (!PagePageset1(page) && -+ (can_be_highmem || !PageHighMem(page))) -+ return page; -+ } -+ } while (ptoi_pfn != BM_END_OF_MAP); -+ } -+ -+ do { -+ page = toi_alloc_page(29, flags); -+ if (!page) { -+ printk(KERN_INFO "Failed to get nonconflicting " -+ "page.\n"); -+ return NULL; -+ } -+ if (PagePageset1(page)) { -+ struct page **next = (struct page **) kmap(page); -+ *next = first_conflicting_page; -+ first_conflicting_page = page; -+ kunmap(page); -+ } -+ } while (PagePageset1(page)); -+ -+ return page; -+} -+ -+unsigned long __toi_get_nonconflicting_page(void) -+{ -+ struct page *page = ___toi_get_nonconflicting_page(0); -+ return page ? (unsigned long) page_address(page) : 0; -+} -+ -+static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe, -+ int highmem) -+{ -+ if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1)) -+ + 2 * sizeof(struct pbe)) > PAGE_SIZE) { -+ struct page *new_page = -+ ___toi_get_nonconflicting_page(highmem); -+ if (!new_page) -+ return ERR_PTR(-ENOMEM); -+ this_pbe = (struct pbe *) kmap(new_page); -+ memset(this_pbe, 0, PAGE_SIZE); -+ *page_ptr = new_page; -+ } else -+ this_pbe++; -+ -+ return this_pbe; -+} -+ -+/** -+ * get_pageset1_load_addresses - generate pbes for conflicting pages -+ * -+ * We check here that pagedir & pages it points to won't collide -+ * with pages where we're going to restore from the loaded pages -+ * later. -+ * -+ * Returns: -+ * Zero on success, one if couldn't find enough pages (shouldn't -+ * happen). 
-+ **/ -+int toi_get_pageset1_load_addresses(void) -+{ -+ int pfn, highallocd = 0, lowallocd = 0; -+ int low_needed = pagedir1.size - get_highmem_size(pagedir1); -+ int high_needed = get_highmem_size(pagedir1); -+ int low_pages_for_highmem = 0; -+ gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM; -+ struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL, -+ *low_pbe_page, *last_low_pbe_page = NULL; -+ struct pbe **last_high_pbe_ptr = &restore_highmem_pblist, -+ *this_high_pbe = NULL; -+ unsigned long orig_low_pfn, orig_high_pfn; -+ int high_pbes_done = 0, low_pbes_done = 0; -+ int low_direct = 0, high_direct = 0, result = 0, i; -+ int high_page = 1, high_offset = 0, low_page = 1, low_offset = 0; -+ -+ memory_bm_set_iterators(pageset1_map, 3); -+ memory_bm_position_reset(pageset1_map); -+ -+ memory_bm_set_iterators(pageset1_copy_map, 2); -+ memory_bm_position_reset(pageset1_copy_map); -+ -+ last_low_pbe_ptr = &restore_pblist; -+ -+ /* First, allocate pages for the start of our pbe lists. */ -+ if (high_needed) { -+ high_pbe_page = ___toi_get_nonconflicting_page(1); -+ if (!high_pbe_page) { -+ result = -ENOMEM; -+ goto out; -+ } -+ this_high_pbe = (struct pbe *) kmap(high_pbe_page); -+ memset(this_high_pbe, 0, PAGE_SIZE); -+ } -+ -+ low_pbe_page = ___toi_get_nonconflicting_page(0); -+ if (!low_pbe_page) { -+ result = -ENOMEM; -+ goto out; -+ } -+ this_low_pbe = (struct pbe *) page_address(low_pbe_page); -+ -+ /* -+ * Next, allocate the number of pages we need. 
-+ */ -+ -+ i = low_needed + high_needed; -+ -+ do { -+ int is_high; -+ -+ if (i == low_needed) -+ flags &= ~__GFP_HIGHMEM; -+ -+ page = toi_alloc_page(30, flags); -+ BUG_ON(!page); -+ -+ SetPagePageset1Copy(page); -+ is_high = PageHighMem(page); -+ -+ if (PagePageset1(page)) { -+ if (is_high) -+ high_direct++; -+ else -+ low_direct++; -+ } else { -+ if (is_high) -+ highallocd++; -+ else -+ lowallocd++; -+ } -+ } while (--i); -+ -+ high_needed -= high_direct; -+ low_needed -= low_direct; -+ -+ /* -+ * Do we need to use some lowmem pages for the copies of highmem -+ * pages? -+ */ -+ if (high_needed > highallocd) { -+ low_pages_for_highmem = high_needed - highallocd; -+ high_needed -= low_pages_for_highmem; -+ low_needed += low_pages_for_highmem; -+ } -+ -+ /* -+ * Now generate our pbes (which will be used for the atomic restore), -+ * and free unneeded pages. -+ */ -+ memory_bm_position_reset(pageset1_copy_map); -+ for (pfn = memory_bm_next_pfn_index(pageset1_copy_map, 1); pfn != BM_END_OF_MAP; -+ pfn = memory_bm_next_pfn_index(pageset1_copy_map, 1)) { -+ int is_high; -+ page = pfn_to_page(pfn); -+ is_high = PageHighMem(page); -+ -+ if (PagePageset1(page)) -+ continue; -+ -+ /* Nope. We're going to use this page. Add a pbe. 
*/ -+ if (is_high || low_pages_for_highmem) { -+ struct page *orig_page; -+ high_pbes_done++; -+ if (!is_high) -+ low_pages_for_highmem--; -+ do { -+ orig_high_pfn = memory_bm_next_pfn_index(pageset1_map, 1); -+ BUG_ON(orig_high_pfn == BM_END_OF_MAP); -+ orig_page = pfn_to_page(orig_high_pfn); -+ } while (!PageHighMem(orig_page) || -+ PagePageset1Copy(orig_page)); -+ -+ this_high_pbe->orig_address = (void *) orig_high_pfn; -+ this_high_pbe->address = page; -+ this_high_pbe->next = NULL; -+ toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "High pbe %d/%d: %p(%d)=>%p", -+ high_page, high_offset, page, orig_high_pfn, orig_page); -+ if (last_high_pbe_page != high_pbe_page) { -+ *last_high_pbe_ptr = -+ (struct pbe *) high_pbe_page; -+ if (last_high_pbe_page) { -+ kunmap(last_high_pbe_page); -+ high_page++; -+ high_offset = 0; -+ } else -+ high_offset++; -+ last_high_pbe_page = high_pbe_page; -+ } else { -+ *last_high_pbe_ptr = this_high_pbe; -+ high_offset++; -+ } -+ last_high_pbe_ptr = &this_high_pbe->next; -+ this_high_pbe = get_next_pbe(&high_pbe_page, -+ this_high_pbe, 1); -+ if (IS_ERR(this_high_pbe)) { -+ printk(KERN_INFO -+ "This high pbe is an error.\n"); -+ return -ENOMEM; -+ } -+ } else { -+ struct page *orig_page; -+ low_pbes_done++; -+ do { -+ orig_low_pfn = memory_bm_next_pfn_index(pageset1_map, 2); -+ BUG_ON(orig_low_pfn == BM_END_OF_MAP); -+ orig_page = pfn_to_page(orig_low_pfn); -+ } while (PageHighMem(orig_page) || -+ PagePageset1Copy(orig_page)); -+ -+ this_low_pbe->orig_address = page_address(orig_page); -+ this_low_pbe->address = page_address(page); -+ this_low_pbe->next = NULL; -+ toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "Low pbe %d/%d: %p(%d)=>%p", -+ low_page, low_offset, this_low_pbe->orig_address, -+ orig_low_pfn, this_low_pbe->address); -+ *last_low_pbe_ptr = this_low_pbe; -+ last_low_pbe_ptr = &this_low_pbe->next; -+ this_low_pbe = get_next_pbe(&low_pbe_page, -+ this_low_pbe, 0); -+ if (low_pbe_page != last_low_pbe_page) { -+ if (last_low_pbe_page) { 
-+ low_page++; -+ low_offset = 0; -+ } -+ last_low_pbe_page = low_pbe_page; -+ } else -+ low_offset++; -+ if (IS_ERR(this_low_pbe)) { -+ printk(KERN_INFO "this_low_pbe is an error.\n"); -+ return -ENOMEM; -+ } -+ } -+ } -+ -+ if (high_pbe_page) -+ kunmap(high_pbe_page); -+ -+ if (last_high_pbe_page != high_pbe_page) { -+ if (last_high_pbe_page) -+ kunmap(last_high_pbe_page); -+ toi__free_page(29, high_pbe_page); -+ } -+ -+ free_conflicting_pages(); -+ -+out: -+ memory_bm_set_iterators(pageset1_map, 1); -+ memory_bm_set_iterators(pageset1_copy_map, 1); -+ return result; -+} -+ -+int add_boot_kernel_data_pbe(void) -+{ -+ this_low_pbe->address = (char *) __toi_get_nonconflicting_page(); -+ if (!this_low_pbe->address) { -+ printk(KERN_INFO "Failed to get bkd atomic restore buffer."); -+ return -ENOMEM; -+ } -+ -+ toi_bkd.size = sizeof(toi_bkd); -+ memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd)); -+ -+ *last_low_pbe_ptr = this_low_pbe; -+ this_low_pbe->orig_address = (char *) boot_kernel_data_buffer; -+ this_low_pbe->next = NULL; -+ return 0; -+} -diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h -new file mode 100644 -index 0000000..0c7321e ---- /dev/null -+++ b/kernel/power/tuxonice_pagedir.h -@@ -0,0 +1,50 @@ -+/* -+ * kernel/power/tuxonice_pagedir.h -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Declarations for routines for handling pagesets. -+ */ -+ -+#ifndef KERNEL_POWER_PAGEDIR_H -+#define KERNEL_POWER_PAGEDIR_H -+ -+/* Pagedir -+ * -+ * Contains the metadata for a set of pages saved in the image. 
-+ */ -+ -+struct pagedir { -+ int id; -+ unsigned long size; -+#ifdef CONFIG_HIGHMEM -+ unsigned long size_high; -+#endif -+}; -+ -+#ifdef CONFIG_HIGHMEM -+#define get_highmem_size(pagedir) (pagedir.size_high) -+#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0) -+#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0) -+#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high) -+#else -+#define get_highmem_size(pagedir) (0) -+#define set_highmem_size(pagedir, sz) do { } while (0) -+#define inc_highmem_size(pagedir) do { } while (0) -+#define get_lowmem_size(pagedir) (pagedir.size) -+#endif -+ -+extern struct pagedir pagedir1, pagedir2; -+ -+extern void toi_copy_pageset1(void); -+ -+extern int toi_get_pageset1_load_addresses(void); -+ -+extern unsigned long __toi_get_nonconflicting_page(void); -+struct page *___toi_get_nonconflicting_page(int can_be_highmem); -+ -+extern void toi_reset_alt_image_pageset2_pfn(void); -+extern int add_boot_kernel_data_pbe(void); -+#endif -diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c -new file mode 100644 -index 0000000..a3780e5 ---- /dev/null -+++ b/kernel/power/tuxonice_pageflags.c -@@ -0,0 +1,29 @@ -+/* -+ * kernel/power/tuxonice_pageflags.c -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Routines for serialising and relocating pageflags in which we -+ * store our image metadata. 
-+ */ -+ -+#include -+#include -+#include "tuxonice_pageflags.h" -+#include "power.h" -+ -+int toi_pageflags_space_needed(void) -+{ -+ int total = 0; -+ struct bm_block *bb; -+ -+ total = sizeof(unsigned int); -+ -+ list_for_each_entry(bb, &pageset1_map->blocks, hook) -+ total += 2 * sizeof(unsigned long) + PAGE_SIZE; -+ -+ return total; -+} -+EXPORT_SYMBOL_GPL(toi_pageflags_space_needed); -diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h -new file mode 100644 -index 0000000..3d6d471 ---- /dev/null -+++ b/kernel/power/tuxonice_pageflags.h -@@ -0,0 +1,80 @@ -+/* -+ * kernel/power/tuxonice_pageflags.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ */ -+ -+#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H -+#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H -+ -+extern struct memory_bitmap *pageset1_map; -+extern struct memory_bitmap *pageset1_copy_map; -+extern struct memory_bitmap *pageset2_map; -+extern struct memory_bitmap *page_resave_map; -+extern struct memory_bitmap *io_map; -+extern struct memory_bitmap *nosave_map; -+extern struct memory_bitmap *free_map; -+extern struct memory_bitmap *compare_map; -+ -+#define PagePageset1(page) \ -+ (memory_bm_test_bit(pageset1_map, page_to_pfn(page))) -+#define SetPagePageset1(page) \ -+ (memory_bm_set_bit(pageset1_map, page_to_pfn(page))) -+#define ClearPagePageset1(page) \ -+ (memory_bm_clear_bit(pageset1_map, page_to_pfn(page))) -+ -+#define PagePageset1Copy(page) \ -+ (memory_bm_test_bit(pageset1_copy_map, page_to_pfn(page))) -+#define SetPagePageset1Copy(page) \ -+ (memory_bm_set_bit(pageset1_copy_map, page_to_pfn(page))) -+#define ClearPagePageset1Copy(page) \ -+ (memory_bm_clear_bit(pageset1_copy_map, page_to_pfn(page))) -+ -+#define PagePageset2(page) \ -+ (memory_bm_test_bit(pageset2_map, page_to_pfn(page))) -+#define SetPagePageset2(page) \ -+ (memory_bm_set_bit(pageset2_map, page_to_pfn(page))) -+#define 
ClearPagePageset2(page) \ -+ (memory_bm_clear_bit(pageset2_map, page_to_pfn(page))) -+ -+#define PageWasRW(page) \ -+ (memory_bm_test_bit(pageset2_map, page_to_pfn(page))) -+#define SetPageWasRW(page) \ -+ (memory_bm_set_bit(pageset2_map, page_to_pfn(page))) -+#define ClearPageWasRW(page) \ -+ (memory_bm_clear_bit(pageset2_map, page_to_pfn(page))) -+ -+#define PageResave(page) (page_resave_map ? \ -+ memory_bm_test_bit(page_resave_map, page_to_pfn(page)) : 0) -+#define SetPageResave(page) \ -+ (memory_bm_set_bit(page_resave_map, page_to_pfn(page))) -+#define ClearPageResave(page) \ -+ (memory_bm_clear_bit(page_resave_map, page_to_pfn(page))) -+ -+#define PageNosave(page) (nosave_map ? \ -+ memory_bm_test_bit(nosave_map, page_to_pfn(page)) : 0) -+#define SetPageNosave(page) \ -+ (memory_bm_set_bit(nosave_map, page_to_pfn(page))) -+#define ClearPageNosave(page) \ -+ (memory_bm_clear_bit(nosave_map, page_to_pfn(page))) -+ -+#define PageNosaveFree(page) (free_map ? \ -+ memory_bm_test_bit(free_map, page_to_pfn(page)) : 0) -+#define SetPageNosaveFree(page) \ -+ (memory_bm_set_bit(free_map, page_to_pfn(page))) -+#define ClearPageNosaveFree(page) \ -+ (memory_bm_clear_bit(free_map, page_to_pfn(page))) -+ -+#define PageCompareChanged(page) (compare_map ? 
\ -+ memory_bm_test_bit(compare_map, page_to_pfn(page)) : 0) -+#define SetPageCompareChanged(page) \ -+ (memory_bm_set_bit(compare_map, page_to_pfn(page))) -+#define ClearPageCompareChanged(page) \ -+ (memory_bm_clear_bit(compare_map, page_to_pfn(page))) -+ -+extern void save_pageflags(struct memory_bitmap *pagemap); -+extern int load_pageflags(struct memory_bitmap *pagemap); -+extern int toi_pageflags_space_needed(void); -+#endif -diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c -new file mode 100644 -index 0000000..6ac5ea71 ---- /dev/null -+++ b/kernel/power/tuxonice_power_off.c -@@ -0,0 +1,287 @@ -+/* -+ * kernel/power/tuxonice_power_off.c -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Support for powering down. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "tuxonice.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_power_off.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_io.h" -+ -+unsigned long toi_poweroff_method; /* 0 - Kernel power off */ -+EXPORT_SYMBOL_GPL(toi_poweroff_method); -+ -+static int wake_delay; -+static char lid_state_file[256], wake_alarm_dir[256]; -+static struct file *lid_file, *alarm_file, *epoch_file; -+static int post_wake_state = -1; -+ -+static int did_suspend_to_both; -+ -+/* -+ * __toi_power_down -+ * Functionality : Powers down or reboots the computer once the image -+ * has been written to disk. -+ * Key Assumptions : Able to reboot/power down via code called or that -+ * the warning emitted if the calls fail will be visible -+ * to the user (ie printk resumes devices). -+ */ -+ -+static void __toi_power_down(int method) -+{ -+ int error; -+ -+ toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." 
: -+ "Powering down."); -+ -+ if (test_result_state(TOI_ABORTED)) -+ goto out; -+ -+ if (test_action_state(TOI_REBOOT)) -+ kernel_restart(NULL); -+ -+ switch (method) { -+ case 0: -+ break; -+ case 3: -+ /* -+ * Re-read the overwritten part of pageset2 to make post-resume -+ * faster. -+ */ -+ if (read_pageset2(1)) -+ panic("Attempt to reload pagedir 2 failed. " -+ "Try rebooting."); -+ -+ pm_prepare_console(); -+ -+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); -+ if (!error) { -+ pm_restore_gfp_mask(); -+ error = suspend_devices_and_enter(PM_SUSPEND_MEM); -+ pm_restrict_gfp_mask(); -+ if (!error) -+ did_suspend_to_both = 1; -+ } -+ pm_notifier_call_chain(PM_POST_SUSPEND); -+ pm_restore_console(); -+ -+ /* Success - we're now post-resume-from-ram */ -+ if (did_suspend_to_both) -+ return; -+ -+ /* Failed to suspend to ram - do normal power off */ -+ break; -+ case 4: -+ /* -+ * If succeeds, doesn't return. If fails, do a simple -+ * powerdown. -+ */ -+ hibernation_platform_enter(); -+ break; -+ case 5: -+ /* Historic entry only now */ -+ break; -+ } -+ -+ if (method && method != 5) -+ toi_cond_pause(1, -+ "Falling back to alternate power off method."); -+ -+ if (test_result_state(TOI_ABORTED)) -+ goto out; -+ -+ kernel_power_off(); -+ kernel_halt(); -+ toi_cond_pause(1, "Powerdown failed."); -+ while (1) -+ cpu_relax(); -+ -+out: -+ if (read_pageset2(1)) -+ panic("Attempt to reload pagedir 2 failed. 
Try rebooting."); -+ return; -+} -+ -+#define CLOSE_FILE(file) \ -+ if (file) { \ -+ filp_close(file, NULL); file = NULL; \ -+ } -+ -+static void powerdown_cleanup(int toi_or_resume) -+{ -+ if (!toi_or_resume) -+ return; -+ -+ CLOSE_FILE(lid_file); -+ CLOSE_FILE(alarm_file); -+ CLOSE_FILE(epoch_file); -+} -+ -+static void open_file(char *format, char *arg, struct file **var, int mode, -+ char *desc) -+{ -+ char buf[256]; -+ -+ if (strlen(arg)) { -+ sprintf(buf, format, arg); -+ *var = filp_open(buf, mode, 0); -+ if (IS_ERR(*var) || !*var) { -+ printk(KERN_INFO "Failed to open %s file '%s' (%p).\n", -+ desc, buf, *var); -+ *var = NULL; -+ } -+ } -+} -+ -+static int powerdown_init(int toi_or_resume) -+{ -+ if (!toi_or_resume) -+ return 0; -+ -+ did_suspend_to_both = 0; -+ -+ open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file, -+ O_RDONLY, "lid"); -+ -+ if (strlen(wake_alarm_dir)) { -+ open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir, -+ &alarm_file, O_WRONLY, "alarm"); -+ -+ open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir, -+ &epoch_file, O_RDONLY, "epoch"); -+ } -+ -+ return 0; -+} -+ -+static int lid_closed(void) -+{ -+ char array[25]; -+ ssize_t size; -+ loff_t pos = 0; -+ -+ if (!lid_file) -+ return 0; -+ -+ size = vfs_read(lid_file, (char __user *) array, 25, &pos); -+ if ((int) size < 1) { -+ printk(KERN_INFO "Failed to read lid state file (%d).\n", -+ (int) size); -+ return 0; -+ } -+ -+ if (!strcmp(array, "state: closed\n")) -+ return 1; -+ -+ return 0; -+} -+ -+static void write_alarm_file(int value) -+{ -+ ssize_t size; -+ char buf[40]; -+ loff_t pos = 0; -+ -+ if (!alarm_file) -+ return; -+ -+ sprintf(buf, "%d\n", value); -+ -+ size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos); -+ -+ if (size < 0) -+ printk(KERN_INFO "Error %d writing alarm value %s.\n", -+ (int) size, buf); -+} -+ -+/** -+ * toi_check_resleep: See whether to powerdown again after waking. 
-+ * -+ * After waking, check whether we should powerdown again in a (usually -+ * different) way. We only do this if the lid switch is still closed. -+ */ -+void toi_check_resleep(void) -+{ -+ /* We only return if we suspended to ram and woke. */ -+ if (lid_closed() && post_wake_state >= 0) -+ __toi_power_down(post_wake_state); -+} -+ -+void toi_power_down(void) -+{ -+ if (alarm_file && wake_delay) { -+ char array[25]; -+ loff_t pos = 0; -+ size_t size = vfs_read(epoch_file, (char __user *) array, 25, -+ &pos); -+ -+ if (((int) size) < 1) -+ printk(KERN_INFO "Failed to read epoch file (%d).\n", -+ (int) size); -+ else { -+ unsigned long since_epoch; -+ if (!strict_strtoul(array, 0, &since_epoch)) { -+ /* Clear any wakeup time. */ -+ write_alarm_file(0); -+ -+ /* Set new wakeup time. */ -+ write_alarm_file(since_epoch + wake_delay); -+ } -+ } -+ } -+ -+ __toi_power_down(toi_poweroff_method); -+ -+ toi_check_resleep(); -+} -+EXPORT_SYMBOL_GPL(toi_power_down); -+ -+static struct toi_sysfs_data sysfs_params[] = { -+#if defined(CONFIG_ACPI) -+ SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL), -+ SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL), -+ SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL), -+ SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0, -+ NULL), -+ SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0), -+ SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both, -+ 0, 0, 0, NULL) -+#endif -+}; -+ -+static struct toi_module_ops powerdown_ops = { -+ .type = MISC_HIDDEN_MODULE, -+ .name = "poweroff", -+ .initialise = powerdown_init, -+ .cleanup = powerdown_cleanup, -+ .directory = "[ROOT]", -+ .module = THIS_MODULE, -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+int toi_poweroff_init(void) -+{ -+ return toi_register_module(&powerdown_ops); -+} -+ -+void toi_poweroff_exit(void) -+{ -+ 
toi_unregister_module(&powerdown_ops); -+} -diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h -new file mode 100644 -index 0000000..804293d ---- /dev/null -+++ b/kernel/power/tuxonice_power_off.h -@@ -0,0 +1,24 @@ -+/* -+ * kernel/power/tuxonice_power_off.h -+ * -+ * Copyright (C) 2006-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Support for the powering down. -+ */ -+ -+int toi_pm_state_finish(void); -+void toi_power_down(void); -+extern unsigned long toi_poweroff_method; -+int toi_poweroff_init(void); -+void toi_poweroff_exit(void); -+void toi_check_resleep(void); -+ -+extern int platform_begin(int platform_mode); -+extern int platform_pre_snapshot(int platform_mode); -+extern void platform_leave(int platform_mode); -+extern void platform_end(int platform_mode); -+extern void platform_finish(int platform_mode); -+extern int platform_pre_restore(int platform_mode); -+extern void platform_restore_cleanup(int platform_mode); -diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c -new file mode 100644 -index 0000000..64c71c0 ---- /dev/null -+++ b/kernel/power/tuxonice_prepare_image.c -@@ -0,0 +1,1118 @@ -+/* -+ * kernel/power/tuxonice_prepare_image.c -+ * -+ * Copyright (C) 2003-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * We need to eat memory until we can: -+ * 1. Perform the save without changing anything (RAM_NEEDED < #pages) -+ * 2. Fit it all in available space (toiActiveAllocator->available_space() >= -+ * main_storage_needed()) -+ * 3. Reload the pagedir and pageset1 to places that don't collide with their -+ * final destinations, not knowing to what extent the resumed kernel will -+ * overlap with the one loaded at boot time. I think the resumed kernel -+ * should overlap completely, but I don't want to rely on this as it is -+ * an unproven assumption. 
We therefore assume there will be no overlap at -+ * all (worse case). -+ * 4. Meet the user's requested limit (if any) on the size of the image. -+ * The limit is in MB, so pages/256 (assuming 4K pages). -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice_pageflags.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_io.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_prepare_image.h" -+#include "tuxonice.h" -+#include "tuxonice_extent.h" -+#include "tuxonice_checksum.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_atomic_copy.h" -+#include "tuxonice_builtin.h" -+ -+static unsigned long num_nosave, main_storage_allocated, storage_limit, -+ header_storage_needed; -+unsigned long extra_pd1_pages_allowance = -+ CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE; -+long image_size_limit = CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT; -+static int no_ps2_needed; -+ -+struct attention_list { -+ struct task_struct *task; -+ struct attention_list *next; -+}; -+ -+static struct attention_list *attention_list; -+ -+#define PAGESET1 0 -+#define PAGESET2 1 -+ -+void free_attention_list(void) -+{ -+ struct attention_list *last = NULL; -+ -+ while (attention_list) { -+ last = attention_list; -+ attention_list = attention_list->next; -+ toi_kfree(6, last, sizeof(*last)); -+ } -+} -+ -+static int build_attention_list(void) -+{ -+ int i, task_count = 0; -+ struct task_struct *p; -+ struct attention_list *next; -+ -+ /* -+ * Count all userspace process (with task->mm) marked PF_NOFREEZE. -+ */ -+ toi_read_lock_tasklist(); -+ for_each_process(p) -+ if ((p->flags & PF_NOFREEZE) || p == current) -+ task_count++; -+ toi_read_unlock_tasklist(); -+ -+ /* -+ * Allocate attention list structs. 
-+ */ -+ for (i = 0; i < task_count; i++) { -+ struct attention_list *this = -+ toi_kzalloc(6, sizeof(struct attention_list), -+ TOI_WAIT_GFP); -+ if (!this) { -+ printk(KERN_INFO "Failed to allocate slab for " -+ "attention list.\n"); -+ free_attention_list(); -+ return 1; -+ } -+ this->next = NULL; -+ if (attention_list) -+ this->next = attention_list; -+ attention_list = this; -+ } -+ -+ next = attention_list; -+ toi_read_lock_tasklist(); -+ for_each_process(p) -+ if ((p->flags & PF_NOFREEZE) || p == current) { -+ next->task = p; -+ next = next->next; -+ } -+ toi_read_unlock_tasklist(); -+ return 0; -+} -+ -+static void pageset2_full(void) -+{ -+ struct zone *zone; -+ struct page *page; -+ unsigned long flags; -+ int i; -+ -+ for_each_populated_zone(zone) { -+ spin_lock_irqsave(&zone->lru_lock, flags); -+ for_each_lru(i) { -+ if (!zone_page_state(zone, NR_LRU_BASE + i)) -+ continue; -+ -+ list_for_each_entry(page, &zone->lruvec.lists[i], lru) { -+ struct address_space *mapping; -+ -+ mapping = page_mapping(page); -+ if (!mapping || !mapping->host || -+ !(mapping->host->i_flags & S_ATOMIC_COPY)) -+ SetPagePageset2(page); -+ } -+ } -+ spin_unlock_irqrestore(&zone->lru_lock, flags); -+ } -+} -+ -+/* -+ * toi_mark_task_as_pageset -+ * Functionality : Marks all the saveable pages belonging to a given process -+ * as belonging to a particular pageset. 
-+ */ -+ -+static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2) -+{ -+ struct vm_area_struct *vma; -+ struct mm_struct *mm; -+ -+ mm = t->active_mm; -+ -+ if (!mm || !mm->mmap) -+ return; -+ -+ if (!irqs_disabled()) -+ down_read(&mm->mmap_sem); -+ -+ for (vma = mm->mmap; vma; vma = vma->vm_next) { -+ unsigned long posn; -+ -+ if (!vma->vm_start || -+ vma->vm_flags & (VM_IO | VM_DONTDUMP | VM_PFNMAP)) -+ continue; -+ -+ for (posn = vma->vm_start; posn < vma->vm_end; -+ posn += PAGE_SIZE) { -+ struct page *page = follow_page(vma, posn, 0); -+ struct address_space *mapping; -+ -+ if (!page || !pfn_valid(page_to_pfn(page))) -+ continue; -+ -+ mapping = page_mapping(page); -+ if (mapping && mapping->host && -+ mapping->host->i_flags & S_ATOMIC_COPY) -+ continue; -+ -+ if (pageset2) -+ SetPagePageset2(page); -+ else { -+ ClearPagePageset2(page); -+ SetPagePageset1(page); -+ } -+ } -+ } -+ -+ if (!irqs_disabled()) -+ up_read(&mm->mmap_sem); -+} -+ -+static void mark_tasks(int pageset) -+{ -+ struct task_struct *p; -+ -+ toi_read_lock_tasklist(); -+ for_each_process(p) { -+ if (!p->mm) -+ continue; -+ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ toi_mark_task_as_pageset(p, pageset); -+ } -+ toi_read_unlock_tasklist(); -+ -+} -+ -+/* mark_pages_for_pageset2 -+ * -+ * Description: Mark unshared pages in processes not needed for hibernate as -+ * being able to be written out in a separate pagedir. -+ * HighMem pages are simply marked as pageset2. They won't be -+ * needed during hibernate. -+ */ -+ -+static void toi_mark_pages_for_pageset2(void) -+{ -+ struct attention_list *this = attention_list; -+ -+ memory_bm_clear(pageset2_map); -+ -+ if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed) -+ return; -+ -+ if (test_action_state(TOI_PAGESET2_FULL)) -+ pageset2_full(); -+ else -+ mark_tasks(PAGESET2); -+ -+ /* -+ * Because the tasks in attention_list are ones related to hibernating, -+ * we know that they won't go away under us. 
-+ */ -+ -+ while (this) { -+ if (!test_result_state(TOI_ABORTED)) -+ toi_mark_task_as_pageset(this->task, PAGESET1); -+ this = this->next; -+ } -+} -+ -+/* -+ * The atomic copy of pageset1 is stored in pageset2 pages. -+ * But if pageset1 is larger (normally only just after boot), -+ * we need to allocate extra pages to store the atomic copy. -+ * The following data struct and functions are used to handle -+ * the allocation and freeing of that memory. -+ */ -+ -+static unsigned long extra_pages_allocated; -+ -+struct extras { -+ struct page *page; -+ int order; -+ struct extras *next; -+}; -+ -+static struct extras *extras_list; -+ -+/* toi_free_extra_pagedir_memory -+ * -+ * Description: Free previously allocated extra pagedir memory. -+ */ -+void toi_free_extra_pagedir_memory(void) -+{ -+ /* Free allocated pages */ -+ while (extras_list) { -+ struct extras *this = extras_list; -+ int i; -+ -+ extras_list = this->next; -+ -+ for (i = 0; i < (1 << this->order); i++) -+ ClearPageNosave(this->page + i); -+ -+ toi_free_pages(9, this->page, this->order); -+ toi_kfree(7, this, sizeof(*this)); -+ } -+ -+ extra_pages_allocated = 0; -+} -+ -+/* toi_allocate_extra_pagedir_memory -+ * -+ * Description: Allocate memory for making the atomic copy of pagedir1 in the -+ * case where it is bigger than pagedir2. -+ * Arguments: int num_to_alloc: Number of extra pages needed. -+ * Result: int. Number of extra pages we now have allocated. 
-+ */ -+static int toi_allocate_extra_pagedir_memory(int extra_pages_needed) -+{ -+ int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated; -+ gfp_t flags = TOI_ATOMIC_GFP; -+ -+ if (num_to_alloc < 1) -+ return 0; -+ -+ order = fls(num_to_alloc); -+ if (order >= MAX_ORDER) -+ order = MAX_ORDER - 1; -+ -+ while (num_to_alloc) { -+ struct page *newpage; -+ unsigned long virt; -+ struct extras *extras_entry; -+ -+ while ((1 << order) > num_to_alloc) -+ order--; -+ -+ extras_entry = (struct extras *) toi_kzalloc(7, -+ sizeof(struct extras), TOI_ATOMIC_GFP); -+ -+ if (!extras_entry) -+ return extra_pages_allocated; -+ -+ virt = toi_get_free_pages(9, flags, order); -+ while (!virt && order) { -+ order--; -+ virt = toi_get_free_pages(9, flags, order); -+ } -+ -+ if (!virt) { -+ toi_kfree(7, extras_entry, sizeof(*extras_entry)); -+ return extra_pages_allocated; -+ } -+ -+ newpage = virt_to_page(virt); -+ -+ extras_entry->page = newpage; -+ extras_entry->order = order; -+ extras_entry->next = extras_list; -+ -+ extras_list = extras_entry; -+ -+ for (j = 0; j < (1 << order); j++) { -+ SetPageNosave(newpage + j); -+ SetPagePageset1Copy(newpage + j); -+ } -+ -+ extra_pages_allocated += (1 << order); -+ num_to_alloc -= (1 << order); -+ } -+ -+ return extra_pages_allocated; -+} -+ -+/* -+ * real_nr_free_pages: Count pcp pages for a zone type or all zones -+ * (-1 for all, otherwise zone_idx() result desired). 
-+ */ -+unsigned long real_nr_free_pages(unsigned long zone_idx_mask) -+{ -+ struct zone *zone; -+ int result = 0, cpu; -+ -+ /* PCP lists */ -+ for_each_populated_zone(zone) { -+ if (!(zone_idx_mask & (1 << zone_idx(zone)))) -+ continue; -+ -+ for_each_online_cpu(cpu) { -+ struct per_cpu_pageset *pset = -+ per_cpu_ptr(zone->pageset, cpu); -+ struct per_cpu_pages *pcp = &pset->pcp; -+ result += pcp->count; -+ } -+ -+ result += zone_page_state(zone, NR_FREE_PAGES); -+ } -+ return result; -+} -+EXPORT_SYMBOL_GPL(real_nr_free_pages); -+ -+/* -+ * Discover how much extra memory will be required by the drivers -+ * when they're asked to hibernate. We can then ensure that amount -+ * of memory is available when we really want it. -+ */ -+static void get_extra_pd1_allowance(void) -+{ -+ unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final; -+ -+ toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers."); -+ -+ if (toi_go_atomic(PMSG_FREEZE, 1)) -+ return; -+ -+ final = real_nr_free_pages(all_zones_mask); -+ toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0); -+ -+ extra_pd1_pages_allowance = (orig_num_free > final) ? -+ orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE : -+ MIN_EXTRA_PAGES_ALLOWANCE; -+} -+ -+/* -+ * Amount of storage needed, possibly taking into account the -+ * expected compression ratio and possibly also ignoring our -+ * allowance for extra pages. -+ */ -+static unsigned long main_storage_needed(int use_ecr, -+ int ignore_extra_pd1_allow) -+{ -+ return (pagedir1.size + pagedir2.size + -+ (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) * -+ (use_ecr ? toi_expected_compression_ratio() : 100) / 100; -+} -+ -+/* -+ * Storage needed for the image header, in bytes until the return. 
-+ */ -+unsigned long get_header_storage_needed(void) -+{ -+ unsigned long bytes = sizeof(struct toi_header) + -+ toi_header_storage_for_modules() + -+ toi_pageflags_space_needed() + -+ fs_info_space_needed(); -+ -+ return DIV_ROUND_UP(bytes, PAGE_SIZE); -+} -+EXPORT_SYMBOL_GPL(get_header_storage_needed); -+ -+/* -+ * When freeing memory, pages from either pageset might be freed. -+ * -+ * When seeking to free memory to be able to hibernate, for every ps1 page -+ * freed, we need 2 less pages for the atomic copy because there is one less -+ * page to copy and one more page into which data can be copied. -+ * -+ * Freeing ps2 pages saves us nothing directly. No more memory is available -+ * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but -+ * that's too much work to figure out. -+ * -+ * => ps1_to_free functions -+ * -+ * Of course if we just want to reduce the image size, because of storage -+ * limitations or an image size limit either ps will do. -+ * -+ * => any_to_free function -+ */ -+ -+static unsigned long lowpages_usable_for_highmem_copy(void) -+{ -+ unsigned long needed = get_lowmem_size(pagedir1) + -+ extra_pd1_pages_allowance + MIN_FREE_RAM + -+ toi_memory_for_modules(0), -+ available = get_lowmem_size(pagedir2) + -+ real_nr_free_low_pages() + extra_pages_allocated; -+ -+ return available > needed ? available - needed : 0; -+} -+ -+static unsigned long highpages_ps1_to_free(void) -+{ -+ unsigned long need = get_highmem_size(pagedir1), -+ available = get_highmem_size(pagedir2) + -+ real_nr_free_high_pages() + -+ lowpages_usable_for_highmem_copy(); -+ -+ return need > available ? DIV_ROUND_UP(need - available, 2) : 0; -+} -+ -+static unsigned long lowpages_ps1_to_free(void) -+{ -+ unsigned long needed = get_lowmem_size(pagedir1) + -+ extra_pd1_pages_allowance + MIN_FREE_RAM + -+ toi_memory_for_modules(0), -+ available = get_lowmem_size(pagedir2) + -+ real_nr_free_low_pages() + extra_pages_allocated; -+ -+ return needed > available ? 
DIV_ROUND_UP(needed - available, 2) : 0; -+} -+ -+static unsigned long current_image_size(void) -+{ -+ return pagedir1.size + pagedir2.size + header_storage_needed; -+} -+ -+static unsigned long storage_still_required(void) -+{ -+ unsigned long needed = main_storage_needed(1, 1); -+ return needed > storage_limit ? needed - storage_limit : 0; -+} -+ -+static unsigned long ram_still_required(void) -+{ -+ unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) + -+ 2 * extra_pd1_pages_allowance, -+ available = real_nr_free_low_pages() + extra_pages_allocated; -+ return needed > available ? needed - available : 0; -+} -+ -+unsigned long any_to_free(int use_image_size_limit) -+{ -+ int use_soft_limit = use_image_size_limit && image_size_limit > 0; -+ unsigned long current_size = current_image_size(), -+ soft_limit = use_soft_limit ? (image_size_limit << 8) : 0, -+ to_free = use_soft_limit ? (current_size > soft_limit ? -+ current_size - soft_limit : 0) : 0, -+ storage_limit = storage_still_required(), -+ ram_limit = ram_still_required(), -+ first_max = max(to_free, storage_limit); -+ -+ return max(first_max, ram_limit); -+} -+ -+static int need_pageset2(void) -+{ -+ return (real_nr_free_low_pages() + extra_pages_allocated - -+ 2 * extra_pd1_pages_allowance - MIN_FREE_RAM - -+ toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size; -+} -+ -+/* amount_needed -+ * -+ * Calculates the amount by which the image size needs to be reduced to meet -+ * our constraints. 
-+ */ -+static unsigned long amount_needed(int use_image_size_limit) -+{ -+ return max(highpages_ps1_to_free() + lowpages_ps1_to_free(), -+ any_to_free(use_image_size_limit)); -+} -+ -+static int image_not_ready(int use_image_size_limit) -+{ -+ toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, -+ "Amount still needed (%lu) > 0:%u," -+ " Storage allocd: %lu < %lu: %u.\n", -+ amount_needed(use_image_size_limit), -+ (amount_needed(use_image_size_limit) > 0), -+ main_storage_allocated, -+ main_storage_needed(1, 1), -+ main_storage_allocated < main_storage_needed(1, 1)); -+ -+ toi_cond_pause(0, NULL); -+ -+ return (amount_needed(use_image_size_limit) > 0) || -+ main_storage_allocated < main_storage_needed(1, 1); -+} -+ -+static void display_failure_reason(int tries_exceeded) -+{ -+ unsigned long storage_required = storage_still_required(), -+ ram_required = ram_still_required(), -+ high_ps1 = highpages_ps1_to_free(), -+ low_ps1 = lowpages_ps1_to_free(); -+ -+ printk(KERN_INFO "Failed to prepare the image because...\n"); -+ -+ if (!storage_limit) { -+ printk(KERN_INFO "- You need some storage available to be " -+ "able to hibernate.\n"); -+ return; -+ } -+ -+ if (tries_exceeded) -+ printk(KERN_INFO "- The maximum number of iterations was " -+ "reached without successfully preparing the " -+ "image.\n"); -+ -+ if (storage_required) { -+ printk(KERN_INFO " - We need at least %lu pages of storage " -+ "(ignoring the header), but only have %lu.\n", -+ main_storage_needed(1, 1), -+ main_storage_allocated); -+ set_abort_result(TOI_INSUFFICIENT_STORAGE); -+ } -+ -+ if (ram_required) { -+ printk(KERN_INFO " - We need %lu more free pages of low " -+ "memory.\n", ram_required); -+ printk(KERN_INFO " Minimum free : %8d\n", MIN_FREE_RAM); -+ printk(KERN_INFO " + Reqd. 
by modules : %8lu\n", -+ toi_memory_for_modules(0)); -+ printk(KERN_INFO " + 2 * extra allow : %8lu\n", -+ 2 * extra_pd1_pages_allowance); -+ printk(KERN_INFO " - Currently free : %8lu\n", -+ real_nr_free_low_pages()); -+ printk(KERN_INFO " - Pages allocd : %8lu\n", -+ extra_pages_allocated); -+ printk(KERN_INFO " : ========\n"); -+ printk(KERN_INFO " Still needed : %8lu\n", -+ ram_required); -+ -+ /* Print breakdown of memory needed for modules */ -+ toi_memory_for_modules(1); -+ set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); -+ } -+ -+ if (high_ps1) { -+ printk(KERN_INFO "- We need to free %lu highmem pageset 1 " -+ "pages.\n", high_ps1); -+ set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); -+ } -+ -+ if (low_ps1) { -+ printk(KERN_INFO " - We need to free %ld lowmem pageset 1 " -+ "pages.\n", low_ps1); -+ set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY); -+ } -+} -+ -+static void display_stats(int always, int sub_extra_pd1_allow) -+{ -+ char buffer[255]; -+ snprintf(buffer, 254, -+ "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). " -+ "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). " -+ "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n", -+ -+ /* Free */ -+ real_nr_free_pages(all_zones_mask), -+ real_nr_free_low_pages(), -+ -+ /* Sets */ -+ pagedir1.size, pagedir1.size - get_highmem_size(pagedir1), -+ pagedir2.size, pagedir2.size - get_highmem_size(pagedir2), -+ -+ /* Nosave */ -+ num_nosave, extra_pages_allocated, -+ num_nosave - extra_pages_allocated, -+ -+ /* Storage */ -+ main_storage_allocated, -+ storage_limit, -+ main_storage_needed(1, sub_extra_pd1_allow), -+ main_storage_needed(1, 1), -+ -+ /* Needed */ -+ lowpages_ps1_to_free(), highpages_ps1_to_free(), -+ any_to_free(1), -+ MIN_FREE_RAM, toi_memory_for_modules(0), -+ extra_pd1_pages_allowance, -+ image_size_limit, -+ -+ need_pageset2() ? 
"yes" : "no"); -+ -+ if (always) -+ printk("%s", buffer); -+ else -+ toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer); -+} -+ -+/* generate_free_page_map -+ * -+ * Description: This routine generates a bitmap of free pages from the -+ * lists used by the memory manager. We then use the bitmap -+ * to quickly calculate which pages to save and in which -+ * pagesets. -+ */ -+static void generate_free_page_map(void) -+{ -+ int order, cpu, t; -+ unsigned long flags, i; -+ struct zone *zone; -+ struct list_head *curr; -+ unsigned long pfn; -+ struct page *page; -+ -+ for_each_populated_zone(zone) { -+ -+ if (!zone->spanned_pages) -+ continue; -+ -+ spin_lock_irqsave(&zone->lock, flags); -+ -+ for (i = 0; i < zone->spanned_pages; i++) { -+ pfn = zone->zone_start_pfn + i; -+ -+ if (!pfn_valid(pfn)) -+ continue; -+ -+ page = pfn_to_page(pfn); -+ -+ ClearPageNosaveFree(page); -+ } -+ -+ for_each_migratetype_order(order, t) { -+ list_for_each(curr, -+ &zone->free_area[order].free_list[t]) { -+ unsigned long j; -+ -+ pfn = page_to_pfn(list_entry(curr, struct page, -+ lru)); -+ for (j = 0; j < (1UL << order); j++) -+ SetPageNosaveFree(pfn_to_page(pfn + j)); -+ } -+ } -+ -+ for_each_online_cpu(cpu) { -+ struct per_cpu_pageset *pset = -+ per_cpu_ptr(zone->pageset, cpu); -+ struct per_cpu_pages *pcp = &pset->pcp; -+ struct page *page; -+ int t; -+ -+ for (t = 0; t < MIGRATE_PCPTYPES; t++) -+ list_for_each_entry(page, &pcp->lists[t], lru) -+ SetPageNosaveFree(page); -+ } -+ -+ spin_unlock_irqrestore(&zone->lock, flags); -+ } -+} -+ -+/* size_of_free_region -+ * -+ * Description: Return the number of pages that are free, beginning with and -+ * including this one. 
-+ */ -+static int size_of_free_region(struct zone *zone, unsigned long start_pfn) -+{ -+ unsigned long this_pfn = start_pfn, -+ end_pfn = zone_end_pfn(zone); -+ -+ while (pfn_valid(this_pfn) && this_pfn < end_pfn && PageNosaveFree(pfn_to_page(this_pfn))) -+ this_pfn++; -+ -+ return this_pfn - start_pfn; -+} -+ -+/* flag_image_pages -+ * -+ * This routine generates our lists of pages to be stored in each -+ * pageset. Since we store the data using extents, and adding new -+ * extents might allocate a new extent page, this routine may well -+ * be called more than once. -+ */ -+static void flag_image_pages(int atomic_copy) -+{ -+ int num_free = 0; -+ unsigned long loop; -+ struct zone *zone; -+ -+ pagedir1.size = 0; -+ pagedir2.size = 0; -+ -+ set_highmem_size(pagedir1, 0); -+ set_highmem_size(pagedir2, 0); -+ -+ num_nosave = 0; -+ -+ memory_bm_clear(pageset1_map); -+ -+ generate_free_page_map(); -+ -+ /* -+ * Pages not to be saved are marked Nosave irrespective of being -+ * reserved. -+ */ -+ for_each_populated_zone(zone) { -+ int highmem = is_highmem(zone); -+ -+ for (loop = 0; loop < zone->spanned_pages; loop++) { -+ unsigned long pfn = zone->zone_start_pfn + loop; -+ struct page *page; -+ int chunk_size; -+ -+ if (!pfn_valid(pfn)) -+ continue; -+ -+ chunk_size = size_of_free_region(zone, pfn); -+ if (chunk_size) { -+ num_free += chunk_size; -+ loop += chunk_size - 1; -+ continue; -+ } -+ -+ page = pfn_to_page(pfn); -+ -+ if (PageNosave(page)) { -+ num_nosave++; -+ continue; -+ } -+ -+ page = highmem ? 
saveable_highmem_page(zone, pfn) : -+ saveable_page(zone, pfn); -+ -+ if (!page) { -+ num_nosave++; -+ continue; -+ } -+ -+ if (PagePageset2(page)) { -+ pagedir2.size++; -+ if (PageHighMem(page)) -+ inc_highmem_size(pagedir2); -+ else -+ SetPagePageset1Copy(page); -+ if (PageResave(page)) { -+ SetPagePageset1(page); -+ ClearPagePageset1Copy(page); -+ pagedir1.size++; -+ if (PageHighMem(page)) -+ inc_highmem_size(pagedir1); -+ } -+ } else { -+ pagedir1.size++; -+ SetPagePageset1(page); -+ if (PageHighMem(page)) -+ inc_highmem_size(pagedir1); -+ } -+ } -+ } -+ -+ if (!atomic_copy) -+ toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0, -+ "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)" -+ " + NumFree (%d) = %d.\n", -+ pagedir1.size, pagedir2.size, num_nosave, num_free, -+ pagedir1.size + pagedir2.size + num_nosave + num_free); -+} -+ -+void toi_recalculate_image_contents(int atomic_copy) -+{ -+ memory_bm_clear(pageset1_map); -+ if (!atomic_copy) { -+ unsigned long pfn; -+ memory_bm_position_reset(pageset2_map); -+ for (pfn = memory_bm_next_pfn(pageset2_map); -+ pfn != BM_END_OF_MAP; -+ pfn = memory_bm_next_pfn(pageset2_map)) -+ ClearPagePageset1Copy(pfn_to_page(pfn)); -+ /* Need to call this before getting pageset1_size! */ -+ toi_mark_pages_for_pageset2(); -+ } -+ flag_image_pages(atomic_copy); -+ -+ if (!atomic_copy) { -+ storage_limit = toiActiveAllocator->storage_available(); -+ display_stats(0, 0); -+ } -+} -+ -+int try_allocate_extra_memory(void) -+{ -+ unsigned long wanted = pagedir1.size + extra_pd1_pages_allowance - -+ get_lowmem_size(pagedir2); -+ if (wanted > extra_pages_allocated) { -+ unsigned long got = toi_allocate_extra_pagedir_memory(wanted); -+ if (wanted < got) { -+ toi_message(TOI_EAT_MEMORY, TOI_LOW, 1, -+ "Want %d extra pages for pageset1, got %d.\n", -+ wanted, got); -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+ -+/* update_image -+ * -+ * Allocate [more] memory and storage for the image. 
-+ */ -+static void update_image(int ps2_recalc) -+{ -+ int old_header_req; -+ unsigned long seek; -+ -+ if (try_allocate_extra_memory()) -+ return; -+ -+ if (ps2_recalc) -+ goto recalc; -+ -+ thaw_kernel_threads(); -+ -+ /* -+ * Allocate remaining storage space, if possible, up to the -+ * maximum we know we'll need. It's okay to allocate the -+ * maximum if the writer is the swapwriter, but -+ * we don't want to grab all available space on an NFS share. -+ * We therefore ignore the expected compression ratio here, -+ * thereby trying to allocate the maximum image size we could -+ * need (assuming compression doesn't expand the image), but -+ * don't complain if we can't get the full amount we're after. -+ */ -+ -+ do { -+ int result; -+ -+ old_header_req = header_storage_needed; -+ toiActiveAllocator->reserve_header_space(header_storage_needed); -+ -+ /* How much storage is free with the reservation applied? */ -+ storage_limit = toiActiveAllocator->storage_available(); -+ seek = min(storage_limit, main_storage_needed(0, 0)); -+ -+ result = toiActiveAllocator->allocate_storage(seek); -+ if (result) -+ printk("Failed to allocate storage (%d).\n", result); -+ -+ main_storage_allocated = -+ toiActiveAllocator->storage_allocated(); -+ -+ /* Need more header because more storage allocated? */ -+ header_storage_needed = get_header_storage_needed(); -+ -+ } while (header_storage_needed > old_header_req); -+ -+ if (freeze_kernel_threads()) -+ set_abort_result(TOI_FREEZING_FAILED); -+ -+recalc: -+ toi_recalculate_image_contents(0); -+} -+ -+/* attempt_to_freeze -+ * -+ * Try to freeze processes. 
-+ */ -+ -+static int attempt_to_freeze(void) -+{ -+ int result; -+ -+ /* Stop processes before checking again */ -+ toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing " -+ "filesystems."); -+ result = freeze_processes(); -+ -+ if (result) -+ set_abort_result(TOI_FREEZING_FAILED); -+ -+ result = freeze_kernel_threads(); -+ -+ if (result) -+ set_abort_result(TOI_FREEZING_FAILED); -+ -+ return result; -+} -+ -+/* eat_memory -+ * -+ * Try to free some memory, either to meet hard or soft constraints on the image -+ * characteristics. -+ * -+ * Hard constraints: -+ * - Pageset1 must be < half of memory; -+ * - We must have enough memory free at resume time to have pageset1 -+ * be able to be loaded in pages that don't conflict with where it has to -+ * be restored. -+ * Soft constraints -+ * - User specificied image size limit. -+ */ -+static void eat_memory(void) -+{ -+ unsigned long amount_wanted = 0; -+ int did_eat_memory = 0; -+ -+ /* -+ * Note that if we have enough storage space and enough free memory, we -+ * may exit without eating anything. We give up when the last 10 -+ * iterations ate no extra pages because we're not going to get much -+ * more anyway, but the few pages we get will take a lot of time. -+ * -+ * We freeze processes before beginning, and then unfreeze them if we -+ * need to eat memory until we think we have enough. If our attempts -+ * to freeze fail, we give up and abort. 
-+ */ -+ -+ amount_wanted = amount_needed(1); -+ -+ switch (image_size_limit) { -+ case -1: /* Don't eat any memory */ -+ if (amount_wanted > 0) { -+ set_abort_result(TOI_WOULD_EAT_MEMORY); -+ return; -+ } -+ break; -+ case -2: /* Free caches only */ -+ drop_pagecache(); -+ toi_recalculate_image_contents(0); -+ amount_wanted = amount_needed(1); -+ break; -+ default: -+ break; -+ } -+ -+ if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) && -+ image_size_limit != -1) { -+ unsigned long request = amount_wanted; -+ unsigned long high_req = max(highpages_ps1_to_free(), -+ any_to_free(1)); -+ unsigned long low_req = lowpages_ps1_to_free(); -+ unsigned long got = 0; -+ -+ toi_prepare_status(CLEAR_BAR, -+ "Seeking to free %ldMB of memory.", -+ MB(amount_wanted)); -+ -+ thaw_kernel_threads(); -+ -+ /* -+ * Ask for too many because shrink_memory_mask doesn't -+ * currently return enough most of the time. -+ */ -+ -+ if (low_req) -+ got = shrink_memory_mask(low_req, GFP_KERNEL); -+ if (high_req) -+ shrink_memory_mask(high_req - got, GFP_HIGHUSER); -+ -+ did_eat_memory = 1; -+ -+ toi_recalculate_image_contents(0); -+ -+ amount_wanted = amount_needed(1); -+ -+ printk(KERN_DEBUG "Asked shrink_memory_mask for %ld low pages &" -+ " %ld pages from anywhere, got %ld.\n", -+ high_req, low_req, -+ request - amount_wanted); -+ -+ toi_cond_pause(0, NULL); -+ -+ if (freeze_kernel_threads()) -+ set_abort_result(TOI_FREEZING_FAILED); -+ } -+ -+ if (did_eat_memory) -+ toi_recalculate_image_contents(0); -+} -+ -+/* toi_prepare_image -+ * -+ * Entry point to the whole image preparation section. -+ * -+ * We do four things: -+ * - Freeze processes; -+ * - Ensure image size constraints are met; -+ * - Complete all the preparation for saving the image, -+ * including allocation of storage. The only memory -+ * that should be needed when we're finished is that -+ * for actually storing the image (and we know how -+ * much is needed for that because the modules tell -+ * us). 
-+ * - Make sure that all dirty buffers are written out. -+ */ -+#define MAX_TRIES 2 -+int toi_prepare_image(void) -+{ -+ int result = 1, tries = 1; -+ -+ main_storage_allocated = 0; -+ no_ps2_needed = 0; -+ -+ if (attempt_to_freeze()) -+ return 1; -+ -+ lock_device_hotplug(); -+ set_toi_state(TOI_DEVICE_HOTPLUG_LOCKED); -+ -+ if (!extra_pd1_pages_allowance) -+ get_extra_pd1_allowance(); -+ -+ storage_limit = toiActiveAllocator->storage_available(); -+ -+ if (!storage_limit) { -+ printk(KERN_INFO "No storage available. Didn't try to prepare " -+ "an image.\n"); -+ display_failure_reason(0); -+ set_abort_result(TOI_NOSTORAGE_AVAILABLE); -+ return 1; -+ } -+ -+ if (build_attention_list()) { -+ abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, -+ "Unable to successfully prepare the image.\n"); -+ return 1; -+ } -+ -+ toi_recalculate_image_contents(0); -+ -+ do { -+ toi_prepare_status(CLEAR_BAR, -+ "Preparing Image. Try %d.", tries); -+ -+ eat_memory(); -+ -+ if (test_result_state(TOI_ABORTED)) -+ break; -+ -+ update_image(0); -+ -+ tries++; -+ -+ } while (image_not_ready(1) && tries <= MAX_TRIES && -+ !test_result_state(TOI_ABORTED)); -+ -+ result = image_not_ready(0); -+ -+ if (!test_result_state(TOI_ABORTED)) { -+ if (result) { -+ display_stats(1, 0); -+ display_failure_reason(tries > MAX_TRIES); -+ abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE, -+ "Unable to successfully prepare the image.\n"); -+ } else { -+ /* Pageset 2 needed? */ -+ if (!need_pageset2() && -+ test_action_state(TOI_NO_PS2_IF_UNNEEDED)) { -+ no_ps2_needed = 1; -+ toi_recalculate_image_contents(0); -+ update_image(1); -+ } -+ -+ toi_cond_pause(1, "Image preparation complete."); -+ } -+ } -+ -+ return result ? 
result : allocate_checksum_pages(); -+} -diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h -new file mode 100644 -index 0000000..73e8bf2 ---- /dev/null -+++ b/kernel/power/tuxonice_prepare_image.h -@@ -0,0 +1,38 @@ -+/* -+ * kernel/power/tuxonice_prepare_image.h -+ * -+ * Copyright (C) 2003-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ */ -+ -+#include -+ -+extern int toi_prepare_image(void); -+extern void toi_recalculate_image_contents(int storage_available); -+extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask); -+extern long image_size_limit; -+extern void toi_free_extra_pagedir_memory(void); -+extern unsigned long extra_pd1_pages_allowance; -+extern void free_attention_list(void); -+ -+#define MIN_FREE_RAM 100 -+#define MIN_EXTRA_PAGES_ALLOWANCE 500 -+ -+#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1)) -+#ifdef CONFIG_HIGHMEM -+#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM)) -+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \ -+ (1 << ZONE_HIGHMEM))) -+#else -+#define real_nr_free_high_pages() (0) -+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask)) -+ -+/* For eat_memory function */ -+#define ZONE_HIGHMEM (MAX_NR_ZONES + 1) -+#endif -+ -+unsigned long get_header_storage_needed(void); -+unsigned long any_to_free(int use_image_size_limit); -+int try_allocate_extra_memory(void); -diff --git a/kernel/power/tuxonice_prune.c b/kernel/power/tuxonice_prune.c -new file mode 100644 -index 0000000..9a9444d ---- /dev/null -+++ b/kernel/power/tuxonice_prune.c -@@ -0,0 +1,419 @@ -+/* -+ * kernel/power/tuxonice_prune.c -+ * -+ * Copyright (C) 2012 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file implements a TuxOnIce module that seeks to prune the -+ * amount of data written to disk. 
It builds a table of hashes -+ * of the uncompressed data, and writes the pfn of the previous page -+ * with the same contents instead of repeating the data when a match -+ * is found. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice_builtin.h" -+#include "tuxonice.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_io.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_alloc.h" -+ -+/* -+ * We never write a page bigger than PAGE_SIZE, so use a large number -+ * to indicate that data is a PFN. -+ */ -+#define PRUNE_DATA_IS_PFN (PAGE_SIZE + 100) -+ -+static unsigned long toi_pruned_pages; -+ -+static struct toi_module_ops toi_prune_ops; -+static struct toi_module_ops *next_driver; -+ -+static char toi_prune_hash_algo_name[32] = "sha1"; -+ -+static DEFINE_MUTEX(stats_lock); -+ -+struct cpu_context { -+ struct shash_desc desc; -+ char *digest; -+}; -+ -+#define OUT_BUF_SIZE (2 * PAGE_SIZE) -+ -+static DEFINE_PER_CPU(struct cpu_context, contexts); -+ -+/* -+ * toi_crypto_prepare -+ * -+ * Prepare to do some work by allocating buffers and transforms. 
-+ */ -+static int toi_prune_crypto_prepare(void) -+{ -+ int cpu, ret, digestsize; -+ -+ if (!*toi_prune_hash_algo_name) { -+ printk(KERN_INFO "TuxOnIce: Pruning enabled but no " -+ "hash algorithm set.\n"); -+ return 1; -+ } -+ -+ for_each_online_cpu(cpu) { -+ struct cpu_context *this = &per_cpu(contexts, cpu); -+ this->desc.tfm = crypto_alloc_shash(toi_prune_hash_algo_name, 0, 0); -+ if (IS_ERR(this->desc.tfm)) { -+ printk(KERN_INFO "TuxOnIce: Failed to allocate the " -+ "%s prune hash algorithm.\n", -+ toi_prune_hash_algo_name); -+ this->desc.tfm = NULL; -+ return 1; -+ } -+ -+ if (!digestsize) -+ digestsize = crypto_shash_digestsize(this->desc.tfm); -+ -+ this->digest = kmalloc(digestsize, GFP_KERNEL); -+ if (!this->digest) { -+ printk(KERN_INFO "TuxOnIce: Failed to allocate space " -+ "for digest output.\n"); -+ crypto_free_shash(this->desc.tfm); -+ this->desc.tfm = NULL; -+ } -+ -+ this->desc.flags = 0; -+ -+ ret = crypto_shash_init(&this->desc); -+ if (ret < 0) { -+ printk(KERN_INFO "TuxOnIce: Failed to initialise the " -+ "%s prune hash algorithm.\n", -+ toi_prune_hash_algo_name); -+ kfree(this->digest); -+ this->digest = NULL; -+ crypto_free_shash(this->desc.tfm); -+ this->desc.tfm = NULL; -+ return 1; -+ } -+ } -+ -+ return 0; -+} -+ -+static int toi_prune_rw_cleanup(int writing) -+{ -+ int cpu; -+ -+ for_each_online_cpu(cpu) { -+ struct cpu_context *this = &per_cpu(contexts, cpu); -+ if (this->desc.tfm) { -+ crypto_free_shash(this->desc.tfm); -+ this->desc.tfm = NULL; -+ } -+ -+ if (this->digest) { -+ kfree(this->digest); -+ this->digest = NULL; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * toi_prune_init -+ */ -+ -+static int toi_prune_init(int toi_or_resume) -+{ -+ if (!toi_or_resume) -+ return 0; -+ -+ toi_pruned_pages = 0; -+ -+ next_driver = toi_get_next_filter(&toi_prune_ops); -+ -+ return next_driver ? 
0 : -ECHILD; -+} -+ -+/* -+ * toi_prune_rw_init() -+ */ -+ -+static int toi_prune_rw_init(int rw, int stream_number) -+{ -+ if (toi_prune_crypto_prepare()) { -+ printk(KERN_ERR "Failed to initialise prune " -+ "algorithm.\n"); -+ if (rw == READ) { -+ printk(KERN_INFO "Unable to read the image.\n"); -+ return -ENODEV; -+ } else { -+ printk(KERN_INFO "Continuing without " -+ "pruning the image.\n"); -+ toi_prune_ops.enabled = 0; -+ } -+ } -+ -+ return 0; -+} -+ -+/* -+ * toi_prune_write_page() -+ * -+ * Compress a page of data, buffering output and passing on filled -+ * pages to the next module in the pipeline. -+ * -+ * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing -+ * data to be checked. -+ * -+ * Returns: 0 on success. Otherwise the error is that returned by later -+ * modules, -ECHILD if we have a broken pipeline or -EIO if -+ * zlib errs. -+ */ -+static int toi_prune_write_page(unsigned long index, int buf_type, -+ void *buffer_page, unsigned int buf_size) -+{ -+ int ret = 0, cpu = smp_processor_id(), write_data = 1; -+ struct cpu_context *ctx = &per_cpu(contexts, cpu); -+ u8* output_buffer = buffer_page; -+ int output_len = buf_size; -+ int out_buf_type = buf_type; -+ void *buffer_start; -+ u32 buf[4]; -+ -+ if (ctx->desc.tfm) { -+ -+ buffer_start = TOI_MAP(buf_type, buffer_page); -+ ctx->len = OUT_BUF_SIZE; -+ -+ ret = crypto_shash_digest(&ctx->desc, buffer_start, buf_size, &ctx->digest); -+ if (ret) { -+ printk(KERN_INFO "TuxOnIce: Failed to calculate digest (%d).\n", ret); -+ } else { -+ mutex_lock(&stats_lock); -+ -+ toi_pruned_pages++; -+ -+ mutex_unlock(&stats_lock); -+ -+ } -+ -+ TOI_UNMAP(buf_type, buffer_page); -+ } -+ -+ if (write_data) -+ ret = next_driver->write_page(index, out_buf_type, -+ output_buffer, output_len); -+ else -+ ret = next_driver->write_page(index, out_buf_type, -+ output_buffer, output_len); -+ -+ return ret; -+} -+ -+/* -+ * toi_prune_read_page() -+ * @buffer_page: struct page *. 
Pointer to a buffer of size PAGE_SIZE. -+ * -+ * Retrieve data from later modules or from a previously loaded page and -+ * fill the input buffer. -+ * Zero if successful. Error condition from me or from downstream on failure. -+ */ -+static int toi_prune_read_page(unsigned long *index, int buf_type, -+ void *buffer_page, unsigned int *buf_size) -+{ -+ int ret, cpu = smp_processor_id(); -+ unsigned int len; -+ char *buffer_start; -+ struct cpu_context *ctx = &per_cpu(contexts, cpu); -+ -+ if (!ctx->desc.tfm) -+ return next_driver->read_page(index, TOI_PAGE, buffer_page, -+ buf_size); -+ -+ /* -+ * All our reads must be synchronous - we can't handle -+ * data that hasn't been read yet. -+ */ -+ -+ ret = next_driver->read_page(index, buf_type, buffer_page, &len); -+ -+ if (len == PRUNE_DATA_IS_PFN) { -+ buffer_start = kmap(buffer_page); -+ } -+ -+ return ret; -+} -+ -+/* -+ * toi_prune_print_debug_stats -+ * @buffer: Pointer to a buffer into which the debug info will be printed. -+ * @size: Size of the buffer. -+ * -+ * Print information to be recorded for debugging purposes into a buffer. -+ * Returns: Number of characters written to the buffer. -+ */ -+ -+static int toi_prune_print_debug_stats(char *buffer, int size) -+{ -+ int len; -+ -+ /* Output the number of pages pruned. */ -+ if (*toi_prune_hash_algo_name) -+ len = scnprintf(buffer, size, "- Compressor is '%s'.\n", -+ toi_prune_hash_algo_name); -+ else -+ len = scnprintf(buffer, size, "- Compressor is not set.\n"); -+ -+ if (toi_pruned_pages) -+ len += scnprintf(buffer+len, size - len, " Pruned " -+ "%lu pages).\n", -+ toi_pruned_pages); -+ return len; -+} -+ -+/* -+ * toi_prune_memory_needed -+ * -+ * Tell the caller how much memory we need to operate during hibernate/resume. -+ * Returns: Unsigned long. Maximum number of bytes of memory required for -+ * operation. 
-+ */ -+static int toi_prune_memory_needed(void) -+{ -+ return 2 * PAGE_SIZE; -+} -+ -+static int toi_prune_storage_needed(void) -+{ -+ return 2 * sizeof(unsigned long) + 2 * sizeof(int) + -+ strlen(toi_prune_hash_algo_name) + 1; -+} -+ -+/* -+ * toi_prune_save_config_info -+ * @buffer: Pointer to a buffer of size PAGE_SIZE. -+ * -+ * Save informaton needed when reloading the image at resume time. -+ * Returns: Number of bytes used for saving our data. -+ */ -+static int toi_prune_save_config_info(char *buffer) -+{ -+ int len = strlen(toi_prune_hash_algo_name) + 1, offset = 0; -+ -+ *((unsigned long *) buffer) = toi_pruned_pages; -+ offset += sizeof(unsigned long); -+ *((int *) (buffer + offset)) = len; -+ offset += sizeof(int); -+ strncpy(buffer + offset, toi_prune_hash_algo_name, len); -+ return offset + len; -+} -+ -+/* toi_prune_load_config_info -+ * @buffer: Pointer to the start of the data. -+ * @size: Number of bytes that were saved. -+ * -+ * Description: Reload information needed for passing back to the -+ * resumed kernel. -+ */ -+static void toi_prune_load_config_info(char *buffer, int size) -+{ -+ int len, offset = 0; -+ -+ toi_pruned_pages = *((unsigned long *) buffer); -+ offset += sizeof(unsigned long); -+ len = *((int *) (buffer + offset)); -+ offset += sizeof(int); -+ strncpy(toi_prune_hash_algo_name, buffer + offset, len); -+} -+ -+static void toi_prune_pre_atomic_restore(struct toi_boot_kernel_data *bkd) -+{ -+ bkd->pruned_pages = toi_pruned_pages; -+} -+ -+static void toi_prune_post_atomic_restore(struct toi_boot_kernel_data *bkd) -+{ -+ toi_pruned_pages = bkd->pruned_pages; -+} -+ -+/* -+ * toi_expected_ratio -+ * -+ * Description: Returns the expected ratio between data passed into this module -+ * and the amount of data output when writing. -+ * Returns: 100 - we have no idea how many pages will be pruned. -+ */ -+ -+static int toi_prune_expected_ratio(void) -+{ -+ return 100; -+} -+ -+/* -+ * data for our sysfs entries. 
-+ */ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_INT("enabled", SYSFS_RW, &toi_prune_ops.enabled, 0, 1, 0, -+ NULL), -+ SYSFS_STRING("algorithm", SYSFS_RW, toi_prune_hash_algo_name, 31, 0, NULL), -+}; -+ -+/* -+ * Ops structure. -+ */ -+static struct toi_module_ops toi_prune_ops = { -+ .type = FILTER_MODULE, -+ .name = "prune", -+ .directory = "prune", -+ .module = THIS_MODULE, -+ .initialise = toi_prune_init, -+ .memory_needed = toi_prune_memory_needed, -+ .print_debug_info = toi_prune_print_debug_stats, -+ .save_config_info = toi_prune_save_config_info, -+ .load_config_info = toi_prune_load_config_info, -+ .storage_needed = toi_prune_storage_needed, -+ .expected_compression = toi_prune_expected_ratio, -+ -+ .pre_atomic_restore = toi_prune_pre_atomic_restore, -+ .post_atomic_restore = toi_prune_post_atomic_restore, -+ -+ .rw_init = toi_prune_rw_init, -+ .rw_cleanup = toi_prune_rw_cleanup, -+ -+ .write_page = toi_prune_write_page, -+ .read_page = toi_prune_read_page, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* ---- Registration ---- */ -+ -+static __init int toi_prune_load(void) -+{ -+ return toi_register_module(&toi_prune_ops); -+} -+ -+#ifdef MODULE -+static __exit void toi_prune_unload(void) -+{ -+ toi_unregister_module(&toi_prune_ops); -+} -+ -+module_init(toi_prune_load); -+module_exit(toi_prune_unload); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("Image Pruning Support for TuxOnIce"); -+#else -+late_initcall(toi_prune_load); -+#endif -diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c -new file mode 100644 -index 0000000..8a1f1eb ---- /dev/null -+++ b/kernel/power/tuxonice_storage.c -@@ -0,0 +1,283 @@ -+/* -+ * kernel/power/tuxonice_storage.c -+ * -+ * Copyright (C) 2005-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. 
-+ * -+ * Routines for talking to a userspace program that manages storage. -+ * -+ * The kernel side: -+ * - starts the userspace program; -+ * - sends messages telling it when to open and close the connection; -+ * - tells it when to quit; -+ * -+ * The user space side: -+ * - passes messages regarding status; -+ * -+ */ -+ -+#include -+#include -+#include -+ -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_netlink.h" -+#include "tuxonice_storage.h" -+#include "tuxonice_ui.h" -+ -+static struct user_helper_data usm_helper_data; -+static struct toi_module_ops usm_ops; -+static int message_received, usm_prepare_count; -+static int storage_manager_last_action, storage_manager_action; -+ -+static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) -+{ -+ int type; -+ int *data; -+ -+ type = nlh->nlmsg_type; -+ -+ /* A control message: ignore them */ -+ if (type < NETLINK_MSG_BASE) -+ return 0; -+ -+ /* Unknown message: reply with EINVAL */ -+ if (type >= USM_MSG_MAX) -+ return -EINVAL; -+ -+ /* All operations require privileges, even GET */ -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ /* Only allow one task to receive NOFREEZE privileges */ -+ if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1) -+ return -EBUSY; -+ -+ data = (int *) NLMSG_DATA(nlh); -+ -+ switch (type) { -+ case USM_MSG_SUCCESS: -+ case USM_MSG_FAILED: -+ message_received = type; -+ complete(&usm_helper_data.wait_for_process); -+ break; -+ default: -+ printk(KERN_INFO "Storage manager doesn't recognise " -+ "message %d.\n", type); -+ } -+ -+ return 1; -+} -+ -+#ifdef CONFIG_NET -+static int activations; -+ -+int toi_activate_storage(int force) -+{ -+ int tries = 1; -+ -+ if (usm_helper_data.pid == -1 || !usm_ops.enabled) -+ return 0; -+ -+ message_received = 0; -+ activations++; -+ -+ if (activations > 1 && !force) -+ return 0; -+ -+ while ((!message_received || message_received == USM_MSG_FAILED) && -+ tries < 2) { -+ 
toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt " -+ "%d.\n", tries); -+ -+ init_completion(&usm_helper_data.wait_for_process); -+ -+ toi_send_netlink_message(&usm_helper_data, -+ USM_MSG_CONNECT, -+ NULL, 0); -+ -+ /* Wait 2 seconds for the userspace process to make contact */ -+ wait_for_completion_timeout(&usm_helper_data.wait_for_process, -+ 2*HZ); -+ -+ tries++; -+ } -+ -+ return 0; -+} -+ -+int toi_deactivate_storage(int force) -+{ -+ if (usm_helper_data.pid == -1 || !usm_ops.enabled) -+ return 0; -+ -+ message_received = 0; -+ activations--; -+ -+ if (activations && !force) -+ return 0; -+ -+ init_completion(&usm_helper_data.wait_for_process); -+ -+ toi_send_netlink_message(&usm_helper_data, -+ USM_MSG_DISCONNECT, -+ NULL, 0); -+ -+ wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ); -+ -+ if (!message_received || message_received == USM_MSG_FAILED) { -+ printk(KERN_INFO "Returning failure disconnecting storage.\n"); -+ return 1; -+ } -+ -+ return 0; -+} -+#endif -+ -+static void storage_manager_simulate(void) -+{ -+ printk(KERN_INFO "--- Storage manager simulate ---\n"); -+ toi_prepare_usm(); -+ schedule(); -+ printk(KERN_INFO "--- Activate storage 1 ---\n"); -+ toi_activate_storage(1); -+ schedule(); -+ printk(KERN_INFO "--- Deactivate storage 1 ---\n"); -+ toi_deactivate_storage(1); -+ schedule(); -+ printk(KERN_INFO "--- Cleanup usm ---\n"); -+ toi_cleanup_usm(); -+ schedule(); -+ printk(KERN_INFO "--- Storage manager simulate ends ---\n"); -+} -+ -+static int usm_storage_needed(void) -+{ -+ return sizeof(int) + strlen(usm_helper_data.program) + 1; -+} -+ -+static int usm_save_config_info(char *buf) -+{ -+ int len = strlen(usm_helper_data.program); -+ memcpy(buf, usm_helper_data.program, len + 1); -+ return sizeof(int) + len + 1; -+} -+ -+static void usm_load_config_info(char *buf, int size) -+{ -+ /* Don't load the saved path if one has already been set */ -+ if (usm_helper_data.program[0]) -+ return; -+ -+ 
memcpy(usm_helper_data.program, buf + sizeof(int), *((int *) buf)); -+} -+ -+static int usm_memory_needed(void) -+{ -+ /* ball park figure of 32 pages */ -+ return 32 * PAGE_SIZE; -+} -+ -+/* toi_prepare_usm -+ */ -+int toi_prepare_usm(void) -+{ -+ usm_prepare_count++; -+ -+ if (usm_prepare_count > 1 || !usm_ops.enabled) -+ return 0; -+ -+ usm_helper_data.pid = -1; -+ -+ if (!*usm_helper_data.program) -+ return 0; -+ -+ toi_netlink_setup(&usm_helper_data); -+ -+ if (usm_helper_data.pid == -1) -+ printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't" -+ " start it.\n"); -+ -+ toi_activate_storage(0); -+ -+ return usm_helper_data.pid != -1; -+} -+ -+void toi_cleanup_usm(void) -+{ -+ usm_prepare_count--; -+ -+ if (usm_helper_data.pid > -1 && !usm_prepare_count) { -+ toi_deactivate_storage(0); -+ toi_netlink_close(&usm_helper_data); -+ } -+} -+ -+static void storage_manager_activate(void) -+{ -+ if (storage_manager_action == storage_manager_last_action) -+ return; -+ -+ if (storage_manager_action) -+ toi_prepare_usm(); -+ else -+ toi_cleanup_usm(); -+ -+ storage_manager_last_action = storage_manager_action; -+} -+ -+/* -+ * User interface specific /sys/power/tuxonice entries. 
-+ */ -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate), -+ SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL), -+ SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0, -+ NULL), -+ SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1, -+ 0, storage_manager_activate) -+}; -+ -+static struct toi_module_ops usm_ops = { -+ .type = MISC_MODULE, -+ .name = "usm", -+ .directory = "storage_manager", -+ .module = THIS_MODULE, -+ .storage_needed = usm_storage_needed, -+ .save_config_info = usm_save_config_info, -+ .load_config_info = usm_load_config_info, -+ .memory_needed = usm_memory_needed, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* toi_usm_sysfs_init -+ * Description: Boot time initialisation for user interface. -+ */ -+int toi_usm_init(void) -+{ -+ usm_helper_data.nl = NULL; -+ usm_helper_data.program[0] = '\0'; -+ usm_helper_data.pid = -1; -+ usm_helper_data.skb_size = 0; -+ usm_helper_data.pool_limit = 6; -+ usm_helper_data.netlink_id = NETLINK_TOI_USM; -+ usm_helper_data.name = "userspace storage manager"; -+ usm_helper_data.rcv_msg = usm_user_rcv_msg; -+ usm_helper_data.interface_version = 2; -+ usm_helper_data.must_init = 0; -+ init_completion(&usm_helper_data.wait_for_process); -+ -+ return toi_register_module(&usm_ops); -+} -+ -+void toi_usm_exit(void) -+{ -+ toi_netlink_close_complete(&usm_helper_data); -+ toi_unregister_module(&usm_ops); -+} -diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h -new file mode 100644 -index 0000000..e43a603 ---- /dev/null -+++ b/kernel/power/tuxonice_storage.h -@@ -0,0 +1,45 @@ -+/* -+ * kernel/power/tuxonice_storage.h -+ * -+ * Copyright (C) 2005-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. 
-+ */ -+ -+#ifdef CONFIG_NET -+int toi_prepare_usm(void); -+void toi_cleanup_usm(void); -+ -+int toi_activate_storage(int force); -+int toi_deactivate_storage(int force); -+extern int toi_usm_init(void); -+extern void toi_usm_exit(void); -+#else -+static inline int toi_usm_init(void) { return 0; } -+static inline void toi_usm_exit(void) { } -+ -+static inline int toi_activate_storage(int force) -+{ -+ return 0; -+} -+ -+static inline int toi_deactivate_storage(int force) -+{ -+ return 0; -+} -+ -+static inline int toi_prepare_usm(void) { return 0; } -+static inline void toi_cleanup_usm(void) { } -+#endif -+ -+enum { -+ USM_MSG_BASE = 0x10, -+ -+ /* Kernel -> Userspace */ -+ USM_MSG_CONNECT = 0x30, -+ USM_MSG_DISCONNECT = 0x31, -+ USM_MSG_SUCCESS = 0x40, -+ USM_MSG_FAILED = 0x41, -+ -+ USM_MSG_MAX, -+}; -diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c -new file mode 100644 -index 0000000..298cb81 ---- /dev/null -+++ b/kernel/power/tuxonice_swap.c -@@ -0,0 +1,463 @@ -+/* -+ * kernel/power/tuxonice_swap.c -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * Distributed under GPLv2. -+ * -+ * This file encapsulates functions for usage of swap space as a -+ * backing store. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice.h" -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice_io.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_extent.h" -+#include "tuxonice_bio.h" -+#include "tuxonice_alloc.h" -+#include "tuxonice_builtin.h" -+ -+static struct toi_module_ops toi_swapops; -+ -+/* For swapfile automatically swapon/off'd. 
*/ -+static char swapfilename[255] = ""; -+static int toi_swapon_status; -+ -+/* Swap Pages */ -+static unsigned long swap_allocated; -+ -+static struct sysinfo swapinfo; -+ -+static int is_ram_backed(struct swap_info_struct *si) -+{ -+ if (!strncmp(si->bdev->bd_disk->disk_name, "ram", 3) || -+ !strncmp(si->bdev->bd_disk->disk_name, "zram", 4)) -+ return 1; -+ -+ return 0; -+} -+ -+/** -+ * enable_swapfile: Swapon the user specified swapfile prior to hibernating. -+ * -+ * Activate the given swapfile if it wasn't already enabled. Remember whether -+ * we really did swapon it for swapoffing later. -+ */ -+static void enable_swapfile(void) -+{ -+ int activateswapresult = -EINVAL; -+ -+ if (swapfilename[0]) { -+ /* Attempt to swap on with maximum priority */ -+ activateswapresult = sys_swapon(swapfilename, 0xFFFF); -+ if (activateswapresult && activateswapresult != -EBUSY) -+ printk(KERN_ERR "TuxOnIce: The swapfile/partition " -+ "specified by /sys/power/tuxonice/swap/swapfile" -+ " (%s) could not be turned on (error %d). " -+ "Attempting to continue.\n", -+ swapfilename, activateswapresult); -+ if (!activateswapresult) -+ toi_swapon_status = 1; -+ } -+} -+ -+/** -+ * disable_swapfile: Swapoff any file swaponed at the start of the cycle. -+ * -+ * If we did successfully swapon a file at the start of the cycle, swapoff -+ * it now (finishing up). 
-+ */ -+static void disable_swapfile(void) -+{ -+ if (!toi_swapon_status) -+ return; -+ -+ sys_swapoff(swapfilename); -+ toi_swapon_status = 0; -+} -+ -+static int add_blocks_to_extent_chain(struct toi_bdev_info *chain, -+ unsigned long start, unsigned long end) -+{ -+ if (test_action_state(TOI_TEST_BIO)) -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to " -+ "chain %p.", start << chain->bmap_shift, -+ end << chain->bmap_shift, chain); -+ -+ return toi_add_to_extent_chain(&chain->blocks, start, end); -+} -+ -+ -+static int get_main_pool_phys_params(struct toi_bdev_info *chain) -+{ -+ struct hibernate_extent *extentpointer = NULL; -+ unsigned long address, extent_min = 0, extent_max = 0; -+ int empty = 1; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for " -+ "chain %d.", chain->allocator_index); -+ -+ if (!chain->allocations.first) -+ return 0; -+ -+ if (chain->blocks.first) -+ toi_put_extent_chain(&chain->blocks); -+ -+ toi_extent_for_each(&chain->allocations, extentpointer, address) { -+ swp_entry_t swap_address = (swp_entry_t) { address }; -+ struct block_device *bdev; -+ sector_t new_sector = map_swap_entry(swap_address, &bdev); -+ -+ if (empty) { -+ empty = 0; -+ extent_min = extent_max = new_sector; -+ continue; -+ } -+ -+ if (new_sector == extent_max + 1) { -+ extent_max++; -+ continue; -+ } -+ -+ if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) { -+ printk(KERN_ERR "Out of memory while making block " -+ "chains.\n"); -+ return -ENOMEM; -+ } -+ -+ extent_min = new_sector; -+ extent_max = new_sector; -+ } -+ -+ if (!empty && -+ add_blocks_to_extent_chain(chain, extent_min, extent_max)) { -+ printk(KERN_ERR "Out of memory while making block chains.\n"); -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Like si_swapinfo, except that we don't include ram backed swap (compcache!) -+ * and don't need to use the spinlocks (userspace is stopped when this -+ * function is called). 
-+ */ -+void si_swapinfo_no_compcache(void) -+{ -+ unsigned int i; -+ -+ si_swapinfo(&swapinfo); -+ swapinfo.freeswap = 0; -+ swapinfo.totalswap = 0; -+ -+ for (i = 0; i < MAX_SWAPFILES; i++) { -+ struct swap_info_struct *si = get_swap_info_struct(i); -+ if (si && (si->flags & SWP_WRITEOK) && !is_ram_backed(si)) { -+ swapinfo.totalswap += si->inuse_pages; -+ swapinfo.freeswap += si->pages - si->inuse_pages; -+ } -+ } -+} -+/* -+ * We can't just remember the value from allocation time, because other -+ * processes might have allocated swap in the mean time. -+ */ -+static unsigned long toi_swap_storage_available(void) -+{ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available."); -+ si_swapinfo_no_compcache(); -+ return swapinfo.freeswap + swap_allocated; -+} -+ -+static int toi_swap_initialise(int starting_cycle) -+{ -+ if (!starting_cycle) -+ return 0; -+ -+ enable_swapfile(); -+ return 0; -+} -+ -+static void toi_swap_cleanup(int ending_cycle) -+{ -+ if (!ending_cycle) -+ return; -+ -+ disable_swapfile(); -+} -+ -+static void toi_swap_free_storage(struct toi_bdev_info *chain) -+{ -+ /* Free swap entries */ -+ struct hibernate_extent *extentpointer; -+ unsigned long extentvalue; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.", -+ chain); -+ -+ swap_allocated -= chain->allocations.size; -+ toi_extent_for_each(&chain->allocations, extentpointer, extentvalue) -+ swap_free((swp_entry_t) { extentvalue }); -+ -+ toi_put_extent_chain(&chain->allocations); -+} -+ -+static void free_swap_range(unsigned long min, unsigned long max) -+{ -+ int j; -+ -+ for (j = min; j <= max; j++) -+ swap_free((swp_entry_t) { j }); -+ swap_allocated -= (max - min + 1); -+} -+ -+/* -+ * Allocation of a single swap type. Swap priorities are handled at the higher -+ * level. 
-+ */ -+static int toi_swap_allocate_storage(struct toi_bdev_info *chain, -+ unsigned long request) -+{ -+ unsigned long gotten = 0; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, " Swap allocate storage: Asked to" -+ " allocate %lu pages from device %d.", request, -+ chain->allocator_index); -+ -+ while (gotten < request) { -+ swp_entry_t start, end; -+ get_swap_range_of_type(chain->allocator_index, &start, &end, -+ request - gotten + 1); -+ if (start.val) { -+ int added = end.val - start.val + 1; -+ if (toi_add_to_extent_chain(&chain->allocations, -+ start.val, end.val)) { -+ printk(KERN_INFO "Failed to allocate extent for " -+ "%lu-%lu.\n", start.val, end.val); -+ free_swap_range(start.val, end.val); -+ break; -+ } -+ gotten += added; -+ swap_allocated += added; -+ } else -+ break; -+ } -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, " Allocated %lu pages.", gotten); -+ return gotten; -+} -+ -+static int toi_swap_register_storage(void) -+{ -+ int i, result = 0; -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage."); -+ for (i = 0; i < MAX_SWAPFILES; i++) { -+ struct swap_info_struct *si = get_swap_info_struct(i); -+ struct toi_bdev_info *devinfo; -+ unsigned char *p; -+ unsigned char buf[256]; -+ struct fs_info *fs_info; -+ -+ if (!si || !(si->flags & SWP_WRITEOK) || is_ram_backed(si)) -+ continue; -+ -+ devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), -+ GFP_ATOMIC); -+ if (!devinfo) { -+ printk("Failed to allocate devinfo struct for swap " -+ "device %d.\n", i); -+ return -ENOMEM; -+ } -+ -+ devinfo->bdev = si->bdev; -+ devinfo->allocator = &toi_swapops; -+ devinfo->allocator_index = i; -+ -+ fs_info = fs_info_from_block_dev(si->bdev); -+ if (fs_info && !IS_ERR(fs_info)) { -+ memcpy(devinfo->uuid, &fs_info->uuid, 16); -+ free_fs_info(fs_info); -+ } else -+ result = (int) PTR_ERR(fs_info); -+ -+ if (!fs_info) -+ printk("fs_info from block dev returned %d.\n", result); -+ devinfo->dev_t = si->bdev->bd_dev; -+ devinfo->prio = si->prio; -+ 
devinfo->bmap_shift = 3; -+ devinfo->blocks_per_page = 1; -+ -+ p = d_path(&si->swap_file->f_path, buf, sizeof(buf)); -+ sprintf(devinfo->name, "swap on %s", p); -+ -+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:" -+ " Device %d (%lx), prio %d.", i, -+ (unsigned long) devinfo->dev_t, devinfo->prio); -+ toi_bio_ops.register_storage(devinfo); -+ } -+ -+ return 0; -+} -+ -+/* -+ * workspace_size -+ * -+ * Description: -+ * Returns the number of bytes of RAM needed for this -+ * code to do its work. (Used when calculating whether -+ * we have enough memory to be able to hibernate & resume). -+ * -+ */ -+static int toi_swap_memory_needed(void) -+{ -+ return 1; -+} -+ -+/* -+ * Print debug info -+ * -+ * Description: -+ */ -+static int toi_swap_print_debug_stats(char *buffer, int size) -+{ -+ int len = 0; -+ -+ len = scnprintf(buffer, size, "- Swap Allocator enabled.\n"); -+ if (swapfilename[0]) -+ len += scnprintf(buffer+len, size-len, -+ " Attempting to automatically swapon: %s.\n", -+ swapfilename); -+ -+ si_swapinfo_no_compcache(); -+ -+ len += scnprintf(buffer+len, size-len, -+ " Swap available for image: %lu pages.\n", -+ swapinfo.freeswap + swap_allocated); -+ -+ return len; -+} -+ -+static int header_locations_read_sysfs(const char *page, int count) -+{ -+ int i, printedpartitionsmessage = 0, len = 0, haveswap = 0; -+ struct inode *swapf = NULL; -+ int zone; -+ char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL); -+ char *path, *output = (char *) page; -+ int path_len; -+ -+ if (!page) -+ return 0; -+ -+ for (i = 0; i < MAX_SWAPFILES; i++) { -+ struct swap_info_struct *si = get_swap_info_struct(i); -+ -+ if (!si || !(si->flags & SWP_WRITEOK)) -+ continue; -+ -+ if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) { -+ haveswap = 1; -+ if (!printedpartitionsmessage) { -+ len += sprintf(output + len, -+ "For swap partitions, simply use the " -+ "format: resume=swap:/dev/hda1.\n"); -+ printedpartitionsmessage = 1; -+ } -+ } else { -+ 
path_len = 0; -+ -+ path = d_path(&si->swap_file->f_path, path_page, -+ PAGE_SIZE); -+ path_len = snprintf(path_page, PAGE_SIZE, "%s", path); -+ -+ haveswap = 1; -+ swapf = si->swap_file->f_mapping->host; -+ zone = bmap(swapf, 0); -+ if (!zone) { -+ len += sprintf(output + len, -+ "Swapfile %s has been corrupted. Reuse" -+ " mkswap on it and try again.\n", -+ path_page); -+ } else { -+ char name_buffer[BDEVNAME_SIZE]; -+ len += sprintf(output + len, -+ "For swapfile `%s`," -+ " use resume=swap:/dev/%s:0x%x.\n", -+ path_page, -+ bdevname(si->bdev, name_buffer), -+ zone << (swapf->i_blkbits - 9)); -+ } -+ } -+ } -+ -+ if (!haveswap) -+ len = sprintf(output, "You need to turn on swap partitions " -+ "before examining this file.\n"); -+ -+ toi_free_page(10, (unsigned long) path_page); -+ return len; -+} -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL), -+ SYSFS_CUSTOM("headerlocations", SYSFS_READONLY, -+ header_locations_read_sysfs, NULL, 0, NULL), -+ SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0, -+ attempt_to_parse_resume_device2), -+}; -+ -+static struct toi_bio_allocator_ops toi_bio_swapops = { -+ .register_storage = toi_swap_register_storage, -+ .storage_available = toi_swap_storage_available, -+ .allocate_storage = toi_swap_allocate_storage, -+ .bmap = get_main_pool_phys_params, -+ .free_storage = toi_swap_free_storage, -+}; -+ -+static struct toi_module_ops toi_swapops = { -+ .type = BIO_ALLOCATOR_MODULE, -+ .name = "swap storage", -+ .directory = "swap", -+ .module = THIS_MODULE, -+ .memory_needed = toi_swap_memory_needed, -+ .print_debug_info = toi_swap_print_debug_stats, -+ .initialise = toi_swap_initialise, -+ .cleanup = toi_swap_cleanup, -+ .bio_allocator_ops = &toi_bio_swapops, -+ -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+/* ---- Registration ---- */ -+static __init int toi_swap_load(void) 
-+{ -+ return toi_register_module(&toi_swapops); -+} -+ -+#ifdef MODULE -+static __exit void toi_swap_unload(void) -+{ -+ toi_unregister_module(&toi_swapops); -+} -+ -+module_init(toi_swap_load); -+module_exit(toi_swap_unload); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("TuxOnIce SwapAllocator"); -+#else -+late_initcall(toi_swap_load); -+#endif -diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c -new file mode 100644 -index 0000000..fd3ed29 ---- /dev/null -+++ b/kernel/power/tuxonice_sysfs.c -@@ -0,0 +1,335 @@ -+/* -+ * kernel/power/tuxonice_sysfs.c -+ * -+ * Copyright (C) 2002-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * This file contains support for sysfs entries for tuning TuxOnIce. -+ * -+ * We have a generic handler that deals with the most common cases, and -+ * hooks for special handlers to use. -+ */ -+ -+#include -+ -+#include "tuxonice_sysfs.h" -+#include "tuxonice.h" -+#include "tuxonice_storage.h" -+#include "tuxonice_alloc.h" -+ -+static int toi_sysfs_initialised; -+ -+static void toi_initialise_sysfs(void); -+ -+static struct toi_sysfs_data sysfs_params[]; -+ -+#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr) -+ -+static void toi_main_wrapper(void) -+{ -+ toi_try_hibernate(); -+} -+ -+static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr, -+ char *page) -+{ -+ struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); -+ int len = 0; -+ int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ; -+ -+ if (full_prep && toi_start_anything(0)) -+ return -EBUSY; -+ -+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) -+ toi_prepare_usm(); -+ -+ switch (sysfs_data->type) { -+ case TOI_SYSFS_DATA_CUSTOM: -+ len = (sysfs_data->data.special.read_sysfs) ? 
-+ (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE) -+ : 0; -+ break; -+ case TOI_SYSFS_DATA_BIT: -+ len = sprintf(page, "%d\n", -+ -test_bit(sysfs_data->data.bit.bit, -+ sysfs_data->data.bit.bit_vector)); -+ break; -+ case TOI_SYSFS_DATA_INTEGER: -+ len = sprintf(page, "%d\n", -+ *(sysfs_data->data.integer.variable)); -+ break; -+ case TOI_SYSFS_DATA_LONG: -+ len = sprintf(page, "%ld\n", -+ *(sysfs_data->data.a_long.variable)); -+ break; -+ case TOI_SYSFS_DATA_UL: -+ len = sprintf(page, "%lu\n", -+ *(sysfs_data->data.ul.variable)); -+ break; -+ case TOI_SYSFS_DATA_STRING: -+ len = sprintf(page, "%s\n", -+ sysfs_data->data.string.variable); -+ break; -+ } -+ -+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ) -+ toi_cleanup_usm(); -+ -+ if (full_prep) -+ toi_finish_anything(0); -+ -+ return len; -+} -+ -+#define BOUND(_variable, _type) do { \ -+ if (*_variable < sysfs_data->data._type.minimum) \ -+ *_variable = sysfs_data->data._type.minimum; \ -+ else if (*_variable > sysfs_data->data._type.maximum) \ -+ *_variable = sysfs_data->data._type.maximum; \ -+} while (0) -+ -+static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr, -+ const char *my_buf, size_t count) -+{ -+ int assigned_temp_buffer = 0, result = count; -+ struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr); -+ -+ if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME))) -+ return -EBUSY; -+ -+ ((char *) my_buf)[count] = 0; -+ -+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) -+ toi_prepare_usm(); -+ -+ switch (sysfs_data->type) { -+ case TOI_SYSFS_DATA_CUSTOM: -+ if (sysfs_data->data.special.write_sysfs) -+ result = (sysfs_data->data.special.write_sysfs)(my_buf, -+ count); -+ break; -+ case TOI_SYSFS_DATA_BIT: -+ { -+ unsigned long value; -+ result = strict_strtoul(my_buf, 0, &value); -+ if (result) -+ break; -+ if (value) -+ set_bit(sysfs_data->data.bit.bit, -+ (sysfs_data->data.bit.bit_vector)); -+ else -+ clear_bit(sysfs_data->data.bit.bit, -+ 
(sysfs_data->data.bit.bit_vector)); -+ } -+ break; -+ case TOI_SYSFS_DATA_INTEGER: -+ { -+ long temp; -+ result = strict_strtol(my_buf, 0, &temp); -+ if (result) -+ break; -+ *(sysfs_data->data.integer.variable) = (int) temp; -+ BOUND(sysfs_data->data.integer.variable, integer); -+ break; -+ } -+ case TOI_SYSFS_DATA_LONG: -+ { -+ long *variable = -+ sysfs_data->data.a_long.variable; -+ result = strict_strtol(my_buf, 0, variable); -+ if (result) -+ break; -+ BOUND(variable, a_long); -+ break; -+ } -+ case TOI_SYSFS_DATA_UL: -+ { -+ unsigned long *variable = -+ sysfs_data->data.ul.variable; -+ result = strict_strtoul(my_buf, 0, variable); -+ if (result) -+ break; -+ BOUND(variable, ul); -+ break; -+ } -+ break; -+ case TOI_SYSFS_DATA_STRING: -+ { -+ int copy_len = count; -+ char *variable = -+ sysfs_data->data.string.variable; -+ -+ if (sysfs_data->data.string.max_length && -+ (copy_len > sysfs_data->data.string.max_length)) -+ copy_len = sysfs_data->data.string.max_length; -+ -+ if (!variable) { -+ variable = (char *) toi_get_zeroed_page(31, -+ TOI_ATOMIC_GFP); -+ sysfs_data->data.string.variable = variable; -+ assigned_temp_buffer = 1; -+ } -+ strncpy(variable, my_buf, copy_len); -+ if (copy_len && my_buf[copy_len - 1] == '\n') -+ variable[count - 1] = 0; -+ variable[count] = 0; -+ } -+ break; -+ } -+ -+ if (!result) -+ result = count; -+ -+ /* Side effect routine? 
*/ -+ if (result == count && sysfs_data->write_side_effect) -+ sysfs_data->write_side_effect(); -+ -+ /* Free temporary buffers */ -+ if (assigned_temp_buffer) { -+ toi_free_page(31, -+ (unsigned long) sysfs_data->data.string.variable); -+ sysfs_data->data.string.variable = NULL; -+ } -+ -+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE) -+ toi_cleanup_usm(); -+ -+ toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME); -+ -+ return result; -+} -+ -+static struct sysfs_ops toi_sysfs_ops = { -+ .show = &toi_attr_show, -+ .store = &toi_attr_store, -+}; -+ -+static struct kobj_type toi_ktype = { -+ .sysfs_ops = &toi_sysfs_ops, -+}; -+ -+struct kobject *tuxonice_kobj; -+ -+/* Non-module sysfs entries. -+ * -+ * This array contains entries that are automatically registered at -+ * boot. Modules and the console code register their own entries separately. -+ */ -+ -+static struct toi_sysfs_data sysfs_params[] = { -+ SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL, -+ SYSFS_HIBERNATING, toi_main_wrapper), -+ SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL, -+ SYSFS_RESUMING, toi_try_resume) -+}; -+ -+void remove_toi_sysdir(struct kobject *kobj) -+{ -+ if (!kobj) -+ return; -+ -+ kobject_put(kobj); -+} -+ -+struct kobject *make_toi_sysdir(char *name) -+{ -+ struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj); -+ -+ if (!kobj) { -+ printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs " -+ "dir!\n"); -+ return NULL; -+ } -+ -+ kobj->ktype = &toi_ktype; -+ -+ return kobj; -+} -+ -+/* toi_register_sysfs_file -+ * -+ * Helper for registering a new /sysfs/tuxonice entry. 
-+ */ -+ -+int toi_register_sysfs_file( -+ struct kobject *kobj, -+ struct toi_sysfs_data *toi_sysfs_data) -+{ -+ int result; -+ -+ if (!toi_sysfs_initialised) -+ toi_initialise_sysfs(); -+ -+ result = sysfs_create_file(kobj, &toi_sysfs_data->attr); -+ if (result) -+ printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s " -+ "returned %d.\n", -+ toi_sysfs_data->attr.name, result); -+ kobj->ktype = &toi_ktype; -+ -+ return result; -+} -+EXPORT_SYMBOL_GPL(toi_register_sysfs_file); -+ -+/* toi_unregister_sysfs_file -+ * -+ * Helper for removing unwanted /sys/power/tuxonice entries. -+ * -+ */ -+void toi_unregister_sysfs_file(struct kobject *kobj, -+ struct toi_sysfs_data *toi_sysfs_data) -+{ -+ sysfs_remove_file(kobj, &toi_sysfs_data->attr); -+} -+EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file); -+ -+void toi_cleanup_sysfs(void) -+{ -+ int i, -+ numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); -+ -+ if (!toi_sysfs_initialised) -+ return; -+ -+ for (i = 0; i < numfiles; i++) -+ toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]); -+ -+ kobject_put(tuxonice_kobj); -+ toi_sysfs_initialised = 0; -+} -+ -+/* toi_initialise_sysfs -+ * -+ * Initialise the /sysfs/tuxonice directory. 
-+ */ -+ -+static void toi_initialise_sysfs(void) -+{ -+ int i; -+ int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data); -+ -+ if (toi_sysfs_initialised) -+ return; -+ -+ /* Make our TuxOnIce directory a child of /sys/power */ -+ tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj); -+ if (!tuxonice_kobj) -+ return; -+ -+ toi_sysfs_initialised = 1; -+ -+ for (i = 0; i < numfiles; i++) -+ toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]); -+} -+ -+int toi_sysfs_init(void) -+{ -+ toi_initialise_sysfs(); -+ return 0; -+} -+ -+void toi_sysfs_exit(void) -+{ -+ toi_cleanup_sysfs(); -+} -diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h -new file mode 100644 -index 0000000..7ac9e64 ---- /dev/null -+++ b/kernel/power/tuxonice_sysfs.h -@@ -0,0 +1,137 @@ -+/* -+ * kernel/power/tuxonice_sysfs.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ */ -+ -+#include -+ -+struct toi_sysfs_data { -+ struct attribute attr; -+ int type; -+ int flags; -+ union { -+ struct { -+ unsigned long *bit_vector; -+ int bit; -+ } bit; -+ struct { -+ int *variable; -+ int minimum; -+ int maximum; -+ } integer; -+ struct { -+ long *variable; -+ long minimum; -+ long maximum; -+ } a_long; -+ struct { -+ unsigned long *variable; -+ unsigned long minimum; -+ unsigned long maximum; -+ } ul; -+ struct { -+ char *variable; -+ int max_length; -+ } string; -+ struct { -+ int (*read_sysfs) (const char *buffer, int count); -+ int (*write_sysfs) (const char *buffer, int count); -+ void *data; -+ } special; -+ } data; -+ -+ /* Side effects routine. 
Used, eg, for reparsing the -+ * resume= entry when it changes */ -+ void (*write_side_effect) (void); -+ struct list_head sysfs_data_list; -+}; -+ -+enum { -+ TOI_SYSFS_DATA_NONE = 1, -+ TOI_SYSFS_DATA_CUSTOM, -+ TOI_SYSFS_DATA_BIT, -+ TOI_SYSFS_DATA_INTEGER, -+ TOI_SYSFS_DATA_UL, -+ TOI_SYSFS_DATA_LONG, -+ TOI_SYSFS_DATA_STRING -+}; -+ -+#define SYSFS_WRITEONLY 0200 -+#define SYSFS_READONLY 0444 -+#define SYSFS_RW 0644 -+ -+#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \ -+ .attr = {.name = _name , .mode = _mode }, \ -+ .type = TOI_SYSFS_DATA_BIT, \ -+ .flags = _flags, \ -+ .data = { .bit = { .bit_vector = _ul, .bit = _bit } } } -+ -+#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \ -+ .attr = {.name = _name , .mode = _mode }, \ -+ .type = TOI_SYSFS_DATA_INTEGER, \ -+ .flags = _flags, \ -+ .data = { .integer = { .variable = _int, .minimum = _min, \ -+ .maximum = _max } }, \ -+ .write_side_effect = _wse } -+ -+#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \ -+ .attr = {.name = _name , .mode = _mode }, \ -+ .type = TOI_SYSFS_DATA_UL, \ -+ .flags = _flags, \ -+ .data = { .ul = { .variable = _ul, .minimum = _min, \ -+ .maximum = _max } } } -+ -+#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \ -+ .attr = {.name = _name , .mode = _mode }, \ -+ .type = TOI_SYSFS_DATA_LONG, \ -+ .flags = _flags, \ -+ .data = { .a_long = { .variable = _long, .minimum = _min, \ -+ .maximum = _max } } } -+ -+#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \ -+ .attr = {.name = _name , .mode = _mode }, \ -+ .type = TOI_SYSFS_DATA_STRING, \ -+ .flags = _flags, \ -+ .data = { .string = { .variable = _string, .max_length = _max_len } }, \ -+ .write_side_effect = _wse } -+ -+#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \ -+ .attr = {.name = _name , .mode = _mode }, \ -+ .type = TOI_SYSFS_DATA_CUSTOM, \ -+ .flags = _flags, \ -+ .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \ -+ 
.write_side_effect = _wse } -+ -+#define SYSFS_NONE(_name, _wse) { \ -+ .attr = {.name = _name , .mode = SYSFS_WRITEONLY }, \ -+ .type = TOI_SYSFS_DATA_NONE, \ -+ .write_side_effect = _wse, \ -+} -+ -+/* Flags */ -+#define SYSFS_NEEDS_SM_FOR_READ 1 -+#define SYSFS_NEEDS_SM_FOR_WRITE 2 -+#define SYSFS_HIBERNATE 4 -+#define SYSFS_RESUME 8 -+#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME) -+#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE) -+#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE) -+#define SYSFS_NEEDS_SM_FOR_BOTH \ -+ (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE) -+ -+int toi_register_sysfs_file(struct kobject *kobj, -+ struct toi_sysfs_data *toi_sysfs_data); -+void toi_unregister_sysfs_file(struct kobject *kobj, -+ struct toi_sysfs_data *toi_sysfs_data); -+ -+extern struct kobject *tuxonice_kobj; -+ -+struct kobject *make_toi_sysdir(char *name); -+void remove_toi_sysdir(struct kobject *obj); -+extern void toi_cleanup_sysfs(void); -+ -+extern int toi_sysfs_init(void); -+extern void toi_sysfs_exit(void); -diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c -new file mode 100644 -index 0000000..ae962a7 ---- /dev/null -+++ b/kernel/power/tuxonice_ui.c -@@ -0,0 +1,250 @@ -+/* -+ * kernel/power/tuxonice_ui.c -+ * -+ * Copyright (C) 1998-2001 Gabor Kuti -+ * Copyright (C) 1998,2001,2002 Pavel Machek -+ * Copyright (C) 2002-2003 Florent Chabaud -+ * Copyright (C) 2002-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Routines for TuxOnIce's user interface. -+ * -+ * The user interface code talks to a userspace program via a -+ * netlink socket. 
-+ * -+ * The kernel side: -+ * - starts the userui program; -+ * - sends text messages and progress bar status; -+ * -+ * The user space side: -+ * - passes messages regarding user requests (abort, toggle reboot etc) -+ * -+ */ -+ -+#define __KERNEL_SYSCALLS__ -+ -+#include -+ -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_netlink.h" -+#include "tuxonice_power_off.h" -+#include "tuxonice_builtin.h" -+ -+static char local_printf_buf[1024]; /* Same as printk - should be safe */ -+struct ui_ops *toi_current_ui; -+EXPORT_SYMBOL_GPL(toi_current_ui); -+ -+/** -+ * toi_wait_for_keypress - Wait for keypress via userui or /dev/console. -+ * -+ * @timeout: Maximum time to wait. -+ * -+ * Wait for a keypress, either from userui or /dev/console if userui isn't -+ * available. The non-userui path is particularly for at boot-time, prior -+ * to userui being started, when we have an important warning to give to -+ * the user. -+ */ -+static char toi_wait_for_keypress(int timeout) -+{ -+ if (toi_current_ui && toi_current_ui->wait_for_key(timeout)) -+ return ' '; -+ -+ return toi_wait_for_keypress_dev_console(timeout); -+} -+ -+/* toi_early_boot_message() -+ * Description: Handle errors early in the process of booting. -+ * The user may press C to continue booting, perhaps -+ * invalidating the image, or space to reboot. -+ * This works from either the serial console or normally -+ * attached keyboard. -+ * -+ * Note that we come in here from init, while the kernel is -+ * locked. If we want to get events from the serial console, -+ * we need to temporarily unlock the kernel. -+ * -+ * toi_early_boot_message may also be called post-boot. -+ * In this case, it simply printks the message and returns. -+ * -+ * Arguments: int Whether we are able to erase the image. -+ * int default_answer. What to do when we timeout. 
This -+ * will normally be continue, but the user might -+ * provide command line options (__setup) to override -+ * particular cases. -+ * Char *. Pointer to a string explaining why we're moaning. -+ */ -+ -+#define say(message, a...) printk(KERN_EMERG message, ##a) -+ -+void toi_early_boot_message(int message_detail, int default_answer, -+ char *warning_reason, ...) -+{ -+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) -+ unsigned long orig_state = get_toi_state(), continue_req = 0; -+ unsigned long orig_loglevel = console_loglevel; -+ int can_ask = 1; -+#else -+ int can_ask = 0; -+#endif -+ -+ va_list args; -+ int printed_len; -+ -+ if (!toi_wait) { -+ set_toi_state(TOI_CONTINUE_REQ); -+ can_ask = 0; -+ } -+ -+ if (warning_reason) { -+ va_start(args, warning_reason); -+ printed_len = vsnprintf(local_printf_buf, -+ sizeof(local_printf_buf), -+ warning_reason, -+ args); -+ va_end(args); -+ } -+ -+ if (!test_toi_state(TOI_BOOT_TIME)) { -+ printk("TuxOnIce: %s\n", local_printf_buf); -+ return; -+ } -+ -+ if (!can_ask) { -+ continue_req = !!default_answer; -+ goto post_ask; -+ } -+ -+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE) -+ console_loglevel = 7; -+ -+ say("=== TuxOnIce ===\n\n"); -+ if (warning_reason) { -+ say("BIG FAT WARNING!! %s\n\n", local_printf_buf); -+ switch (message_detail) { -+ case 0: -+ say("If you continue booting, note that any image WILL" -+ "NOT BE REMOVED.\nTuxOnIce is unable to do so " -+ "because the appropriate modules aren't\n" -+ "loaded. You should manually remove the image " -+ "to avoid any\npossibility of corrupting your " -+ "filesystem(s) later.\n"); -+ break; -+ case 1: -+ say("If you want to use the current TuxOnIce image, " -+ "reboot and try\nagain with the same kernel " -+ "that you hibernated from. 
If you want\n" -+ "to forget that image, continue and the image " -+ "will be erased.\n"); -+ break; -+ } -+ say("Press SPACE to reboot or C to continue booting with " -+ "this kernel\n\n"); -+ if (toi_wait > 0) -+ say("Default action if you don't select one in %d " -+ "seconds is: %s.\n", -+ toi_wait, -+ default_answer == TOI_CONTINUE_REQ ? -+ "continue booting" : "reboot"); -+ } else { -+ say("BIG FAT WARNING!!\n\n" -+ "You have tried to resume from this image before.\n" -+ "If it failed once, it may well fail again.\n" -+ "Would you like to remove the image and boot " -+ "normally?\nThis will be equivalent to entering " -+ "noresume on the\nkernel command line.\n\n" -+ "Press SPACE to remove the image or C to continue " -+ "resuming.\n\n"); -+ if (toi_wait > 0) -+ say("Default action if you don't select one in %d " -+ "seconds is: %s.\n", toi_wait, -+ !!default_answer ? -+ "continue resuming" : "remove the image"); -+ } -+ console_loglevel = orig_loglevel; -+ -+ set_toi_state(TOI_SANITY_CHECK_PROMPT); -+ clear_toi_state(TOI_CONTINUE_REQ); -+ -+ if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */ -+ continue_req = !!default_answer; -+ else -+ continue_req = test_toi_state(TOI_CONTINUE_REQ); -+ -+#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */ -+ -+post_ask: -+ if ((warning_reason) && (!continue_req)) -+ kernel_restart(NULL); -+ -+ restore_toi_state(orig_state); -+ if (continue_req) -+ set_toi_state(TOI_CONTINUE_REQ); -+} -+EXPORT_SYMBOL_GPL(toi_early_boot_message); -+#undef say -+ -+/* -+ * User interface specific /sys/power/tuxonice entries. 
-+ */ -+ -+static struct toi_sysfs_data sysfs_params[] = { -+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) -+ SYSFS_INT("default_console_level", SYSFS_RW, -+ &toi_bkd.toi_default_console_level, 0, 7, 0, NULL), -+ SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0, -+ 1 << 30, 0), -+ SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL, -+ 0) -+#endif -+}; -+ -+static struct toi_module_ops userui_ops = { -+ .type = MISC_HIDDEN_MODULE, -+ .name = "printk ui", -+ .directory = "user_interface", -+ .module = THIS_MODULE, -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+int toi_register_ui_ops(struct ui_ops *this_ui) -+{ -+ if (toi_current_ui) { -+ printk(KERN_INFO "Only one TuxOnIce user interface module can " -+ "be loaded at a time."); -+ return -EBUSY; -+ } -+ -+ toi_current_ui = this_ui; -+ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(toi_register_ui_ops); -+ -+void toi_remove_ui_ops(struct ui_ops *this_ui) -+{ -+ if (toi_current_ui != this_ui) -+ return; -+ -+ toi_current_ui = NULL; -+} -+EXPORT_SYMBOL_GPL(toi_remove_ui_ops); -+ -+/* toi_console_sysfs_init -+ * Description: Boot time initialisation for user interface. 
-+ */ -+ -+int toi_ui_init(void) -+{ -+ return toi_register_module(&userui_ops); -+} -+ -+void toi_ui_exit(void) -+{ -+ toi_unregister_module(&userui_ops); -+} -diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h -new file mode 100644 -index 0000000..64ffc67 ---- /dev/null -+++ b/kernel/power/tuxonice_ui.h -@@ -0,0 +1,97 @@ -+/* -+ * kernel/power/tuxonice_ui.h -+ * -+ * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net) -+ */ -+ -+enum { -+ DONT_CLEAR_BAR, -+ CLEAR_BAR -+}; -+ -+enum { -+ /* Userspace -> Kernel */ -+ USERUI_MSG_ABORT = 0x11, -+ USERUI_MSG_SET_STATE = 0x12, -+ USERUI_MSG_GET_STATE = 0x13, -+ USERUI_MSG_GET_DEBUG_STATE = 0x14, -+ USERUI_MSG_SET_DEBUG_STATE = 0x15, -+ USERUI_MSG_SPACE = 0x18, -+ USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A, -+ USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B, -+ USERUI_MSG_GET_LOGLEVEL = 0x1C, -+ USERUI_MSG_SET_LOGLEVEL = 0x1D, -+ USERUI_MSG_PRINTK = 0x1E, -+ -+ /* Kernel -> Userspace */ -+ USERUI_MSG_MESSAGE = 0x21, -+ USERUI_MSG_PROGRESS = 0x22, -+ USERUI_MSG_POST_ATOMIC_RESTORE = 0x25, -+ -+ USERUI_MSG_MAX, -+}; -+ -+struct userui_msg_params { -+ u32 a, b, c, d; -+ char text[255]; -+}; -+ -+struct ui_ops { -+ char (*wait_for_key) (int timeout); -+ u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...); -+ void (*prepare_status) (int clearbar, const char *fmt, ...); -+ void (*cond_pause) (int pause, char *message); -+ void (*abort)(int result_code, const char *fmt, ...); -+ void (*prepare)(void); -+ void (*cleanup)(void); -+ void (*message)(u32 section, u32 level, u32 normally_logged, -+ const char *fmt, ...); -+}; -+ -+extern struct ui_ops *toi_current_ui; -+ -+#define toi_update_status(val, max, fmt, args...) \ -+ (toi_current_ui ? 
(toi_current_ui->update_status) (val, max, fmt, ##args) : \ -+ max) -+ -+#define toi_prepare_console(void) \ -+ do { if (toi_current_ui) \ -+ (toi_current_ui->prepare)(); \ -+ } while (0) -+ -+#define toi_cleanup_console(void) \ -+ do { if (toi_current_ui) \ -+ (toi_current_ui->cleanup)(); \ -+ } while (0) -+ -+#define abort_hibernate(result, fmt, args...) \ -+ do { if (toi_current_ui) \ -+ (toi_current_ui->abort)(result, fmt, ##args); \ -+ else { \ -+ set_abort_result(result); \ -+ } \ -+ } while (0) -+ -+#define toi_cond_pause(pause, message) \ -+ do { if (toi_current_ui) \ -+ (toi_current_ui->cond_pause)(pause, message); \ -+ } while (0) -+ -+#define toi_prepare_status(clear, fmt, args...) \ -+ do { if (toi_current_ui) \ -+ (toi_current_ui->prepare_status)(clear, fmt, ##args); \ -+ else \ -+ printk(KERN_INFO fmt "%s", ##args, "\n"); \ -+ } while (0) -+ -+#define toi_message(sn, lev, log, fmt, a...) \ -+do { \ -+ if (toi_current_ui && (!sn || test_debug_state(sn))) \ -+ toi_current_ui->message(sn, lev, log, fmt, ##a); \ -+} while (0) -+ -+__exit void toi_ui_cleanup(void); -+extern int toi_ui_init(void); -+extern void toi_ui_exit(void); -+extern int toi_register_ui_ops(struct ui_ops *this_ui); -+extern void toi_remove_ui_ops(struct ui_ops *this_ui); -diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c -new file mode 100644 -index 0000000..8aaa747 ---- /dev/null -+++ b/kernel/power/tuxonice_userui.c -@@ -0,0 +1,675 @@ -+/* -+ * kernel/power/user_ui.c -+ * -+ * Copyright (C) 2005-2007 Bernard Blackham -+ * Copyright (C) 2002-2014 Nigel Cunningham (nigel at tuxonice net) -+ * -+ * This file is released under the GPLv2. -+ * -+ * Routines for TuxOnIce's user interface. -+ * -+ * The user interface code talks to a userspace program via a -+ * netlink socket. 
-+ * -+ * The kernel side: -+ * - starts the userui program; -+ * - sends text messages and progress bar status; -+ * -+ * The user space side: -+ * - passes messages regarding user requests (abort, toggle reboot etc) -+ * -+ */ -+ -+#define __KERNEL_SYSCALLS__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "tuxonice_sysfs.h" -+#include "tuxonice_modules.h" -+#include "tuxonice.h" -+#include "tuxonice_ui.h" -+#include "tuxonice_netlink.h" -+#include "tuxonice_power_off.h" -+ -+static char local_printf_buf[1024]; /* Same as printk - should be safe */ -+ -+static struct user_helper_data ui_helper_data; -+static struct toi_module_ops userui_ops; -+static int orig_kmsg; -+ -+static char lastheader[512]; -+static int lastheader_message_len; -+static int ui_helper_changed; /* Used at resume-time so don't overwrite value -+ set from initrd/ramfs. */ -+ -+/* Number of distinct progress amounts that userspace can display */ -+static int progress_granularity = 30; -+ -+static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key); -+static int userui_wait_should_wake; -+ -+#define toi_stop_waiting_for_userui_key() \ -+{ \ -+ userui_wait_should_wake = true; \ -+ wake_up_interruptible(&userui_wait_for_key); \ -+} -+ -+/** -+ * ui_nl_set_state - Update toi_action based on a message from userui. -+ * -+ * @n: The bit (1 << bit) to set. -+ */ -+static void ui_nl_set_state(int n) -+{ -+ /* Only let them change certain settings */ -+ static const u32 toi_action_mask = -+ (1 << TOI_REBOOT) | (1 << TOI_PAUSE) | -+ (1 << TOI_LOGALL) | -+ (1 << TOI_SINGLESTEP) | -+ (1 << TOI_PAUSE_NEAR_PAGESET_END); -+ static unsigned long new_action; -+ -+ new_action = (toi_bkd.toi_action & (~toi_action_mask)) | -+ (n & toi_action_mask); -+ -+ printk(KERN_DEBUG "n is %x. 
Action flags being changed from %lx " -+ "to %lx.", n, toi_bkd.toi_action, new_action); -+ toi_bkd.toi_action = new_action; -+ -+ if (!test_action_state(TOI_PAUSE) && -+ !test_action_state(TOI_SINGLESTEP)) -+ toi_stop_waiting_for_userui_key(); -+} -+ -+/** -+ * userui_post_atomic_restore - Tell userui that atomic restore just happened. -+ * -+ * Tell userui that atomic restore just occured, so that it can do things like -+ * redrawing the screen, re-getting settings and so on. -+ */ -+static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd) -+{ -+ toi_send_netlink_message(&ui_helper_data, -+ USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0); -+} -+ -+/** -+ * userui_storage_needed - Report how much memory in image header is needed. -+ */ -+static int userui_storage_needed(void) -+{ -+ return sizeof(ui_helper_data.program) + 1 + sizeof(int); -+} -+ -+/** -+ * userui_save_config_info - Fill buffer with config info for image header. -+ * -+ * @buf: Buffer into which to put the config info we want to save. -+ */ -+static int userui_save_config_info(char *buf) -+{ -+ *((int *) buf) = progress_granularity; -+ memcpy(buf + sizeof(int), ui_helper_data.program, -+ sizeof(ui_helper_data.program)); -+ return sizeof(ui_helper_data.program) + sizeof(int) + 1; -+} -+ -+/** -+ * userui_load_config_info - Restore config info from buffer. -+ * -+ * @buf: Buffer containing header info loaded. -+ * @size: Size of data loaded for this module. -+ */ -+static void userui_load_config_info(char *buf, int size) -+{ -+ progress_granularity = *((int *) buf); -+ size -= sizeof(int); -+ -+ /* Don't load the saved path if one has already been set */ -+ if (ui_helper_changed) -+ return; -+ -+ if (size > sizeof(ui_helper_data.program)) -+ size = sizeof(ui_helper_data.program); -+ -+ memcpy(ui_helper_data.program, buf + sizeof(int), size); -+ ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0'; -+} -+ -+/** -+ * set_ui_program_set: Record that userui program was changed. 
-+ * -+ * Side effect routine for when the userui program is set. In an initrd or -+ * ramfs, the user may set a location for the userui program. If this happens, -+ * we don't want to reload the value that was saved in the image header. This -+ * routine allows us to flag that we shouldn't restore the program name from -+ * the image header. -+ */ -+static void set_ui_program_set(void) -+{ -+ ui_helper_changed = 1; -+} -+ -+/** -+ * userui_memory_needed - Tell core how much memory to reserve for us. -+ */ -+static int userui_memory_needed(void) -+{ -+ /* ball park figure of 128 pages */ -+ return 128 * PAGE_SIZE; -+} -+ -+/** -+ * userui_update_status - Update the progress bar and (if on) in-bar message. -+ * -+ * @value: Current progress percentage numerator. -+ * @maximum: Current progress percentage denominator. -+ * @fmt: Message to be displayed in the middle of the progress bar. -+ * -+ * Note that a NULL message does not mean that any previous message is erased! -+ * For that, you need toi_prepare_status with clearbar on. -+ * -+ * Returns an unsigned long, being the next numerator (as determined by the -+ * maximum and progress granularity) where status needs to be updated. -+ * This is to reduce unnecessary calls to update_status. -+ */ -+static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...) -+{ -+ static u32 last_step = 9999; -+ struct userui_msg_params msg; -+ u32 this_step, next_update; -+ int bitshift; -+ -+ if (ui_helper_data.pid == -1) -+ return 0; -+ -+ if ((!maximum) || (!progress_granularity)) -+ return maximum; -+ -+ if (value < 0) -+ value = 0; -+ -+ if (value > maximum) -+ value = maximum; -+ -+ /* Try to avoid math problems - we can't do 64 bit math here -+ * (and shouldn't need it - anyone got screen resolution -+ * of 65536 pixels or more?) 
*/ -+ bitshift = fls(maximum) - 16; -+ if (bitshift > 0) { -+ u32 temp_maximum = maximum >> bitshift; -+ u32 temp_value = value >> bitshift; -+ this_step = (u32) -+ (temp_value * progress_granularity / temp_maximum); -+ next_update = (((this_step + 1) * temp_maximum / -+ progress_granularity) + 1) << bitshift; -+ } else { -+ this_step = (u32) (value * progress_granularity / maximum); -+ next_update = ((this_step + 1) * maximum / -+ progress_granularity) + 1; -+ } -+ -+ if (this_step == last_step) -+ return next_update; -+ -+ memset(&msg, 0, sizeof(msg)); -+ -+ msg.a = this_step; -+ msg.b = progress_granularity; -+ -+ if (fmt) { -+ va_list args; -+ va_start(args, fmt); -+ vsnprintf(msg.text, sizeof(msg.text), fmt, args); -+ va_end(args); -+ msg.text[sizeof(msg.text)-1] = '\0'; -+ } -+ -+ toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS, -+ &msg, sizeof(msg)); -+ last_step = this_step; -+ -+ return next_update; -+} -+ -+/** -+ * userui_message - Display a message without necessarily logging it. -+ * -+ * @section: Type of message. Messages can be filtered by type. -+ * @level: Degree of importance of the message. Lower values = higher priority. -+ * @normally_logged: Whether logged even if log_everything is off. -+ * @fmt: Message (and parameters). -+ * -+ * This function is intended to do the same job as printk, but without normally -+ * logging what is printed. The point is to be able to get debugging info on -+ * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M" -+ * -+ * It may be called from an interrupt context - can't sleep! -+ */ -+static void userui_message(u32 section, u32 level, u32 normally_logged, -+ const char *fmt, ...) 
-+{ -+ struct userui_msg_params msg; -+ -+ if ((level) && (level > console_loglevel)) -+ return; -+ -+ memset(&msg, 0, sizeof(msg)); -+ -+ msg.a = section; -+ msg.b = level; -+ msg.c = normally_logged; -+ -+ if (fmt) { -+ va_list args; -+ va_start(args, fmt); -+ vsnprintf(msg.text, sizeof(msg.text), fmt, args); -+ va_end(args); -+ msg.text[sizeof(msg.text)-1] = '\0'; -+ } -+ -+ if (test_action_state(TOI_LOGALL)) -+ printk(KERN_INFO "%s\n", msg.text); -+ -+ toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE, -+ &msg, sizeof(msg)); -+} -+ -+/** -+ * wait_for_key_via_userui - Wait for userui to receive a keypress. -+ */ -+static void wait_for_key_via_userui(void) -+{ -+ DECLARE_WAITQUEUE(wait, current); -+ -+ add_wait_queue(&userui_wait_for_key, &wait); -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ wait_event_interruptible(userui_wait_for_key, userui_wait_should_wake); -+ userui_wait_should_wake = false; -+ -+ set_current_state(TASK_RUNNING); -+ remove_wait_queue(&userui_wait_for_key, &wait); -+} -+ -+/** -+ * userui_prepare_status - Display high level messages. -+ * -+ * @clearbar: Whether to clear the progress bar. -+ * @fmt...: New message for the title. -+ * -+ * Prepare the 'nice display', drawing the header and version, along with the -+ * current action and perhaps also resetting the progress bar. -+ */ -+static void userui_prepare_status(int clearbar, const char *fmt, ...) -+{ -+ va_list args; -+ -+ if (fmt) { -+ va_start(args, fmt); -+ lastheader_message_len = vsnprintf(lastheader, 512, fmt, args); -+ va_end(args); -+ } -+ -+ if (clearbar) -+ toi_update_status(0, 1, NULL); -+ -+ if (ui_helper_data.pid == -1) -+ printk(KERN_EMERG "%s\n", lastheader); -+ else -+ toi_message(0, TOI_STATUS, 1, lastheader, NULL); -+} -+ -+/** -+ * toi_wait_for_keypress - Wait for keypress via userui. -+ * -+ * @timeout: Maximum time to wait. -+ * -+ * Wait for a keypress from userui. -+ * -+ * FIXME: Implement timeout? 
-+ */ -+static char userui_wait_for_keypress(int timeout) -+{ -+ char key = '\0'; -+ -+ if (ui_helper_data.pid != -1) { -+ wait_for_key_via_userui(); -+ key = ' '; -+ } -+ -+ return key; -+} -+ -+/** -+ * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it. -+ * -+ * @result_code: Reason why we're aborting (1 << bit). -+ * @fmt: Message to display if telling the user what's going on. -+ * -+ * Abort a cycle. If this wasn't at the user's request (and we're displaying -+ * output), tell the user why and wait for them to acknowledge the message. -+ */ -+static void userui_abort_hibernate(int result_code, const char *fmt, ...) -+{ -+ va_list args; -+ int printed_len = 0; -+ -+ set_result_state(result_code); -+ -+ if (test_result_state(TOI_ABORTED)) -+ return; -+ -+ set_result_state(TOI_ABORTED); -+ -+ if (test_result_state(TOI_ABORT_REQUESTED)) -+ return; -+ -+ va_start(args, fmt); -+ printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf), -+ fmt, args); -+ va_end(args); -+ if (ui_helper_data.pid != -1) -+ printed_len = sprintf(local_printf_buf + printed_len, -+ " (Press SPACE to continue)"); -+ -+ toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf); -+ -+ if (ui_helper_data.pid != -1) -+ userui_wait_for_keypress(0); -+} -+ -+/** -+ * request_abort_hibernate - Abort hibernating or resuming at user request. -+ * -+ * Handle the user requesting the cancellation of a hibernation or resume by -+ * pressing escape. -+ */ -+static void request_abort_hibernate(void) -+{ -+ if (test_result_state(TOI_ABORT_REQUESTED) || -+ !test_action_state(TOI_CAN_CANCEL)) -+ return; -+ -+ if (test_toi_state(TOI_NOW_RESUMING)) { -+ toi_prepare_status(CLEAR_BAR, "Escape pressed. 
" -+ "Powering down again."); -+ set_toi_state(TOI_STOP_RESUME); -+ while (!test_toi_state(TOI_IO_STOPPED)) -+ schedule(); -+ if (toiActiveAllocator->mark_resume_attempted) -+ toiActiveAllocator->mark_resume_attempted(0); -+ toi_power_down(); -+ } -+ -+ toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :" -+ " ABORTING HIBERNATION ---"); -+ set_abort_result(TOI_ABORT_REQUESTED); -+ toi_stop_waiting_for_userui_key(); -+} -+ -+/** -+ * userui_user_rcv_msg - Receive a netlink message from userui. -+ * -+ * @skb: skb received. -+ * @nlh: Netlink header received. -+ */ -+static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) -+{ -+ int type; -+ int *data; -+ -+ type = nlh->nlmsg_type; -+ -+ /* A control message: ignore them */ -+ if (type < NETLINK_MSG_BASE) -+ return 0; -+ -+ /* Unknown message: reply with EINVAL */ -+ if (type >= USERUI_MSG_MAX) -+ return -EINVAL; -+ -+ /* All operations require privileges, even GET */ -+ if (!capable(CAP_NET_ADMIN)) -+ return -EPERM; -+ -+ /* Only allow one task to receive NOFREEZE privileges */ -+ if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) { -+ printk(KERN_INFO "Got NOFREEZE_ME request when " -+ "ui_helper_data.pid is %d.\n", ui_helper_data.pid); -+ return -EBUSY; -+ } -+ -+ data = (int *) NLMSG_DATA(nlh); -+ -+ switch (type) { -+ case USERUI_MSG_ABORT: -+ request_abort_hibernate(); -+ return 0; -+ case USERUI_MSG_GET_STATE: -+ toi_send_netlink_message(&ui_helper_data, -+ USERUI_MSG_GET_STATE, &toi_bkd.toi_action, -+ sizeof(toi_bkd.toi_action)); -+ return 0; -+ case USERUI_MSG_GET_DEBUG_STATE: -+ toi_send_netlink_message(&ui_helper_data, -+ USERUI_MSG_GET_DEBUG_STATE, -+ &toi_bkd.toi_debug_state, -+ sizeof(toi_bkd.toi_debug_state)); -+ return 0; -+ case USERUI_MSG_SET_STATE: -+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) -+ return -EINVAL; -+ ui_nl_set_state(*data); -+ return 0; -+ case USERUI_MSG_SET_DEBUG_STATE: -+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) -+ return -EINVAL; -+ 
toi_bkd.toi_debug_state = (*data); -+ return 0; -+ case USERUI_MSG_SPACE: -+ toi_stop_waiting_for_userui_key(); -+ return 0; -+ case USERUI_MSG_GET_POWERDOWN_METHOD: -+ toi_send_netlink_message(&ui_helper_data, -+ USERUI_MSG_GET_POWERDOWN_METHOD, -+ &toi_poweroff_method, -+ sizeof(toi_poweroff_method)); -+ return 0; -+ case USERUI_MSG_SET_POWERDOWN_METHOD: -+ if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char))) -+ return -EINVAL; -+ toi_poweroff_method = (unsigned long)(*data); -+ return 0; -+ case USERUI_MSG_GET_LOGLEVEL: -+ toi_send_netlink_message(&ui_helper_data, -+ USERUI_MSG_GET_LOGLEVEL, -+ &toi_bkd.toi_default_console_level, -+ sizeof(toi_bkd.toi_default_console_level)); -+ return 0; -+ case USERUI_MSG_SET_LOGLEVEL: -+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) -+ return -EINVAL; -+ toi_bkd.toi_default_console_level = (*data); -+ return 0; -+ case USERUI_MSG_PRINTK: -+ printk(KERN_INFO "%s", (char *) data); -+ return 0; -+ } -+ -+ /* Unhandled here */ -+ return 1; -+} -+ -+/** -+ * userui_cond_pause - Possibly pause at user request. -+ * -+ * @pause: Whether to pause or just display the message. -+ * @message: Message to display at the start of pausing. -+ * -+ * Potentially pause and wait for the user to tell us to continue. We normally -+ * only pause when @pause is set. While paused, the user can do things like -+ * changing the loglevel, toggling the display of debugging sections and such -+ * like. -+ */ -+static void userui_cond_pause(int pause, char *message) -+{ -+ int displayed_message = 0, last_key = 0; -+ -+ while (last_key != 32 && -+ ui_helper_data.pid != -1 && -+ ((test_action_state(TOI_PAUSE) && pause) || -+ (test_action_state(TOI_SINGLESTEP)))) { -+ if (!displayed_message) { -+ toi_prepare_status(DONT_CLEAR_BAR, -+ "%s Press SPACE to continue.%s", -+ message ? message : "", -+ (test_action_state(TOI_SINGLESTEP)) ? -+ " Single step on." 
: ""); -+ displayed_message = 1; -+ } -+ last_key = userui_wait_for_keypress(0); -+ } -+ schedule(); -+} -+ -+/** -+ * userui_prepare_console - Prepare the console for use. -+ * -+ * Prepare a console for use, saving current kmsg settings and attempting to -+ * start userui. Console loglevel changes are handled by userui. -+ */ -+static void userui_prepare_console(void) -+{ -+ orig_kmsg = vt_kmsg_redirect(fg_console + 1); -+ -+ ui_helper_data.pid = -1; -+ -+ if (!userui_ops.enabled) { -+ printk(KERN_INFO "TuxOnIce: Userui disabled.\n"); -+ return; -+ } -+ -+ if (*ui_helper_data.program) -+ toi_netlink_setup(&ui_helper_data); -+ else -+ printk(KERN_INFO "TuxOnIce: Userui program not configured.\n"); -+} -+ -+/** -+ * userui_cleanup_console - Cleanup after a cycle. -+ * -+ * Tell userui to cleanup, and restore kmsg_redirect to its original value. -+ */ -+ -+static void userui_cleanup_console(void) -+{ -+ if (ui_helper_data.pid > -1) -+ toi_netlink_close(&ui_helper_data); -+ -+ vt_kmsg_redirect(orig_kmsg); -+} -+ -+/* -+ * User interface specific /sys/power/tuxonice entries. 
-+ */ -+ -+static struct toi_sysfs_data sysfs_params[] = { -+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS) -+ SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_CAN_CANCEL, 0), -+ SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action, -+ TOI_PAUSE, 0), -+ SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL), -+ SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1, -+ 2048, 0, NULL), -+ SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0, -+ set_ui_program_set), -+ SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL) -+#endif -+}; -+ -+static struct toi_module_ops userui_ops = { -+ .type = MISC_MODULE, -+ .name = "userui", -+ .shared_directory = "user_interface", -+ .module = THIS_MODULE, -+ .storage_needed = userui_storage_needed, -+ .save_config_info = userui_save_config_info, -+ .load_config_info = userui_load_config_info, -+ .memory_needed = userui_memory_needed, -+ .post_atomic_restore = userui_post_atomic_restore, -+ .sysfs_data = sysfs_params, -+ .num_sysfs_entries = sizeof(sysfs_params) / -+ sizeof(struct toi_sysfs_data), -+}; -+ -+static struct ui_ops my_ui_ops = { -+ .update_status = userui_update_status, -+ .message = userui_message, -+ .prepare_status = userui_prepare_status, -+ .abort = userui_abort_hibernate, -+ .cond_pause = userui_cond_pause, -+ .prepare = userui_prepare_console, -+ .cleanup = userui_cleanup_console, -+ .wait_for_key = userui_wait_for_keypress, -+}; -+ -+/** -+ * toi_user_ui_init - Boot time initialisation for user interface. -+ * -+ * Invoked from the core init routine. 
-+ */ -+static __init int toi_user_ui_init(void) -+{ -+ int result; -+ -+ ui_helper_data.nl = NULL; -+ strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255); -+ ui_helper_data.pid = -1; -+ ui_helper_data.skb_size = sizeof(struct userui_msg_params); -+ ui_helper_data.pool_limit = 6; -+ ui_helper_data.netlink_id = NETLINK_TOI_USERUI; -+ ui_helper_data.name = "userspace ui"; -+ ui_helper_data.rcv_msg = userui_user_rcv_msg; -+ ui_helper_data.interface_version = 8; -+ ui_helper_data.must_init = 0; -+ ui_helper_data.not_ready = userui_cleanup_console; -+ init_completion(&ui_helper_data.wait_for_process); -+ result = toi_register_module(&userui_ops); -+ if (!result) -+ result = toi_register_ui_ops(&my_ui_ops); -+ if (result) -+ toi_unregister_module(&userui_ops); -+ -+ return result; -+} -+ -+#ifdef MODULE -+/** -+ * toi_user_ui_ext - Cleanup code for if the core is unloaded. -+ */ -+static __exit void toi_user_ui_exit(void) -+{ -+ toi_netlink_close_complete(&ui_helper_data); -+ toi_remove_ui_ops(&my_ui_ops); -+ toi_unregister_module(&userui_ops); -+} -+ -+module_init(toi_user_ui_init); -+module_exit(toi_user_ui_exit); -+MODULE_AUTHOR("Nigel Cunningham"); -+MODULE_DESCRIPTION("TuxOnIce Userui Support"); -+MODULE_LICENSE("GPL"); -+#else -+late_initcall(toi_user_ui_init); -+#endif -diff --git a/kernel/power/user.c b/kernel/power/user.c -index 98d3575..0c50ed1 100644 ---- a/kernel/power/user.c -+++ b/kernel/power/user.c -@@ -12,6 +12,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -43,6 +44,7 @@ static struct snapshot_data { - } snapshot_state; - - atomic_t snapshot_device_available = ATOMIC_INIT(1); -+EXPORT_SYMBOL_GPL(snapshot_device_available); - - static int snapshot_open(struct inode *inode, struct file *filp) - { -diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c -index 7228258..82d78f3 100644 ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -34,6 +34,7 @@ - #include - #include - #include 
-+#include - #include - #include - #include -@@ -67,6 +68,7 @@ int console_printk[4] = { - MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ - DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ - }; -+EXPORT_SYMBOL_GPL(console_printk); - - /* - * Low level drivers may need that to know if they can schedule in -@@ -1885,6 +1887,7 @@ void suspend_console(void) - up(&console_sem); - mutex_release(&console_lock_dep_map, 1, _RET_IP_); - } -+EXPORT_SYMBOL_GPL(suspend_console); - - void resume_console(void) - { -@@ -1895,6 +1898,7 @@ void resume_console(void) - console_suspended = 0; - console_unlock(); - } -+EXPORT_SYMBOL_GPL(resume_console); - - /** - * console_cpu_notify - print deferred console messages after CPU hotplug -diff --git a/mm/highmem.c b/mm/highmem.c -index b32b70c..db3d6ea 100644 ---- a/mm/highmem.c -+++ b/mm/highmem.c -@@ -66,6 +66,7 @@ unsigned int nr_free_highpages (void) - - return pages; - } -+EXPORT_SYMBOL_GPL(nr_free_highpages); - - static int pkmap_count[LAST_PKMAP]; - static unsigned int last_pkmap_nr; -diff --git a/mm/memory.c b/mm/memory.c -index 037b812..cf4157a 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -1651,6 +1651,7 @@ no_page_table: - return ERR_PTR(-EFAULT); - return page; - } -+EXPORT_SYMBOL_GPL(follow_page_mask); - - static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) - { -diff --git a/mm/mmzone.c b/mm/mmzone.c -index bf34fb8..0990dd2 100644 ---- a/mm/mmzone.c -+++ b/mm/mmzone.c -@@ -8,11 +8,13 @@ - #include - #include - #include -+#include - - struct pglist_data *first_online_pgdat(void) - { - return NODE_DATA(first_online_node); - } -+EXPORT_SYMBOL_GPL(first_online_pgdat); - - struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) - { -@@ -22,6 +24,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) - return NULL; - return NODE_DATA(nid); - } -+EXPORT_SYMBOL_GPL(next_online_pgdat); - - /* - * next_zone - helper magic for for_each_zone() -@@ -41,6 +44,7 @@ 
struct zone *next_zone(struct zone *zone) - } - return zone; - } -+EXPORT_SYMBOL_GPL(next_zone); - - static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) - { -diff --git a/mm/page-writeback.c b/mm/page-writeback.c -index a4317da..9effc35 100644 ---- a/mm/page-writeback.c -+++ b/mm/page-writeback.c -@@ -111,6 +111,7 @@ unsigned int dirty_expire_interval = 30 * 100; /* centiseconds */ - * Flag that makes the machine dump writes/reads and block dirtyings. - */ - int block_dump; -+EXPORT_SYMBOL_GPL(block_dump); - - /* - * Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies: -diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index 5dba293..cff38be 100644 ---- a/mm/page_alloc.c -+++ b/mm/page_alloc.c -@@ -141,6 +141,7 @@ void pm_restore_gfp_mask(void) - saved_gfp_mask = 0; - } - } -+EXPORT_SYMBOL_GPL(pm_restore_gfp_mask); - - void pm_restrict_gfp_mask(void) - { -@@ -149,6 +150,7 @@ void pm_restrict_gfp_mask(void) - saved_gfp_mask = gfp_allowed_mask; - gfp_allowed_mask &= ~GFP_IOFS; - } -+EXPORT_SYMBOL_GPL(pm_restrict_gfp_mask); - - bool pm_suspended_storage(void) - { -@@ -6514,6 +6516,12 @@ static const struct trace_print_flags pageflag_names[] = { - #ifdef CONFIG_TRANSPARENT_HUGEPAGE - {1UL << PG_compound_lock, "compound_lock" }, - #endif -+#ifdef CONFIG_TOI_INCREMENTAL -+ {1UL << PG_toi_ignore, "toi_ignore" }, -+ {1UL << PG_toi_ro, "toi_ro" }, -+ {1UL << PG_toi_cbw, "toi_cbw" }, -+ {1UL << PG_toi_dirty, "toi_dirty" }, -+#endif - }; - - static void dump_page_flags(unsigned long flags) -diff --git a/mm/shmem.c b/mm/shmem.c -index a2801ba..becaf27 100644 ---- a/mm/shmem.c -+++ b/mm/shmem.c -@@ -1295,7 +1295,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) - } - - static struct inode *shmem_get_inode(struct super_block *sb, const struct inode *dir, -- umode_t mode, dev_t dev, unsigned long flags) -+ umode_t mode, dev_t dev, unsigned long flags, int atomic_copy) - { - struct inode *inode; - struct 
shmem_inode_info *info; -@@ -1316,6 +1316,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode - memset(info, 0, (char *)inode - (char *)info); - spin_lock_init(&info->lock); - info->flags = flags & VM_NORESERVE; -+ if (atomic_copy) -+ inode->i_flags |= S_ATOMIC_COPY; - INIT_LIST_HEAD(&info->swaplist); - simple_xattrs_init(&info->xattrs); - cache_no_acl(inode); -@@ -1853,7 +1855,7 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) - struct inode *inode; - int error = -ENOSPC; - -- inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE); -+ inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE, 0); - if (inode) { - error = simple_acl_create(dir, inode); - if (error) -@@ -1882,7 +1884,7 @@ shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) - struct inode *inode; - int error = -ENOSPC; - -- inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE); -+ inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE, 0); - if (inode) { - error = security_inode_init_security(inode, dir, - NULL, -@@ -2011,7 +2013,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s - if (len > PAGE_CACHE_SIZE) - return -ENAMETOOLONG; - -- inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); -+ inode = shmem_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE, 0); - if (!inode) - return -ENOSPC; - -@@ -2555,7 +2557,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) - sb->s_flags |= MS_POSIXACL; - #endif - -- inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); -+ inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE, 0); - if (!inode) - goto failed; - inode->i_uid = sbinfo->uid; -@@ -2816,7 +2818,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range); - - #define shmem_vm_ops generic_file_vm_ops - #define shmem_file_operations ramfs_file_operations --#define 
shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) -+#define shmem_get_inode(sb, dir, mode, dev, flags, atomic_copy) ramfs_get_inode(sb, dir, mode, dev) - #define shmem_acct_size(flags, size) 0 - #define shmem_unacct_size(flags, size) do {} while (0) - -@@ -2829,7 +2831,8 @@ static struct dentry_operations anon_ops = { - }; - - static struct file *__shmem_file_setup(const char *name, loff_t size, -- unsigned long flags, unsigned int i_flags) -+ unsigned long flags, unsigned int i_flags, -+ int atomic_copy) - { - struct file *res; - struct inode *inode; -@@ -2858,7 +2861,7 @@ static struct file *__shmem_file_setup(const char *name, loff_t size, - path.mnt = mntget(shm_mnt); - - res = ERR_PTR(-ENOSPC); -- inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); -+ inode = shmem_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0, flags, atomic_copy); - if (!inode) - goto put_dentry; - -@@ -2894,9 +2897,9 @@ put_memory: - * @size: size to be set for the file - * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size - */ --struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) -+struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy) - { -- return __shmem_file_setup(name, size, flags, S_PRIVATE); -+ return __shmem_file_setup(name, size, flags, S_PRIVATE, atomic_copy); - } - - /** -@@ -2905,9 +2908,9 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon - * @size: size to be set for the file - * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size - */ --struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) -+struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags, int atomic_copy) - { -- return __shmem_file_setup(name, size, flags, 0); -+ return __shmem_file_setup(name, size, flags, 0, atomic_copy); - } - 
EXPORT_SYMBOL_GPL(shmem_file_setup); - -@@ -2920,7 +2923,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) - struct file *file; - loff_t size = vma->vm_end - vma->vm_start; - -- file = shmem_file_setup("dev/zero", size, vma->vm_flags); -+ file = shmem_file_setup("dev/zero", size, vma->vm_flags, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - -diff --git a/mm/swapfile.c b/mm/swapfile.c -index 4a7f7e6..4044ab6 100644 ---- a/mm/swapfile.c -+++ b/mm/swapfile.c -@@ -9,6 +9,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -43,7 +44,6 @@ - static bool swap_count_continued(struct swap_info_struct *, pgoff_t, - unsigned char); - static void free_swap_count_continuations(struct swap_info_struct *); --static sector_t map_swap_entry(swp_entry_t, struct block_device**); - - DEFINE_SPINLOCK(swap_lock); - static unsigned int nr_swapfiles; -@@ -729,6 +729,62 @@ swp_entry_t get_swap_page_of_type(int type) - spin_unlock(&si->lock); - return (swp_entry_t) {0}; - } -+EXPORT_SYMBOL_GPL(get_swap_page_of_type); -+ -+static unsigned int find_next_to_unuse(struct swap_info_struct *si, -+ unsigned int prev, bool frontswap); -+ -+void get_swap_range_of_type(int type, swp_entry_t *start, swp_entry_t *end, -+ unsigned int limit) -+{ -+ struct swap_info_struct *si; -+ pgoff_t start_at; -+ unsigned int i; -+ -+ *start = swp_entry(0, 0); -+ *end = swp_entry(0, 0); -+ si = swap_info[type]; -+ spin_lock(&si->lock); -+ if (si && (si->flags & SWP_WRITEOK)) { -+ atomic_long_dec(&nr_swap_pages); -+ /* This is called for allocating swap entry, not cache */ -+ start_at = scan_swap_map(si, 1); -+ if (start_at) { -+ unsigned long stop_at = find_next_to_unuse(si, start_at, 0); -+ if (stop_at > start_at) -+ stop_at--; -+ else -+ stop_at = si->max - 1; -+ if (stop_at - start_at + 1 > limit) -+ stop_at = min_t(unsigned int, -+ start_at + limit - 1, -+ si->max - 1); -+ /* Mark them used */ -+ for (i = start_at; i <= stop_at; i++) -+ si->swap_map[i] = 1; -+ /* first 
page already done above */ -+ si->inuse_pages += stop_at - start_at; -+ -+ atomic_long_sub(stop_at - start_at, &nr_swap_pages); -+ if (start_at == si->lowest_bit) -+ si->lowest_bit = stop_at + 1; -+ if (stop_at == si->highest_bit) -+ si->highest_bit = start_at - 1; -+ if (si->inuse_pages == si->pages) { -+ si->lowest_bit = si->max; -+ si->highest_bit = 0; -+ } -+ for (i = start_at + 1; i <= stop_at; i++) -+ inc_cluster_info_page(si, si->cluster_info, i); -+ si->cluster_next = stop_at + 1; -+ *start = swp_entry(type, start_at); -+ *end = swp_entry(type, stop_at); -+ } else -+ atomic_long_inc(&nr_swap_pages); -+ } -+ spin_unlock(&si->lock); -+} -+EXPORT_SYMBOL_GPL(get_swap_range_of_type); - - static struct swap_info_struct *swap_info_get(swp_entry_t entry) - { -@@ -876,6 +932,7 @@ void swapcache_free(swp_entry_t entry, struct page *page) - spin_unlock(&p->lock); - } - } -+EXPORT_SYMBOL_GPL(swap_free); - - /* - * How many references to page are currently swapped out? -@@ -1597,7 +1654,7 @@ static void drain_mmlist(void) - * Note that the type of this function is sector_t, but it returns page offset - * into the bdev, not sector offset. - */ --static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) -+sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) - { - struct swap_info_struct *sis; - struct swap_extent *start_se; -@@ -1624,6 +1681,7 @@ static sector_t map_swap_entry(swp_entry_t entry, struct block_device **bdev) - BUG_ON(se == start_se); /* It *must* be present */ - } - } -+EXPORT_SYMBOL_GPL(map_swap_entry); - - /* - * Returns the page offset into bdev for the specified page's swap entry. 
-@@ -1968,6 +2026,7 @@ out: - putname(pathname); - return err; - } -+EXPORT_SYMBOL_GPL(sys_swapoff); - - #ifdef CONFIG_PROC_FS - static unsigned swaps_poll(struct file *file, poll_table *wait) -@@ -2574,6 +2633,7 @@ out: - mutex_unlock(&inode->i_mutex); - return error; - } -+EXPORT_SYMBOL_GPL(sys_swapon); - - void si_swapinfo(struct sysinfo *val) - { -@@ -2591,6 +2651,7 @@ void si_swapinfo(struct sysinfo *val) - val->totalswap = total_swap_pages + nr_to_be_unused; - spin_unlock(&swap_lock); - } -+EXPORT_SYMBOL_GPL(si_swapinfo); - - /* - * Verify that a swap entry is valid and increment its swap map count. -@@ -2735,8 +2796,15 @@ pgoff_t __page_file_index(struct page *page) - VM_BUG_ON_PAGE(!PageSwapCache(page), page); - return swp_offset(swap); - } -+ - EXPORT_SYMBOL_GPL(__page_file_index); - -+struct swap_info_struct *get_swap_info_struct(unsigned type) -+{ -+ return swap_info[type]; -+} -+EXPORT_SYMBOL_GPL(get_swap_info_struct); -+ - /* - * add_swap_count_continuation - called when a swap count is duplicated - * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's -diff --git a/mm/util.c b/mm/util.c -index d5ea733..d814934 100644 ---- a/mm/util.c -+++ b/mm/util.c -@@ -414,6 +414,7 @@ struct address_space *page_mapping(struct page *page) - mapping = NULL; - return mapping; - } -+EXPORT_SYMBOL_GPL(page_mapping); - - int overcommit_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 32c661d..ea54eb5 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -1359,7 +1359,7 @@ static int too_many_isolated(struct zone *zone, int file, - { - unsigned long inactive, isolated; - -- if (current_is_kswapd()) -+ if (current_is_kswapd() || sc->hibernation_mode) - return 0; - - if (!global_reclaim(sc)) -@@ -2154,6 +2154,9 @@ static inline bool should_continue_reclaim(struct zone *zone, - unsigned long pages_for_compaction; - unsigned long inactive_lru_pages; - -+ if (nr_reclaimed && 
nr_scanned && sc->nr_to_reclaim >= sc->nr_reclaimed) -+ return true; -+ - /* If not in reclaim/compaction mode, stop */ - if (!in_reclaim_compaction(sc)) - return false; -@@ -2353,7 +2356,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) - if (sc->priority != DEF_PRIORITY && - !zone_reclaimable(zone)) - continue; /* Let kswapd poll it */ -- if (IS_ENABLED(CONFIG_COMPACTION)) { -+ if (IS_ENABLED(CONFIG_COMPACTION) && !sc->hibernation_mode) { - /* - * If we already have plenty of memory free for - * compaction in this zone, don't free any more. -@@ -2453,6 +2456,11 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, - unsigned long writeback_threshold; - bool aborted_reclaim; - -+#ifdef CONFIG_FREEZER -+ if (unlikely(pm_freezing && !sc->hibernation_mode)) -+ return 0; -+#endif -+ - delayacct_freepages_start(); - - if (global_reclaim(sc)) -@@ -3316,6 +3324,11 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) - if (!populated_zone(zone)) - return; - -+#ifdef CONFIG_FREEZER -+ if (pm_freezing) -+ return; -+#endif -+ - if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) - return; - pgdat = zone->zone_pgdat; -@@ -3341,11 +3354,11 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) - * LRU order by reclaiming preferentially - * inactive > active > active referenced > active mapped - */ --unsigned long shrink_all_memory(unsigned long nr_to_reclaim) -+unsigned long shrink_memory_mask(unsigned long nr_to_reclaim, gfp_t mask) - { - struct reclaim_state reclaim_state; - struct scan_control sc = { -- .gfp_mask = GFP_HIGHUSER_MOVABLE, -+ .gfp_mask = mask, - .may_swap = 1, - .may_unmap = 1, - .may_writepage = 1, -@@ -3371,6 +3384,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) - - return nr_reclaimed; - } -+EXPORT_SYMBOL_GPL(shrink_memory_mask); -+ -+unsigned long shrink_all_memory(unsigned long nr_to_reclaim) -+{ -+ return 
shrink_memory_mask(nr_to_reclaim, GFP_HIGHUSER_MOVABLE); -+} -+EXPORT_SYMBOL_GPL(shrink_all_memory); - #endif /* CONFIG_HIBERNATION */ - - /* It's optimal to keep kswapds on the same CPUs as their memory, but -diff --git a/security/keys/big_key.c b/security/keys/big_key.c -index 8137b27..e2436f9 100644 ---- a/security/keys/big_key.c -+++ b/security/keys/big_key.c -@@ -70,7 +70,7 @@ int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep) - * - * TODO: Encrypt the stored data with a temporary key. - */ -- file = shmem_kernel_file_setup("", datalen, 0); -+ file = shmem_kernel_file_setup("", datalen, 0, 0); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto err_quota; diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0_config-i686 b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0_config-i686 deleted file mode 100644 index 4eae57ab1..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0_config-i686 +++ /dev/null @@ -1,5088 +0,0 @@ -# Calculate name=.config os_arch_machine!=x86_64 -# -# Automatically generated file; DO NOT EDIT. 
-# Linux/x86 3.15.4-calculate Kernel Configuration -# -# CONFIG_64BIT is not set -CONFIG_X86_32=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf32-i386" -CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_HAVE_LATENCYTOP_SUPPORT=y -CONFIG_MMU=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -# CONFIG_ZONE_DMA32 is not set -# CONFIG_AUDIT_ARCH is not set -CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_X86_32_SMP=y -CONFIG_X86_HT=y -CONFIG_X86_32_LAZY_GS=y -CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-ecx -fcall-saved-edx" -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_EXTABLE_SORT=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_CROSS_COMPILE="" -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set -CONFIG_DEFAULT_HOSTNAME="calculate" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_FHANDLE=y -CONFIG_USELIB=y -CONFIG_AUDIT=y 
-CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y -CONFIG_AUDIT_WATCH=y -CONFIG_AUDIT_TREE=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_DATA=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_KTIME_SCALAR=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_IRQ_TIME_ACCOUNTING is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y - -# -# RCU Subsystem -# -CONFIG_TREE_PREEMPT_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_STALL_COMMON=y -CONFIG_RCU_FANOUT=32 -CONFIG_RCU_FANOUT_LEAF=16 -# CONFIG_RCU_FANOUT_EXACT is not set -# CONFIG_RCU_FAST_NO_HZ is not set -# CONFIG_TREE_RCU_TRACE is not set -# CONFIG_RCU_BOOST is not set -# CONFIG_RCU_NOCB_CPU is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=15 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_WANTS_PROT_NUMA_PROT_NONE=y -CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set -# CONFIG_CGROUP_FREEZER is not set -# CONFIG_CGROUP_DEVICE is not set -# CONFIG_CPUSETS is not set -CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -# CONFIG_MEMCG_SWAP_ENABLED is not set -# CONFIG_MEMCG_KMEM is not set -# CONFIG_CGROUP_PERF is not set -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_BLK_CGROUP=y -# 
CONFIG_DEBUG_BLK_CGROUP is not set -# CONFIG_CHECKPOINT_RESTORE is not set -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_SCHED_AUTOGROUP=y -CONFIG_MM_OWNER=y -# CONFIG_SYSFS_DEPRECATED is not set -# CONFIG_RELAY is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="/usr/share/v86d/initramfs " -CONFIG_INITRAMFS_ROOT_UID=0 -CONFIG_INITRAMFS_ROOT_GID=0 -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -# CONFIG_INITRAMFS_COMPRESSION_NONE is not set -# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set -# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set -# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_XZ=y -# CONFIG_INITRAMFS_COMPRESSION_LZO is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_ANON_INODES=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -# CONFIG_EXPERT is not set -CONFIG_UID16=y -CONFIG_SYSFS_SYSCALL=y -# CONFIG_SYSCTL_SYSCALL is not set -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_ALL is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_PCI_QUIRKS=y -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y -CONFIG_SLUB_CPU_PARTIAL=y -# CONFIG_SYSTEM_TRUSTED_KEYRING is not set -# CONFIG_PROFILING is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -# CONFIG_KPROBES is not set -# CONFIG_JUMP_LABEL is not set -# CONFIG_UPROBES is not set -# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y 
-CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_ATTRS=y -CONFIG_HAVE_DMA_CONTIGUOUS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_DMA_API_DEBUG=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_IPC_PARSE_VERSION=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_CC_STACKPROTECTOR=y -# CONFIG_CC_STACKPROTECTOR is not set -CONFIG_CC_STACKPROTECTOR_NONE=y -# CONFIG_CC_STACKPROTECTOR_REGULAR is not set -# CONFIG_CC_STACKPROTECTOR_STRONG is not set -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_MODULES_USE_ELF_REL=y -CONFIG_CLONE_BACKWARDS=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_OLD_SIGACTION=y - -# -# GCOV-based kernel profiling -# -CONFIG_HAVE_GENERIC_DMA_COHERENT=y -CONFIG_SLABINFO=y -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -# CONFIG_MODULE_FORCE_LOAD is not set -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_MODULE_SIG is not set -CONFIG_STOP_MACHINE=y -CONFIG_BLOCK=y -CONFIG_LBDAF=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -# CONFIG_BLK_DEV_THROTTLING is not set -# CONFIG_BLK_CMDLINE_PARSER is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_AIX_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# 
CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_CFQ_GROUP_IOSCHED is not set -CONFIG_IOSCHED_BFQ=y -CONFIG_CGROUP_BFQIO=y -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_BFQ is not set -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_FREEZER=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_MPPARSE=y -# CONFIG_X86_BIGSMP is not set -# CONFIG_X86_EXTENDED_PLATFORM is not set -# CONFIG_X86_INTEL_LPSS is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -# CONFIG_X86_32_IRIS is not set -CONFIG_SCHED_OMIT_FRAME_POINTER=y -# CONFIG_HYPERVISOR_GUEST is not set -CONFIG_NO_BOOTMEM=y -# CONFIG_MEMTEST is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMII is not set -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUMM is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MEFFICEON is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MELAN is not set -# CONFIG_MGEODEGX1 is not set -# CONFIG_MGEODE_LX is not set -# CONFIG_MCYRIXIII is not set -# 
CONFIG_MVIAC3_2 is not set -# CONFIG_MVIAC7 is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -# CONFIG_X86_GENERIC is not set -CONFIG_X86_INTERNODE_CACHE_SHIFT=5 -CONFIG_X86_L1_CACHE_SHIFT=5 -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=5 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_CPU_SUP_TRANSMETA_32=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_NR_CPUS=8 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set -CONFIG_X86_MCE=y -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -# CONFIG_X86_ANCIENT_MCE is not set -CONFIG_X86_MCE_THRESHOLD=y -# CONFIG_X86_MCE_INJECT is not set -CONFIG_X86_THERMAL_VECTOR=y -CONFIG_VM86=y -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_X86_REBOOTFIXUPS=y -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_MICROCODE_INTEL_EARLY=y -CONFIG_MICROCODE_AMD_EARLY=y -CONFIG_MICROCODE_EARLY=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_PAGE_OFFSET=0xC0000000 -CONFIG_HIGHMEM=y -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ILLEGAL_POINTER_VALUE=0 -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -CONFIG_SPARSEMEM_STATIC=y -CONFIG_HAVE_MEMBLOCK=y -CONFIG_HAVE_MEMBLOCK_NODE_MAP=y -CONFIG_ARCH_DISCARD_MEMBLOCK=y -# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set -CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -# CONFIG_COMPACTION is not set -# CONFIG_PHYS_ADDR_T_64BIT 
is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BOUNCE=y -CONFIG_NEED_BOUNCE_POOL=y -CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -# CONFIG_MEMORY_FAILURE is not set -# CONFIG_TRANSPARENT_HUGEPAGE is not set -CONFIG_CROSS_MEMORY_ATTACH=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -# CONFIG_ZBUD is not set -# CONFIG_ZSWAP is not set -CONFIG_ZSMALLOC=y -# CONFIG_PGTABLE_MAPPING is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_HIGHPTE is not set -# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set -CONFIG_X86_RESERVE_LOW=64 -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -# CONFIG_HZ_300 is not set -CONFIG_HZ_1000=y -CONFIG_HZ=1000 -CONFIG_SCHED_HRTICK=y -CONFIG_KEXEC=y -# CONFIG_CRASH_DUMP is not set -# CONFIG_KEXEC_JUMP is not set -CONFIG_PHYSICAL_START=0x1000000 -# CONFIG_RELOCATABLE is not set -CONFIG_PHYSICAL_ALIGN=0x100000 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -CONFIG_COMPAT_VDSO=y -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y - -# -# Power management and ACPI options -# -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_PM_STD_PARTITION="" -CONFIG_TOI_CORE=y - -# -# Image Storage (you need at least one allocator) -# -CONFIG_TOI_FILE=y -CONFIG_TOI_SWAP=y - -# -# General Options -# -CONFIG_TOI_CRYPTO=y -# CONFIG_TOI_USERUI is not set -CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT=-2 -# CONFIG_TOI_KEEP_IMAGE is not set - -# -# No incremental image support available without Keep Image support. 
-# -CONFIG_TOI_REPLACE_SWSUSP=y -CONFIG_TOI_IGNORE_LATE_INITCALL=y -CONFIG_TOI_DEFAULT_WAIT=25 -CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE=2000 -CONFIG_TOI_CHECKSUM=y -CONFIG_TOI=y -CONFIG_TOI_ZRAM_SUPPORT=y -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -# CONFIG_PM_AUTOSLEEP is not set -# CONFIG_PM_WAKELOCKS is not set -CONFIG_PM_RUNTIME=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_PM_TRACE_RTC is not set -# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set -CONFIG_ACPI=y -CONFIG_ACPI_SLEEP=y -# CONFIG_ACPI_PROCFS_POWER is not set -# CONFIG_ACPI_EC_DEBUGFS is not set -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_VIDEO=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=m -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_CUSTOM_DSDT is not set -# CONFIG_ACPI_INITRD_TABLE_OVERRIDE is not set -# CONFIG_ACPI_DEBUG is not set -# CONFIG_ACPI_PCI_SLOT is not set -CONFIG_X86_PM_TIMER=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_SBS=m -# CONFIG_ACPI_HED is not set -# CONFIG_ACPI_BGRT is not set -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -# CONFIG_ACPI_APEI is not set -# CONFIG_ACPI_EXTLOG is not set -CONFIG_SFI=y -CONFIG_X86_APM_BOOT=y -CONFIG_APM=m -CONFIG_APM_IGNORE_USER_SUSPEND=y -CONFIG_APM_DO_ENABLE=y -CONFIG_APM_CPU_IDLE=y -CONFIG_APM_DISPLAY_BLANK=y -CONFIG_APM_ALLOW_INTS=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m - -# -# x86 CPU 
frequency scaling drivers -# -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K6=m -CONFIG_X86_POWERNOW_K7=m -CONFIG_X86_POWERNOW_K7_ACPI=y -CONFIG_X86_POWERNOW_K8=m -CONFIG_X86_AMD_FREQ_SENSITIVITY=m -CONFIG_X86_GX_SUSPMOD=m -CONFIG_X86_SPEEDSTEP_CENTRINO=m -CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE=y -CONFIG_X86_SPEEDSTEP_ICH=m -CONFIG_X86_SPEEDSTEP_SMI=m -CONFIG_X86_P4_CLOCKMOD=m -CONFIG_X86_CPUFREQ_NFORCE2=m -CONFIG_X86_LONGRUN=m -CONFIG_X86_LONGHAUL=m -CONFIG_X86_E_POWERSAVER=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m -# CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK is not set - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -# CONFIG_CPU_IDLE_MULTIPLE_DRIVERS is not set -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set -# CONFIG_INTEL_IDLE is not set - -# -# Bus options (PCI etc.) -# -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GOMMCONFIG is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -# CONFIG_HOTPLUG_PCI_PCIE is not set -CONFIG_PCIEAER=y -CONFIG_PCIE_ECRC=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIEASPM=y -# CONFIG_PCIEASPM_DEBUG is not set -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCI_MSI=y -# CONFIG_PCI_DEBUG is not set -# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set -# CONFIG_PCI_STUB is not set -CONFIG_HT_IRQ=y -# CONFIG_PCI_IOV is not set -# CONFIG_PCI_PRI is not set -# CONFIG_PCI_PASID is not set -CONFIG_PCI_IOAPIC=y -CONFIG_PCI_LABEL=y - -# -# PCI host controller drivers -# -CONFIG_ISA_DMA_API=y -CONFIG_ISA=y -CONFIG_EISA=y -# CONFIG_EISA_VLB_PRIMING is not set -CONFIG_EISA_PCI_EISA=y -CONFIG_EISA_VIRTUAL_ROOT=y -CONFIG_EISA_NAMES=y -# CONFIG_SCx200 is not set -# CONFIG_OLPC is not set -# 
CONFIG_ALIX is not set -# CONFIG_NET5501 is not set -# CONFIG_GEOS is not set -CONFIG_AMD_NB=y -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -# CONFIG_I82365 is not set -# CONFIG_TCIC is not set -CONFIG_PCMCIA_PROBE=y -CONFIG_PCCARD_NONSTATIC=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_COMPAQ=m -CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y -CONFIG_HOTPLUG_PCI_IBM=m -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=m -# CONFIG_RAPIDIO is not set -# CONFIG_X86_SYSFB is not set - -# -# Executable file formats / Emulations -# -CONFIG_BINFMT_ELF=y -CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_SCRIPT=y -CONFIG_HAVE_AOUT=y -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -CONFIG_HAVE_ATOMIC_IOMAP=y -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=m -# CONFIG_PACKET_DIAG is not set -CONFIG_UNIX=y -# CONFIG_UNIX_DIAG is not set -CONFIG_XFRM=y -CONFIG_XFRM_ALGO=m -# CONFIG_XFRM_USER is not set -# CONFIG_XFRM_SUB_POLICY is not set -# CONFIG_XFRM_MIGRATE is not set -# CONFIG_XFRM_STATISTICS is not set -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -# CONFIG_NET_KEY_MIGRATE is not set -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -# CONFIG_NET_IPGRE_DEMUX is not set -CONFIG_NET_IP_TUNNEL=m -CONFIG_IP_MROUTE=y -# CONFIG_IP_MROUTE_MULTIPLE_TABLES is not set -# CONFIG_IP_PIMSM_V1 is not set -# CONFIG_IP_PIMSM_V2 is not set -CONFIG_SYN_COOKIES=y -# 
CONFIG_NET_IPVTI is not set -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_INET_XFRM_MODE_BEET=y -CONFIG_INET_LRO=m -# CONFIG_INET_DIAG is not set -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -# CONFIG_IPV6_ROUTE_INFO is not set -# CONFIG_IPV6_OPTIMISTIC_DAD is not set -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -# CONFIG_IPV6_SIT_6RD is not set -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -# CONFIG_IPV6_MROUTE is not set -# CONFIG_NETWORK_SECMARK is not set -CONFIG_NET_PTP_CLASSIFY=y -# CONFIG_NETWORK_PHY_TIMESTAMPING is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=y - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_GRE=m -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m 
-CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_QUEUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_NEEDED=y -CONFIG_NF_NAT_PROTO_DCCP=m -CONFIG_NF_NAT_PROTO_UDPLITE=m -CONFIG_NF_NAT_PROTO_SCTP=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m -CONFIG_NFT_CT=m -CONFIG_NFT_RBTREE=m -CONFIG_NFT_HASH=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_NAT=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m -CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m 
-CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -# CONFIG_IP_VS is not set - -# -# IP: Netfilter 
Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_CONNTRACK_PROC_COMPAT=y -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NF_TABLES_ARP=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT_IPV4=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PROTO_GRE=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_REJECT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_NF_NAT_IPV6=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m -# CONFIG_BRIDGE_NF_EBTABLES is not set -# CONFIG_IP_DCCP is not set -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1 is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y 
-CONFIG_RDS=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_L2TP is not set -CONFIG_STP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_DSA=y -CONFIG_NET_DSA_TAG_EDSA=y -CONFIG_NET_DSA_TAG_TRAILER=y -CONFIG_VLAN_8021Q=m -# CONFIG_VLAN_8021Q_GVRP is not set -# CONFIG_VLAN_8021Q_MVRP is not set -# CONFIG_DECNET is not set -CONFIG_LLC=m -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_PHONET is not set -# CONFIG_IEEE802154 is not set -CONFIG_6LOWPAN_IPHC=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m 
-CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_CLS_IND=y -CONFIG_NET_SCH_FIFO=y -# CONFIG_DCB is not set -CONFIG_DNS_RESOLVER=y -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -# CONFIG_BATMAN_ADV_DEBUG is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_VXLAN=y -CONFIG_VSOCKETS=m -# CONFIG_NETLINK_MMAP is not set -# CONFIG_NETLINK_DIAG is not set -CONFIG_NET_MPLS_GSO=m -CONFIG_HSR=m -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -# CONFIG_CGROUP_NET_PRIO is not set -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_CAN is not set -# CONFIG_IRDA is not set -CONFIG_BT=m -CONFIG_BT_6LOWPAN=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_HIDP=m - -# -# Bluetooth device drivers -# -CONFIG_BT_HCIBTUSB=m -# CONFIG_BT_HCIBTSDIO is not set -# CONFIG_BT_HCIUART is not set -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m -# CONFIG_BT_HCIVHCI is not set -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -# CONFIG_AF_RXRPC is not set -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_CFG80211_DEFAULT_PS=y -# CONFIG_CFG80211_INTERNAL_REGDB is not set -CONFIG_CFG80211_WEXT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m 
-CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_MINSTREL_HT=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -# CONFIG_NET_9P is not set -# CONFIG_CAIF is not set -# CONFIG_CEPH_LIB is not set -# CONFIG_NFC is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -CONFIG_FW_LOADER=y -CONFIG_FIRMWARE_IN_KERNEL=y -CONFIG_EXTRA_FIRMWARE="" -CONFIG_FW_LOADER_USER_HELPER=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_SYS_HYPERVISOR is not set -# CONFIG_GENERIC_CPU_DEVICES is not set -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=m -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_IRQ=y -CONFIG_DMA_SHARED_BUFFER=y - -# -# Bus devices -# -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -# CONFIG_MTD is not set -CONFIG_PARPORT=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT_PC=y -# CONFIG_PARPORT_SERIAL is not set -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_PC_PCMCIA is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_AX88796 is not set -# CONFIG_PARPORT_1284 is not set -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_LZ4_COMPRESS=y -# CONFIG_ZRAM_DEBUG is not set -CONFIG_BLK_CPQ_CISS_DA=m -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_BLK_DEV_DAC960 is not 
set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_DRBD is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_NVME is not set -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -# CONFIG_BLK_DEV_XIP is not set -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -# CONFIG_ATA_OVER_ETH is not set -CONFIG_VIRTIO_BLK=m -# CONFIG_BLK_DEV_HD is not set -# CONFIG_BLK_DEV_RBD is not set -CONFIG_BLK_DEV_RSXX=m - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -# CONFIG_AD525X_DPOT is not set -# CONFIG_DUMMY_IRQ is not set -CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_SGI_IOC4=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -# CONFIG_ICS932S401 is not set -# CONFIG_ATMEL_SSC is not set -CONFIG_ENCLOSURE_SERVICES=m -# CONFIG_HP_ILO is not set -# CONFIG_APDS9802ALS is not set -# CONFIG_ISL29003 is not set -# CONFIG_ISL29020 is not set -# CONFIG_SENSORS_TSL2550 is not set -# CONFIG_SENSORS_BH1780 is not set -# CONFIG_SENSORS_BH1770 is not set -# CONFIG_SENSORS_APDS990X is not set -# CONFIG_HMC6352 is not set -# CONFIG_DS1682 is not set -# CONFIG_BMP085_I2C is not set -# CONFIG_PCH_PHUB is not set -# CONFIG_USB_SWITCH_FSA9480 is not set -# CONFIG_SRAM is not set -# CONFIG_C2PORT is not set - -# -# EEPROM support -# -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set -# CONFIG_EEPROM_MAX6875 is not set -CONFIG_EEPROM_93CX6=m -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line discipline -# -CONFIG_SENSORS_LIS3_I2C=m - -# -# Altera FPGA firmware download module -# -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -# CONFIG_VMWARE_VMCI is not set - -# -# Intel MIC Host Driver -# - -# -# Intel MIC Card Driver 
-# -# CONFIG_ECHO is not set -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_TGT=m -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -# CONFIG_CHR_DEV_OSST is not set -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=m -# CONFIG_CHR_DEV_SCH is not set -# CONFIG_SCSI_ENCLOSURE is not set -CONFIG_SCSI_MULTI_LUN=y -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set -# CONFIG_SCSI_SCAN_ASYNC is not set - -# -# SCSI Transports -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -# CONFIG_SCSI_FC_TGT_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -# CONFIG_SCSI_SAS_ATA is not set -CONFIG_SCSI_SAS_HOST_SMP=y -# CONFIG_SCSI_SRP_ATTRS is not set -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -# CONFIG_SCSI_7000FASST is not set -CONFIG_SCSI_ACARD=m -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=5000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -# CONFIG_AIC94XX_DEBUG is not set -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -# CONFIG_SCSI_MVSAS_TASKLET is not set -CONFIG_SCSI_MVUMI=m 
-CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -# CONFIG_SCSI_IN2000 is not set -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS_LOGGING=y -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_LOGGING=y -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -CONFIG_SCSI_FLASHPOINT=y -CONFIG_VMWARE_PVSCSI=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_DMX3191D=m -# CONFIG_SCSI_DTC3280 is not set -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -CONFIG_SCSI_EATA_LINKED_COMMANDS=y -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -CONFIG_SCSI_IZIP_EPP16=y -CONFIG_SCSI_IZIP_SLOW_CTR=y -# CONFIG_SCSI_NCR53C406A is not set -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_LPFC=m -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_SRP=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m 
-CONFIG_SCSI_CHELSIO_FCOE=m -# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set -# CONFIG_SCSI_DH is not set -# CONFIG_SCSI_OSD_INITIATOR is not set -CONFIG_ATA=y -# CONFIG_ATA_NONSTANDARD is not set -# CONFIG_ATA_VERBOSE_ERROR is not set -CONFIG_ATA_ACPI=y -# CONFIG_SATA_ZPODD is not set -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -# CONFIG_SATA_AHCI_PLATFORM is not set -CONFIG_SATA_INIC162X=m -# CONFIG_SATA_ACARD_AHCI is not set -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CS5520=m -CONFIG_PATA_CS5530=m -CONFIG_PATA_CS5535=m -CONFIG_PATA_CS5536=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m -CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SC1200=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF controllers -# -CONFIG_PATA_CMD640_PCI=m -# CONFIG_PATA_ISAPNP is not set -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -# CONFIG_PATA_QDI is not set 
-CONFIG_PATA_RZ1000=m -# CONFIG_PATA_WINBOND_VLB is not set - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -# CONFIG_MD_MULTIPATH is not set -# CONFIG_MD_FAULTY is not set -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -# CONFIG_DM_DEBUG is not set -CONFIG_DM_BUFIO=m -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -# CONFIG_DM_DEBUG_BLOCK_STACK_TRACING is not set -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_MQ=m -CONFIG_DM_CACHE_CLEANER=m -CONFIG_DM_ERA=m -CONFIG_DM_MIRROR=m -# CONFIG_DM_LOG_USERSPACE is not set -CONFIG_DM_RAID=m -# CONFIG_DM_ZERO is not set -# CONFIG_DM_MULTIPATH is not set -# CONFIG_DM_DELAY is not set -# CONFIG_DM_UEVENT is not set -# CONFIG_DM_FLAKEY is not set -# CONFIG_DM_VERITY is not set -# CONFIG_DM_SWITCH is not set -# CONFIG_TARGET_CORE is not set -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -# CONFIG_FIREWIRE_NET is not set -# CONFIG_FIREWIRE_NOSY is not set -# CONFIG_I2O is not set -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=y -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -# CONFIG_EQUALIZER is not set -# CONFIG_NET_FC is not set -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -# CONFIG_MACVLAN is not set -CONFIG_VXLAN=m -# CONFIG_NETCONSOLE is not set -# 
CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -CONFIG_TUN=m -# CONFIG_VETH is not set -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set - -# -# CAIF transport drivers -# -CONFIG_VHOST_NET=m -CONFIG_VHOST_RING=m -CONFIG_VHOST=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MV88E6XXX_NEED_PPU=y -CONFIG_NET_DSA_MV88E6131=m -CONFIG_NET_DSA_MV88E6123_61_65=m -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL3=m -CONFIG_3C515=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_LANCE=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_NI65=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_BNX2X=m -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_CALXEDA_XGMAC=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4VF=m -CONFIG_NET_VENDOR_CIRRUS=y -CONFIG_CS89x0=m -CONFIG_CS89x0_PLATFORM=y -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m -CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -CONFIG_SUNDANCE_MMIO=y 
-CONFIG_NET_VENDOR_EMULEX=y -# CONFIG_BE2NET is not set -CONFIG_NET_VENDOR_EXAR=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_HP=y -CONFIG_HP100=m -CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBEVF=m -CONFIG_I40E=m -# CONFIG_I40E_VXLAN is not set -CONFIG_I40EVF=m -CONFIG_NET_VENDOR_I825XX=y -CONFIG_IP1000=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_VXLAN=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -# CONFIG_MLX5_CORE is not set -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2000=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_ULTRA=m -CONFIG_WD80x3=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_PCH_GBE=m -CONFIG_ETHOC=m -CONFIG_NET_PACKET_ENGINE=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -# CONFIG_QLCNIC_VXLAN is not set -CONFIG_QLGE=m -CONFIG_NETXEN_NIC=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -CONFIG_8139TOO_PIO=y -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -CONFIG_8139_OLD_RX_RESET=y -CONFIG_R8169=m -CONFIG_SH_ETH=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_SFC=m 
-CONFIG_SFC_MCDI_MON=y -CONFIG_NET_VENDOR_SMSC=y -CONFIG_SMC9194=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m -# CONFIG_SMSC911X_ARCH_HOOKS is not set -CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -CONFIG_STMMAC_PLATFORM=y -CONFIG_STMMAC_PCI=y -CONFIG_STMMAC_DA=y -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -# CONFIG_NIU is not set -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_NET_SB1000 is not set -CONFIG_PHYLIB=m - -# -# MII PHY device drivers -# -CONFIG_AT803X_PHY=m -# CONFIG_AMD_PHY is not set -# CONFIG_MARVELL_PHY is not set -# CONFIG_DAVICOM_PHY is not set -# CONFIG_QSEMI_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_CICADA_PHY is not set -# CONFIG_VITESSE_PHY is not set -CONFIG_SMSC_PHY=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -# CONFIG_ICPLUS_PHY is not set -# CONFIG_REALTEK_PHY is not set -# CONFIG_NATIONAL_PHY is not set -# CONFIG_STE10XP is not set -# CONFIG_LSI_ET1011C_PHY is not set -# CONFIG_MICREL_PHY is not set -CONFIG_MDIO_BITBANG=m -# CONFIG_PLIP is not set -CONFIG_PPP=y -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=y -# CONFIG_SLIP_COMPRESSED is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set - -# -# USB Network Adapters -# -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m 
-CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -# CONFIG_USB_NET_CX82310_ETH is not set -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_WLAN=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_AIRO=m -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_AIRO_CS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_PRISM54=m -CONFIG_USB_ZD1201=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_ADM8211=m -CONFIG_MAC80211_HWSIM=m -CONFIG_MWL8K=m -CONFIG_ATH_COMMON=m -CONFIG_ATH_CARDS=m -# CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -# CONFIG_ATH5K_DEBUG is not set -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -# CONFIG_ATH9K_WOW is not set -CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_HTC=m -# CONFIG_CARL9170 is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -# CONFIG_ATH6KL_DEBUG is not set -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_ATH10K=m -CONFIG_ATH10K_PCI=m -# CONFIG_ATH10K_DEBUG is not set -# CONFIG_ATH10K_DEBUGFS is not set -CONFIG_WCN36XX=m -# 
CONFIG_WCN36XX_DEBUGFS is not set -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_PCMCIA=y -CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -# CONFIG_BRCM_TRACING is not set -# CONFIG_BRCMDBG is not set -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLWIFI=m -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -CONFIG_IWLWIFI_BCAST_FILTERING=y - -# -# Debugging Options -# -# CONFIG_IWLWIFI_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -# CONFIG_IWLEGACY_DEBUG is not set -CONFIG_LIBERTAS=m -CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -# CONFIG_LIBERTAS_DEBUG is not set -# CONFIG_LIBERTAS_MESH is not set -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m 
-CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_LEDS=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -# CONFIG_RTLWIFI_DEBUG is not set -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -# CONFIG_WL_TI is not set -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_RSI_91X=m -# CONFIG_RSI_DEBUGFS is not set -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# CONFIG_WAN is not set -CONFIG_VMXNET3=m -# CONFIG_ISDN is not set - -# -# Input device support -# -CONFIG_INPUT=y -CONFIG_INPUT_FF_MEMLESS=y -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_EVBUG is not 
set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -# CONFIG_KEYBOARD_ADP5588 is not set -# CONFIG_KEYBOARD_ADP5589 is not set -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_QT1070 is not set -# CONFIG_KEYBOARD_QT2160 is not set -# CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_TCA6416 is not set -# CONFIG_KEYBOARD_TCA8418 is not set -# CONFIG_KEYBOARD_LM8323 is not set -# CONFIG_KEYBOARD_LM8333 is not set -# CONFIG_KEYBOARD_MAX7359 is not set -# CONFIG_KEYBOARD_MCS is not set -# CONFIG_KEYBOARD_MPR121 is not set -# CONFIG_KEYBOARD_NEWTON is not set -# CONFIG_KEYBOARD_OPENCORES is not set -# CONFIG_KEYBOARD_STOWAWAY is not set -# CONFIG_KEYBOARD_SUNKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -# CONFIG_MOUSE_SERIAL is not set -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -# CONFIG_MOUSE_INPORT is not set -# CONFIG_MOUSE_LOGIBM is not set -# CONFIG_MOUSE_PC110PAD is not set -# CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_MOUSE_SYNAPTICS_I2C is not set -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=y -CONFIG_JOYSTICK_IFORCE_232=y -CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m 
-CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -# CONFIG_JOYSTICK_WALKERA0701 is not set -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -# CONFIG_TABLET_USB_HANWANG is not set -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_WACOM=m -# CONFIG_INPUT_TOUCHSCREEN is not set -CONFIG_INPUT_MISC=y -# CONFIG_INPUT_AD714X is not set -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_PCSPKR=m -# CONFIG_INPUT_MMA8450 is not set -# CONFIG_INPUT_MPU3050 is not set -# CONFIG_INPUT_APANEL is not set -# CONFIG_INPUT_WISTRON_BTNS is not set -# CONFIG_INPUT_ATLAS_BTNS is not set -# CONFIG_INPUT_ATI_REMOTE2 is not set -# CONFIG_INPUT_KEYSPAN_REMOTE is not set -# CONFIG_INPUT_KXTJ9 is not set -# CONFIG_INPUT_POWERMATE is not set -CONFIG_INPUT_YEALINK=m -# CONFIG_INPUT_CM109 is not set -# CONFIG_INPUT_RETU_PWRBUTTON is not set -CONFIG_INPUT_UINPUT=m -# CONFIG_INPUT_PCF8574 is not set -# CONFIG_INPUT_ADXL34X is not set -# CONFIG_INPUT_IMS_PCU is not set -# CONFIG_INPUT_CMA3000 is not set -# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=m -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PARKBD is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set -# CONFIG_SERIO_ALTERA_PS2 is not set -# CONFIG_SERIO_PS2MULT is not set -# CONFIG_SERIO_ARC_PS2 is not set -CONFIG_GAMEPORT=m -# CONFIG_GAMEPORT_NS558 is not set -# CONFIG_GAMEPORT_L4 is not set -CONFIG_GAMEPORT_EMU10K1=m -# CONFIG_GAMEPORT_FM801 is not set - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y -CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set -# 
CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_NONSTANDARD=y -# CONFIG_ROCKETPORT is not set -# CONFIG_CYCLADES is not set -# CONFIG_MOXA_INTELLIO is not set -# CONFIG_MOXA_SMARTIO is not set -# CONFIG_SYNCLINK is not set -# CONFIG_SYNCLINKMP is not set -# CONFIG_SYNCLINK_GT is not set -# CONFIG_NOZOMI is not set -# CONFIG_ISI is not set -# CONFIG_N_HDLC is not set -# CONFIG_N_GSM is not set -# CONFIG_TRACE_SINK is not set -CONFIG_DEVKMEM=y - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_CONSOLE is not set -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_NR_UARTS=4 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set -# CONFIG_SERIAL_8250_DW is not set - -# -# Non-8250 serial port support -# -# CONFIG_SERIAL_MFD_HSU is not set -CONFIG_SERIAL_CORE=y -# CONFIG_SERIAL_JSM is not set -# CONFIG_SERIAL_SCCNXP is not set -# CONFIG_SERIAL_TIMBERDALE is not set -# CONFIG_SERIAL_ALTERA_JTAGUART is not set -# CONFIG_SERIAL_ALTERA_UART is not set -# CONFIG_SERIAL_PCH_UART is not set -# CONFIG_SERIAL_ARC is not set -# CONFIG_SERIAL_RP2 is not set -# CONFIG_SERIAL_FSL_LPUART is not set -CONFIG_PRINTER=y -# CONFIG_LP_CONSOLE is not set -# CONFIG_PPDEV is not set -CONFIG_HVC_DRIVER=y -# CONFIG_VIRTIO_CONSOLE is not set -# CONFIG_IPMI_HANDLER is not set -CONFIG_HW_RANDOM=m -# CONFIG_HW_RANDOM_TIMERIOMEM is not set -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_GEODE=m -CONFIG_HW_RANDOM_VIA=m -# CONFIG_HW_RANDOM_VIRTIO is not set -CONFIG_NVRAM=m -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# PCMCIA character devices -# -# CONFIG_SYNCLINK_CS is not set -# CONFIG_CARDMAN_4000 is not set -# CONFIG_CARDMAN_4040 is not set -# CONFIG_IPWIRELESS is not set -CONFIG_MWAVE=m -# CONFIG_PC8736x_GPIO is not set -# 
CONFIG_NSC_GPIO is not set -# CONFIG_RAW_DRIVER is not set -# CONFIG_HPET is not set -# CONFIG_HANGCHECK_TIMER is not set -# CONFIG_TCG_TPM is not set -# CONFIG_TELCLOCK is not set -CONFIG_DEVPORT=y -CONFIG_I2C=m -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -# CONFIG_I2C_MUX_PCA9541 is not set -# CONFIG_I2C_MUX_PCA954x is not set -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -# CONFIG_I2C_DESIGNWARE_PLATFORM is not set -# CONFIG_I2C_DESIGNWARE_PCI is not set -CONFIG_I2C_EG20T=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -# CONFIG_I2C_PXA_PCI is not set -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -# CONFIG_I2C_DIOLAN_U2C is not set -CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m -# CONFIG_I2C_ROBOTFUZZ_OSIF is not set -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -# CONFIG_I2C_PCA_ISA is not set -CONFIG_SCx200_ACB=m -CONFIG_I2C_STUB=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_SPI is not set -CONFIG_SPMI=m -# CONFIG_HSI is not set - -# -# PPS support -# -CONFIG_PPS=m -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -# CONFIG_PPS_CLIENT_KTIMER is not set -# CONFIG_PPS_CLIENT_LDISC is not set -# 
CONFIG_PPS_CLIENT_PARPORT is not set -# CONFIG_PPS_CLIENT_GPIO is not set - -# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=m - -# -# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. -# -CONFIG_PTP_1588_CLOCK_PCH=m -CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y -# CONFIG_GPIOLIB is not set -# CONFIG_W1 is not set -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -# CONFIG_PDA_POWER is not set -# CONFIG_TEST_POWER is not set -# CONFIG_BATTERY_DS2780 is not set -# CONFIG_BATTERY_DS2781 is not set -# CONFIG_BATTERY_DS2782 is not set -# CONFIG_BATTERY_SBS is not set -# CONFIG_BATTERY_BQ27x00 is not set -# CONFIG_BATTERY_MAX17040 is not set -# CONFIG_BATTERY_MAX17042 is not set -CONFIG_CHARGER_ISP1704=m -# CONFIG_CHARGER_MAX8903 is not set -# CONFIG_CHARGER_LP8727 is not set -# CONFIG_CHARGER_BQ2415X is not set -# CONFIG_CHARGER_SMB347 is not set -# CONFIG_POWER_RESET is not set -# CONFIG_POWER_AVS is not set -CONFIG_HWMON=m -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_HIH6130=m 
-CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -# CONFIG_SENSORS_JC42 is not set -CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_HTU21=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS1015=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m 
-CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m -CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -CONFIG_SENSORS_W83795_FANCTRL=y -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_GOV_FAIR_SHARE is not set -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_USER_SPACE=y -# CONFIG_THERMAL_EMULATION is not set -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_ACPI_INT3403_THERMAL=m - -# -# Texas Instruments thermal drivers -# -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -# CONFIG_F71808E_WDT is not set -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -# CONFIG_HPWDT_NMI_DECODING is not set -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_SBC8360_WDT=m -CONFIG_SBC7240_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83697HF_WDT=m -CONFIG_W83697UG_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m - -# -# ISA-based Watchdog Cards -# -# CONFIG_PCWATCHDOG is not set -# CONFIG_MIXCOMWD is not set -# CONFIG_WDT is 
not set - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y - -# -# Sonics Silicon Backplane -# -CONFIG_SSB=m -CONFIG_SSB_SPROM=y -CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -# CONFIG_SSB_DEBUG is not set -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_BCMA_POSSIBLE=y - -# -# Broadcom specific AMBA -# -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -CONFIG_BCMA_HOST_SOC=y -# CONFIG_BCMA_DRIVER_GMAC_CMN is not set -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=m -# CONFIG_MFD_CS5535 is not set -CONFIG_MFD_BCM590XX=m -# CONFIG_MFD_CROS_EC is not set -# CONFIG_MFD_MC13XXX_I2C is not set -# CONFIG_HTC_PASIC3 is not set -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -# CONFIG_MFD_JANZ_CMODIO is not set -# CONFIG_MFD_KEMPLD is not set -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -# CONFIG_MFD_PCF50633 is not set -# CONFIG_MFD_RDC321X is not set -# CONFIG_MFD_RTSX_PCI is not set -CONFIG_MFD_RTSX_USB=m -# CONFIG_MFD_SI476X_CORE is not set -# CONFIG_MFD_SM501 is not set -# CONFIG_ABX500_CORE is not set -CONFIG_MFD_SYSCON=y -# CONFIG_MFD_TI_AM335X_TSCADC is not set -# CONFIG_MFD_LP3943 is not set -# CONFIG_TPS6105X is not set -# CONFIG_TPS6507X is not set -# CONFIG_MFD_TPS65217 is not set -CONFIG_MFD_TPS65218=m -CONFIG_MFD_WL1273_CORE=m -# CONFIG_MFD_LM3533 is not set -# CONFIG_MFD_TMIO is not set -# CONFIG_MFD_VX855 is not set -# CONFIG_MFD_ARIZONA_I2C is not set -# CONFIG_REGULATOR is not set -CONFIG_MEDIA_SUPPORT=m - -# -# Multimedia core support -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -# CONFIG_MEDIA_RADIO_SUPPORT is not set -# 
CONFIG_MEDIA_RC_SUPPORT is not set -# CONFIG_MEDIA_CONTROLLER is not set -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_V4L2=m -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m -CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_VIDEOBUF_DVB=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_DVB_CORE=m -CONFIG_DVB_NET=y -# CONFIG_TTPCI_EEPROM is not set -CONFIG_DVB_MAX_ADAPTERS=8 -# CONFIG_DVB_DYNAMIC_MINORS is not set - -# -# Media drivers -# -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -# CONFIG_USB_GSPCA_KONICA is not set -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -# CONFIG_USB_GSPCA_XIRLINK_CIT is not set -CONFIG_USB_GSPCA_ZC3XX=m 
-CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# -CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_USBVISION=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160_AC97=y -CONFIG_VIDEO_STK1160=m - -# -# Analog/digital TV USB devices -# -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y - -# -# Digital TV USB devices -# -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -# CONFIG_DVB_TTUSB_BUDGET is not set -# CONFIG_DVB_TTUSB_DEC is not set -# CONFIG_SMS_USB_DRV is not set -# CONFIG_DVB_B2C2_FLEXCOP_USB is not set - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m -CONFIG_VIDEO_EM28XX_DVB=m -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_DC30=m -CONFIG_VIDEO_ZORAN_ZR36060=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZORAN_LML33R10=m -CONFIG_VIDEO_ZORAN_AVS6EYES=m -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m - -# -# Media capture/analog/hybrid TV support -# -# CONFIG_VIDEO_CX25821 is not set -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -# CONFIG_DVB_AV7110 is not set -# CONFIG_DVB_BUDGET_CORE is not set -# CONFIG_DVB_B2C2_FLEXCOP_PCI is not set -# CONFIG_DVB_PLUTO2 is not set -# 
CONFIG_DVB_PT1 is not set -# CONFIG_DVB_NGENE is not set -# CONFIG_DVB_DDBRIDGE is not set -# CONFIG_V4L_PLATFORM_DRIVERS is not set -# CONFIG_V4L_MEM2MEM_DRIVERS is not set -# CONFIG_V4L_TEST_DRIVERS is not set - -# -# Supported MMC/SDIO adapters -# -CONFIG_SMS_SDIO_DRV=m -# CONFIG_MEDIA_PARPORT_SUPPORT is not set - -# -# Supported FireWire (IEEE 1394) Adapters -# -# CONFIG_DVB_FIREDTV is not set -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m - -# -# Media ancillary drivers (tuners, sensors, i2c, frontends) -# -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y -CONFIG_MEDIA_ATTACH=y - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_WM8775=m - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m - -# -# Video decoders -# -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_VPX3220=m - -# -# Video and audio decoders -# -CONFIG_VIDEO_CX25840=m - -# -# Video encoders -# -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m - -# -# Camera sensor devices -# -CONFIG_VIDEO_MT9V011=m - -# -# Flash devices -# - -# -# Video improvement chips -# - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m - -# -# Miscellaneous helper chips -# - -# -# Sensors used on soc_camera driver -# -CONFIG_MEDIA_TUNER=m -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m 
-CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m -CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_M88TS2022=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m - -# -# Multistandard (satellite) frontends -# -CONFIG_DVB_M88DS3103=m - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m -CONFIG_DVB_TDA18271C2DD=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_CX24116=m -CONFIG_DVB_TDA10071=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_DRXD=m -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_TDA10023=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# -CONFIG_DVB_S921=m -CONFIG_DVB_MB86A20S=m - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_DRX39XYJ=m -CONFIG_DVB_LNBP21=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_A8293=m -CONFIG_DVB_AF9033=m - -# -# Tools to develop new frontends -# -# CONFIG_DVB_DUMMY_FE is not set - -# -# Graphics support -# -CONFIG_AGP=y -# CONFIG_AGP_ALI is not set -CONFIG_AGP_ATI=m -CONFIG_AGP_AMD=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m 
-CONFIG_AGP_NVIDIA=m -CONFIG_AGP_SIS=m -# CONFIG_AGP_SWORKS is not set -CONFIG_AGP_VIA=m -# CONFIG_AGP_EFFICEON is not set -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=16 -CONFIG_VGA_SWITCHEROO=y - -# -# Direct Rendering Manager -# -CONFIG_DRM=m -CONFIG_DRM_USB=m -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_LOAD_EDID_FIRMWARE is not set -CONFIG_DRM_TTM=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -# CONFIG_DRM_RADEON_UMS is not set -CONFIG_DRM_NOUVEAU=m -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_KMS=y -CONFIG_DRM_I915_FBDEV=y -CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT=y -# CONFIG_DRM_I915_UMS is not set -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_DRM_VIA=m -CONFIG_DRM_SAVAGE=m -# CONFIG_DRM_VMWGFX is not set -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -# CONFIG_DRM_AST is not set -# CONFIG_DRM_MGAG200 is not set -# CONFIG_DRM_CIRRUS_QEMU is not set -# CONFIG_DRM_QXL is not set -# CONFIG_DRM_BOCHS is not set -CONFIG_DRM_PTN3460=m - -# -# Frame buffer Devices -# -CONFIG_FB=y -# CONFIG_FIRMWARE_EDID is not set -# CONFIG_FB_DDC is not set -# CONFIG_FB_BOOT_VESA_SUPPORT is not set -CONFIG_FB_CFB_FILLRECT=m -CONFIG_FB_CFB_COPYAREA=m -CONFIG_FB_CFB_IMAGEBLIT=m -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -# CONFIG_FB_SVGALIB is not set -# CONFIG_FB_MACMODES is not set -CONFIG_FB_BACKLIGHT=y -CONFIG_FB_MODE_HELPERS=y -# CONFIG_FB_TILEBLITTING is not set - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# 
CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -CONFIG_FB_UVESA=m -# CONFIG_FB_VESA is not set -# CONFIG_FB_EFI is not set -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_I810 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_GEODE is not set -# CONFIG_FB_TMIO is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_GOLDFISH is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -# CONFIG_FB_BROADSHEET is not set -# CONFIG_FB_AUO_K190X is not set -# CONFIG_FB_SIMPLE is not set -# CONFIG_EXYNOS_VIDEO is not set -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_LCD_CLASS_DEVICE=m -# CONFIG_LCD_PLATFORM is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_APPLE=m -# CONFIG_BACKLIGHT_SAHARA is not set -# CONFIG_BACKLIGHT_ADP8860 is not set -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -# CONFIG_VGASTATE is not set -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -# CONFIG_VGACON_SOFT_SCROLLBACK is not set -# CONFIG_MDA_CONSOLE is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y 
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -CONFIG_FB_CON_DECOR=y -# CONFIG_LOGO is not set -CONFIG_SOUND=y -CONFIG_SOUND_OSS_CORE=y -CONFIG_SOUND_OSS_CORE_PRECLAIM=y -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_JACK=y -CONFIG_SND_SEQUENCER=m -# CONFIG_SND_SEQ_DUMMY is not set -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -# CONFIG_SND_SEQUENCER_OSS is not set -# CONFIG_SND_HRTIMER is not set -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_VERBOSE_PROCFS=y -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_KCTL_JACK=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_RAWMIDI_SEQ=m -CONFIG_SND_OPL3_LIB_SEQ=m -CONFIG_SND_OPL4_LIB_SEQ=m -CONFIG_SND_SBAWE_SEQ=m -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL4_LIB=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -# CONFIG_SND_DUMMY is not set -# CONFIG_SND_ALOOP is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set -# CONFIG_SND_MTS64 is not set -# CONFIG_SND_SERIAL_U16550 is not set -# CONFIG_SND_MPU401 is not set -# CONFIG_SND_PORTMAN2X4 is not set -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_WSS_LIB=m -CONFIG_SND_SB_COMMON=m -CONFIG_SND_SB8_DSP=m -CONFIG_SND_SB16_DSP=m -CONFIG_SND_ISA=y -CONFIG_SND_ADLIB=m -CONFIG_SND_AD1816A=m -CONFIG_SND_AD1848=m -CONFIG_SND_ALS100=m -CONFIG_SND_AZT1605=m -CONFIG_SND_AZT2316=m -CONFIG_SND_AZT2320=m -CONFIG_SND_CMI8328=m -CONFIG_SND_CMI8330=m -CONFIG_SND_CS4231=m -CONFIG_SND_CS4236=m -CONFIG_SND_ES1688=m -CONFIG_SND_ES18XX=m -CONFIG_SND_SC6000=m -CONFIG_SND_GUSCLASSIC=m -CONFIG_SND_GUSEXTREME=m -CONFIG_SND_GUSMAX=m -CONFIG_SND_INTERWAVE=m -CONFIG_SND_INTERWAVE_STB=m 
-CONFIG_SND_JAZZ16=m -CONFIG_SND_OPL3SA2=m -CONFIG_SND_OPTI92X_AD1848=m -CONFIG_SND_OPTI92X_CS4231=m -CONFIG_SND_OPTI93X=m -CONFIG_SND_MIRO=m -CONFIG_SND_SB8=m -CONFIG_SND_SB16=m -CONFIG_SND_SBAWE=m -CONFIG_SND_SB16_CSP=y -CONFIG_SND_SSCAPE=m -CONFIG_SND_WAVEFRONT=m -CONFIG_SND_MSND_PINNACLE=m -CONFIG_SND_MSND_CLASSIC=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -CONFIG_SND_BT87X_OVERCLOCK=y -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS5530=m -CONFIG_SND_CS5535AUDIO=m -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_FM801=m -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SIS7019=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_INTEL=m 
-CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_PREALLOC_SIZE=4096 -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_INPUT_JACK=y -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -# CONFIG_SND_DICE is not set -CONFIG_SND_FIREWIRE_SPEAKERS=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_SCS1X=m -CONFIG_SND_PCMCIA=y -# CONFIG_SND_VXPOCKET is not set -# CONFIG_SND_PDAUDIOCF is not set -# CONFIG_SND_SOC is not set -# CONFIG_SOUND_PRIME is not set -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=y -# CONFIG_HID_BATTERY_STRENGTH is not set -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=m - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=y -CONFIG_HID_ACRUX=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=y -# CONFIG_HID_APPLEIR is not set -CONFIG_HID_AUREAL=m -CONFIG_HID_BELKIN=y -CONFIG_HID_CHERRY=y -CONFIG_HID_CHICONY=y -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CYPRESS=y -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=y -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_HUION=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=y -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_TWINHAN=m 
-CONFIG_HID_KENSINGTON=y -CONFIG_HID_LCPOWER=m -CONFIG_HID_LENOVO_TPKBD=m -CONFIG_HID_LOGITECH=y -CONFIG_HID_LOGITECH_DJ=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MICROSOFT=y -CONFIG_HID_MONTEREY=y -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PRIMAX=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -# CONFIG_SONY_FF is not set -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_SMARTJOYPLUS=y -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=y -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=y -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=y -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m - -# -# USB HID support -# -CONFIG_USB_HID=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# I2C HID support -# -CONFIG_I2C_HID=m -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -# CONFIG_USB_DEBUG is not set -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -# CONFIG_USB_DYNAMIC_MINORS is not set -CONFIG_USB_OTG=y -CONFIG_USB_OTG_WHITELIST=y -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_MON=y -# CONFIG_USB_WUSB_CBAF is not set - -# -# USB Host Controller Drivers -# -# CONFIG_USB_C67X00_HCD is not set -CONFIG_USB_XHCI_HCD=m -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=y -CONFIG_USB_EHCI_HCD_PLATFORM=y -# CONFIG_USB_OXU210HP_HCD is not set -CONFIG_USB_ISP116X_HCD=m 
-# CONFIG_USB_ISP1760_HCD is not set -# CONFIG_USB_ISP1362_HCD is not set -# CONFIG_USB_FUSBH200_HCD is not set -# CONFIG_USB_FOTG210_HCD is not set -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -# CONFIG_USB_SL811_HCD is not set -# CONFIG_USB_R8A66597_HCD is not set -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set -# CONFIG_USB_RENESAS_USBHS is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -# CONFIG_USB_PRINTER is not set -CONFIG_USB_WDM=m -# CONFIG_USB_TMC is not set - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=y -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_USBAT is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_STORAGE_ALAUDA is not set -# CONFIG_USB_STORAGE_ONETOUCH is not set -# CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set -# CONFIG_USB_STORAGE_ENE_UB6250 is not set -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set -CONFIG_USB_MUSB_DUAL_ROLE=y -CONFIG_USB_MUSB_TUSB6010=m -CONFIG_USB_MUSB_UX500=m -# CONFIG_USB_UX500_DMA is not set -CONFIG_MUSB_PIO_ONLY=y -# CONFIG_USB_DWC3 is not set -# CONFIG_USB_DWC2 is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -# CONFIG_USB_CHIPIDEA_DEBUG is not set - -# -# USB port drivers -# -# CONFIG_USB_USS720 is not set -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_GENERIC=y 
-CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -CONFIG_USB_SERIAL_SAFE_PADDED=y -CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_ZTE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -# 
CONFIG_USB_EMI62 is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_ADUTUX is not set -# CONFIG_USB_SEVSEG is not set -# CONFIG_USB_RIO500 is not set -# CONFIG_USB_LEGOTOWER is not set -# CONFIG_USB_LCD is not set -# CONFIG_USB_LED is not set -# CONFIG_USB_CYPRESS_CY7C63 is not set -# CONFIG_USB_CYTHERM is not set -# CONFIG_USB_IDMOUSE is not set -# CONFIG_USB_FTDI_ELAN is not set -# CONFIG_USB_APPLEDISPLAY is not set -# CONFIG_USB_SISUSBVGA is not set -# CONFIG_USB_LD is not set -# CONFIG_USB_TRANCEVIBRATOR is not set -# CONFIG_USB_IOWARRIOR is not set -# CONFIG_USB_TEST is not set -# CONFIG_USB_EHSET_TEST_FIXTURE is not set -# CONFIG_USB_ISIGHTFW is not set -# CONFIG_USB_YUREX is not set -CONFIG_USB_EZUSB_FX2=m -# CONFIG_USB_HSIC_USB3503 is not set - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_USB_OTG_FSM=m -# CONFIG_NOP_USB_XCEIV is not set -# CONFIG_SAMSUNG_USB2PHY is not set -# CONFIG_SAMSUNG_USB3PHY is not set -# CONFIG_USB_ISP1301 is not set -# CONFIG_USB_RCAR_PHY is not set -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 - -# -# USB Peripheral Controller -# -CONFIG_USB_FUSB300=m -# CONFIG_USB_FOTG210_UDC is not set -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_S3C_HSOTG=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_M66592=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m -CONFIG_USB_EG20T=m -CONFIG_USB_DUMMY_HCD=m -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_CONFIGFS=m -# CONFIG_USB_CONFIGFS_SERIAL is not set -# CONFIG_USB_CONFIGFS_ACM is not set -# CONFIG_USB_CONFIGFS_OBEX is not set -# CONFIG_USB_CONFIGFS_NCM is not set -# CONFIG_USB_CONFIGFS_ECM is not set -# 
CONFIG_USB_CONFIGFS_ECM_SUBSET is not set -# CONFIG_USB_CONFIGFS_RNDIS is not set -# CONFIG_USB_CONFIGFS_EEM is not set -# CONFIG_USB_CONFIGFS_MASS_STORAGE is not set -# CONFIG_USB_CONFIGFS_F_LB_SS is not set -# CONFIG_USB_CONFIGFS_F_FS is not set -# CONFIG_USB_ZERO is not set -# CONFIG_USB_AUDIO is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -# CONFIG_USB_GADGETFS is not set -# CONFIG_USB_FUNCTIONFS is not set -# CONFIG_USB_MASS_STORAGE is not set -# CONFIG_USB_G_SERIAL is not set -# CONFIG_USB_MIDI_GADGET is not set -# CONFIG_USB_G_PRINTER is not set -# CONFIG_USB_CDC_COMPOSITE is not set -# CONFIG_USB_G_ACM_MS is not set -# CONFIG_USB_G_MULTI is not set -# CONFIG_USB_G_HID is not set -# CONFIG_USB_G_DBGP is not set -# CONFIG_USB_G_WEBCAM is not set -# CONFIG_UWB is not set -CONFIG_MMC=m -# CONFIG_MMC_DEBUG is not set -# CONFIG_MMC_CLKGATE is not set - -# -# MMC/SD/SDIO Card Drivers -# -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_MMC_BLOCK_BOUNCE=y -CONFIG_SDIO_UART=m -# CONFIG_MMC_TEST is not set - -# -# MMC/SD/SDIO Host Controller Drivers -# -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -# CONFIG_MMC_SDHCI_PLTFM is not set -CONFIG_MMC_WBSD=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -# CONFIG_MMC_USHC is not set -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m - -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y - -# -# LED drivers -# -CONFIG_LEDS_LM3530=m -CONFIG_LEDS_LM3642=m -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP55XX_COMMON=m -CONFIG_LEDS_LP5521=m -CONFIG_LEDS_LP5523=m -CONFIG_LEDS_LP5562=m -CONFIG_LEDS_LP8501=m 
-CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_PCA9685=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_DELL_NETBOOKS=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_OT200=m -CONFIG_LEDS_BLINKM=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -# CONFIG_LEDS_TRIGGER_CPU is not set -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -# CONFIG_ACCESSIBILITY is not set -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set -CONFIG_RTC_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -# CONFIG_RTC_DRV_DS1307 is not set -# CONFIG_RTC_DRV_DS1374 is not set -# CONFIG_RTC_DRV_DS1672 is not set -# CONFIG_RTC_DRV_DS3232 is not set -# CONFIG_RTC_DRV_MAX6900 is not set -# CONFIG_RTC_DRV_RS5C372 is not set -# CONFIG_RTC_DRV_ISL1208 is not set -# CONFIG_RTC_DRV_ISL12022 is not set -# CONFIG_RTC_DRV_ISL12057 is not set -# CONFIG_RTC_DRV_X1205 is not set -# CONFIG_RTC_DRV_PCF2127 is not set -# CONFIG_RTC_DRV_PCF8523 is not set -# CONFIG_RTC_DRV_PCF8563 is not set -# CONFIG_RTC_DRV_PCF8583 is not set -# CONFIG_RTC_DRV_M41T80 is not set -# CONFIG_RTC_DRV_BQ32K is not set -# CONFIG_RTC_DRV_S35390A is not set -# CONFIG_RTC_DRV_FM3130 is not set -# CONFIG_RTC_DRV_RX8581 is not set -# CONFIG_RTC_DRV_RX8025 is not set -# CONFIG_RTC_DRV_EM3027 is not set -# CONFIG_RTC_DRV_RV3029C2 is not set - -# -# SPI RTC drivers -# - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -# CONFIG_RTC_DRV_DS1286 is not 
set -# CONFIG_RTC_DRV_DS1511 is not set -# CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_DS1742 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set -# CONFIG_RTC_DRV_M48T86 is not set -# CONFIG_RTC_DRV_M48T35 is not set -# CONFIG_RTC_DRV_M48T59 is not set -# CONFIG_RTC_DRV_MSM6242 is not set -# CONFIG_RTC_DRV_BQ4802 is not set -# CONFIG_RTC_DRV_RP5C01 is not set -# CONFIG_RTC_DRV_V3020 is not set -# CONFIG_RTC_DRV_DS2404 is not set - -# -# on-CPU RTC drivers -# -# CONFIG_RTC_DRV_MOXART is not set - -# -# HID Sensor RTC drivers -# -# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_INTEL_MID_DMAC=m -CONFIG_INTEL_IOATDMA=m -CONFIG_DW_DMAC_CORE=m -CONFIG_DW_DMAC=m -CONFIG_DW_DMAC_PCI=m -CONFIG_PCH_DMA=m -CONFIG_DMA_ENGINE=y -CONFIG_DMA_ACPI=y - -# -# DMA Clients -# -# CONFIG_ASYNC_TX_DMA is not set -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y -CONFIG_DCA=m -# CONFIG_AUXDISPLAY is not set -CONFIG_UIO=m -# CONFIG_UIO_CIF is not set -# CONFIG_UIO_PDRV_GENIRQ is not set -# CONFIG_UIO_DMEM_GENIRQ is not set -# CONFIG_UIO_AEC is not set -# CONFIG_UIO_SERCOS3 is not set -CONFIG_UIO_PCI_GENERIC=m -# CONFIG_UIO_NETX is not set -# CONFIG_UIO_MF624 is not set -CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO=m - -# -# Virtio drivers -# -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_MMIO=m -# CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_STAGING=y -# CONFIG_ET131X is not set -# CONFIG_SLICOSS is not set -# CONFIG_USBIP_CORE is not set -# CONFIG_W35UND is not set -# CONFIG_PRISM2_USB is not set -# CONFIG_COMEDI is not set -# CONFIG_PANEL is not set -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_88EU_P2P=y -CONFIG_R8723AU=m -CONFIG_8723AU_AP_MODE=y -CONFIG_8723AU_P2P=y 
-CONFIG_8723AU_BT_COEXIST=y -CONFIG_R8821AE=m -CONFIG_RTS5139=m -# CONFIG_RTS5139_DEBUG is not set -CONFIG_RTS5208=m -# CONFIG_RTS5208_DEBUG is not set -# CONFIG_TRANZPORT is not set -# CONFIG_IDE_PHISON is not set -# CONFIG_LINE6_USB is not set -# CONFIG_USB_SERIAL_QUATECH2 is not set -# CONFIG_VT6655 is not set -# CONFIG_VT6656 is not set -# CONFIG_DX_SEP is not set -# CONFIG_WLAGS49_H2 is not set -# CONFIG_WLAGS49_H25 is not set -# CONFIG_CRYSTALHD is not set -# CONFIG_FB_XGI is not set -# CONFIG_ACPI_QUICKSTART is not set -# CONFIG_USB_ENESTORAGE is not set -# CONFIG_BCM_WIMAX is not set -# CONFIG_FT1000 is not set - -# -# Speakup console speech -# -# CONFIG_SPEAKUP is not set -CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4=m -# CONFIG_STAGING_MEDIA is not set - -# -# Android -# -# CONFIG_ANDROID is not set -# CONFIG_USB_WPAN_HCD is not set -CONFIG_WIMAX_GDM72XX=m -CONFIG_WIMAX_GDM72XX_QOS=y -CONFIG_WIMAX_GDM72XX_K_MODE=y -CONFIG_WIMAX_GDM72XX_WIMAX2=y -CONFIG_WIMAX_GDM72XX_USB=y -# CONFIG_WIMAX_GDM72XX_SDIO is not set -CONFIG_WIMAX_GDM72XX_USB_PM=y -CONFIG_LTE_GDM724X=m -CONFIG_NET_VENDOR_SILICOM=y -# CONFIG_SBYPASS is not set -# CONFIG_BPCTL is not set -# CONFIG_CED1401 is not set -# CONFIG_DGRP is not set -# CONFIG_FIREWIRE_SERIAL is not set -# CONFIG_LUSTRE_FS is not set -# CONFIG_XILLYBUS is not set -# CONFIG_DGNC is not set -# CONFIG_DGAP is not set -CONFIG_GS_FPGABOOT=m -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACER_WMI=m -CONFIG_ACERHDF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_ASUS_LAPTOP=m -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_AIO=m -CONFIG_FUJITSU_LAPTOP=m -# CONFIG_FUJITSU_LAPTOP_DEBUG is not set -CONFIG_FUJITSU_TABLET=m -CONFIG_AMILO_RFKILL=m -CONFIG_TC1100_WMI=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_MSI_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# 
CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -CONFIG_THINKPAD_ACPI_UNSAFE_LEDS=y -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_SENSORS_HDAPS=m -CONFIG_INTEL_MENLOW=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_WMI=m -CONFIG_ACPI_WMI=m -CONFIG_MSI_WMI=m -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_ACPI_CMPC=m -CONFIG_INTEL_IPS=m -CONFIG_IBM_RTL=m -CONFIG_XO15_EBOOK=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_MXM_WMI=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_SAMSUNG_Q10=m -CONFIG_APPLE_GMUX=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m -CONFIG_PVPANIC=m -# CONFIG_CHROME_PLATFORMS is not set - -# -# Hardware Spinlock drivers -# -CONFIG_CLKSRC_I8253=y -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -# CONFIG_SH_TIMER_CMT is not set -# CONFIG_SH_TIMER_MTU2 is not set -# CONFIG_SH_TIMER_TMU is not set -# CONFIG_EM_TIMER_STI is not set -# CONFIG_MAILBOX is not set -CONFIG_IOMMU_SUPPORT=y -# CONFIG_INTEL_IOMMU is not set - -# -# Remoteproc drivers -# -# CONFIG_STE_MODEM_RPROC is not set - -# -# Rpmsg drivers -# -# CONFIG_PM_DEVFREQ is not set -# CONFIG_EXTCON is not set -# CONFIG_MEMORY is not set -# CONFIG_IIO is not set -# CONFIG_NTB is not set -# CONFIG_VME_BUS is not set -# CONFIG_PWM is not set -# CONFIG_IPACK_BUS is not set -CONFIG_RESET_CONTROLLER=y -# CONFIG_FMC is not set - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -# CONFIG_BCM_KONA_USB2_PHY is not set -CONFIG_PHY_SAMSUNG_USB2=m -# CONFIG_POWERCAP is not set -# CONFIG_MCB is not set - -# -# Firmware Drivers -# -# CONFIG_EDD is not set -CONFIG_FIRMWARE_MEMMAP=y -# CONFIG_DELL_RBU is not set -CONFIG_DCDBAS=m -CONFIG_DMIID=y -# CONFIG_DMI_SYSFS is not set -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -# CONFIG_ISCSI_IBFT_FIND is not set -# CONFIG_GOOGLE_FIRMWARE is not set - -# -# EFI (Extensible Firmware Interface) Support -# -# CONFIG_EFI_VARS is not set 
-CONFIG_EFI_RUNTIME_MAP=y - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_EXT2_FS=m -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_DEFAULTS_TO_ORDERED=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=m -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_FS_XIP=y -CONFIG_JBD=m -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -CONFIG_NILFS2_FS=m -CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_FILE_LOCKING=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_QUOTA=y -# CONFIG_QUOTA_NETLINK_INTERFACE is not set -CONFIG_PRINT_QUOTA_WARNING=y -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -# CONFIG_QFMT_V1 is not set -# CONFIG_QFMT_V2 is not set -CONFIG_QUOTACTL=y -CONFIG_AUTOFS4_FS=y -CONFIG_FUSE_FS=m -# CONFIG_CUSE is not set - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -# CONFIG_FSCACHE_HISTOGRAM is not set -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set 
-CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_UDF_NLS=y - -# -# DOS/FAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=866 -CONFIG_FAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_CONFIGFS_FS=m -CONFIG_MISC_FILESYSTEMS=y -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_LOGFS is not set -# CONFIG_CRAMFS is not set -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y -# CONFIG_SQUASHFS_XATTR is not set -CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_SQUASHFS_4K_DEVBLK_SIZE=y -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_OMFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_PSTORE is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set -CONFIG_F2FS_FS=m -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -# CONFIG_F2FS_CHECK_FS is not set -CONFIG_EFIVAR_FS=m -CONFIG_AUFS_FS=y 
-CONFIG_AUFS_BRANCH_MAX_127=y -# CONFIG_AUFS_BRANCH_MAX_511 is not set -# CONFIG_AUFS_BRANCH_MAX_1023 is not set -# CONFIG_AUFS_BRANCH_MAX_32767 is not set -CONFIG_AUFS_SBILIST=y -# CONFIG_AUFS_HNOTIFY is not set -# CONFIG_AUFS_EXPORT is not set -CONFIG_AUFS_RDU=y -CONFIG_AUFS_SHWH=y -CONFIG_AUFS_BR_RAMFS=y -CONFIG_AUFS_BR_FUSE=y -CONFIG_AUFS_POLL=y -CONFIG_AUFS_BDEV_LOOP=y -# CONFIG_AUFS_DEBUG is not set -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -# CONFIG_NFS_SWAP is not set -# CONFIG_NFS_V4_1 is not set -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -# CONFIG_NFSD_FAULT_INJECTION is not set -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -# CONFIG_SUNRPC_DEBUG is not set -# CONFIG_CEPH_FS is not set -CONFIG_CIFS=m -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_WEAK_PW_HASH is not set -# CONFIG_CIFS_UPCALL is not set -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_ACL is not set -# CONFIG_CIFS_DEBUG is not set -# CONFIG_CIFS_DFS_UPCALL is not set -# CONFIG_CIFS_SMB2 is not set -# CONFIG_CIFS_FSCACHE is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -CONFIG_NLS_CODEPAGE_850=m -# CONFIG_NLS_CODEPAGE_852 is not set -CONFIG_NLS_CODEPAGE_855=m -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -CONFIG_NLS_CODEPAGE_866=m -# CONFIG_NLS_CODEPAGE_869 is not set -# 
CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -CONFIG_NLS_ISO8859_5=m -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -# CONFIG_NLS_MAC_ROMAN is not set -# CONFIG_NLS_MAC_CELTIC is not set -# CONFIG_NLS_MAC_CENTEURO is not set -# CONFIG_NLS_MAC_CROATIAN is not set -# CONFIG_NLS_MAC_CYRILLIC is not set -# CONFIG_NLS_MAC_GAELIC is not set -# CONFIG_NLS_MAC_GREEK is not set -# CONFIG_NLS_MAC_ICELAND is not set -# CONFIG_NLS_MAC_INUIT is not set -# CONFIG_NLS_MAC_ROMANIAN is not set -# CONFIG_NLS_MAC_TURKISH is not set -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set - -# -# Kernel hacking -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y - -# -# printk and dmesg options -# -# CONFIG_PRINTK_TIME is not set -CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4 -# CONFIG_BOOT_PRINTK_DELAY is not set - -# -# Compile-time checks and compiler options -# -# CONFIG_DEBUG_INFO is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -# CONFIG_STRIP_ASM_SYMS is not set -# CONFIG_READABLE_ASM is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_ARCH_WANT_FRAME_POINTERS=y -CONFIG_FRAME_POINTER=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 -CONFIG_DEBUG_KERNEL=y - -# -# Memory 
Debugging -# -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -# CONFIG_DEBUG_HIGHMEM is not set -CONFIG_HAVE_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACKOVERFLOW is not set -CONFIG_HAVE_ARCH_KMEMCHECK=y -# CONFIG_KMEMCHECK is not set -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Lockups and Hangs -# -# CONFIG_LOCKUP_DETECTOR is not set -# CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_SCHED_DEBUG=y -# CONFIG_SCHEDSTATS is not set -CONFIG_TIMER_STATS=y -# CONFIG_DEBUG_PREEMPT is not set - -# -# Lock Debugging (spinlocks, mutexes, etc...) -# -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_RT_MUTEX_TESTER is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_DEBUG_KOBJECT is not set -CONFIG_DEBUG_BUGVERBOSE=y -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_DEBUG_CREDENTIALS is not set - -# -# RCU Debugging -# -# CONFIG_PROVE_RCU_DELAY is not set -# CONFIG_SPARSE_RCU_POINTER is not set -# CONFIG_TORTURE_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_CPU_STALL_VERBOSE is not set -# CONFIG_RCU_CPU_STALL_INFO is not set -# CONFIG_RCU_TRACE is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -# 
CONFIG_FAULT_INJECTION is not set -# CONFIG_LATENCYTOP is not set -CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS=y -# CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y -CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACING_SUPPORT=y -# CONFIG_FTRACE is not set - -# -# Runtime Testing -# -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_KSTRTOX is not set -# CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_DMA_API_DEBUG is not set -# CONFIG_TEST_MODULE is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -# CONFIG_STRICT_DEVMEM is not set -CONFIG_X86_VERBOSE_BOOTUP=y -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_EFI is not set -# CONFIG_X86_PTDUMP is not set -CONFIG_DEBUG_RODATA=y -CONFIG_DEBUG_RODATA_TEST=y -# CONFIG_DEBUG_SET_MODULE_RONX is not set -# CONFIG_DEBUG_NX_TEST is not set -CONFIG_DOUBLEFAULT=y -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_STRESS is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -CONFIG_IO_DELAY_TYPE_0X80=0 -CONFIG_IO_DELAY_TYPE_0XED=1 -CONFIG_IO_DELAY_TYPE_UDELAY=2 -CONFIG_IO_DELAY_TYPE_NONE=3 -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEFAULT_IO_DELAY_TYPE=0 -# CONFIG_CPA_DEBUG is not set -# CONFIG_OPTIMIZE_INLINING is not set -# CONFIG_DEBUG_NMI_SELFTEST is 
not set -# CONFIG_X86_DEBUG_STATIC_CPU_HAS is not set - -# -# Security options -# -CONFIG_KEYS=y -# CONFIG_PERSISTENT_KEYRINGS is not set -# CONFIG_BIG_KEYS is not set -# CONFIG_ENCRYPTED_KEYS is not set -CONFIG_KEYS_DEBUG_PROC_KEYS=y -# CONFIG_SECURITY_DMESG_RESTRICT is not set -# CONFIG_SECURITY is not set -# CONFIG_SECURITYFS is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_DEFAULT_SECURITY="" -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=m -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_BLKCIPHER=m -CONFIG_CRYPTO_BLKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=m -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_PCOMP2=y -CONFIG_CRYPTO_MANAGER=m -CONFIG_CRYPTO_MANAGER2=y -# CONFIG_CRYPTO_USER is not set -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=m -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_PCRYPT is not set -CONFIG_CRYPTO_WORKQUEUE=y -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -# CONFIG_CRYPTO_TEST is not set -CONFIG_CRYPTO_ABLK_HELPER=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -# CONFIG_CRYPTO_GCM is not set -CONFIG_CRYPTO_SEQIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CTR=m -# CONFIG_CRYPTO_CTS is not set -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=m -# CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_VMAC is not set - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -# CONFIG_CRYPTO_CRC32C_INTEL is not set -# CONFIG_CRYPTO_CRC32 is not set -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_CRCT10DIF=y -# CONFIG_CRYPTO_GHASH is not set -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MD5=m -CONFIG_CRYPTO_MICHAEL_MIC=m -# CONFIG_CRYPTO_RMD128 is not set 
-CONFIG_CRYPTO_RMD160=m -# CONFIG_CRYPTO_RMD256 is not set -# CONFIG_CRYPTO_RMD320 is not set -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -# CONFIG_CRYPTO_TGR192 is not set -CONFIG_CRYPTO_WP512=m - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_586=m -CONFIG_CRYPTO_AES_NI_INTEL=m -# CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_ARC4=m -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_CAMELLIA is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -CONFIG_CRYPTO_DES=m -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_SALSA20 is not set -# CONFIG_CRYPTO_SALSA20_586 is not set -# CONFIG_CRYPTO_SEED is not set -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_586=m -# CONFIG_CRYPTO_TEA is not set -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_586=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_ZLIB is not set -CONFIG_CRYPTO_LZO=y -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=y - -# -# Random Number Generation -# -# CONFIG_CRYPTO_ANSI_CPRNG is not set -# CONFIG_CRYPTO_USER_API_HASH is not set -# CONFIG_CRYPTO_USER_API_SKCIPHER is not set -CONFIG_CRYPTO_HW=y -# CONFIG_CRYPTO_DEV_PADLOCK is not set -CONFIG_CRYPTO_DEV_GEODE=m -# CONFIG_CRYPTO_DEV_HIFN_795X is not set -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -# CONFIG_ASYMMETRIC_KEY_TYPE is not set -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_APIC_ARCHITECTURE=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -CONFIG_LGUEST=m -# CONFIG_BINARY_PRINTF is not set - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y 
-CONFIG_GENERIC_NET_UTILS=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_GENERIC_IO=y -CONFIG_CRC_CCITT=m -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT is not set -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -CONFIG_AUDIT_GENERIC=y -# CONFIG_AUDIT_ARCH_COMPAT_GENERIC is not set -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=y -CONFIG_LZ4_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_NLATTR=y -CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y -CONFIG_AVERAGE=y -CONFIG_CORDIC=m -# CONFIG_DDR is not set -CONFIG_OID_REGISTRY=m -CONFIG_UCS2_STRING=y -CONFIG_FONT_SUPPORT=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0_config-x86_64 b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0_config-x86_64 deleted file mode 100644 index 4a6afc05f..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/0_config-x86_64 +++ /dev/null @@ -1,4960 +0,0 @@ -# Calculate name=.config 
os_arch_machine==x86_64 -# -# Automatically generated file; DO NOT EDIT. -# Linux/x86 3.15.4-calculate Kernel Configuration -# -CONFIG_64BIT=y -CONFIG_X86_64=y -CONFIG_X86=y -CONFIG_INSTRUCTION_DECODER=y -CONFIG_OUTPUT_FORMAT="elf64-x86-64" -CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" -CONFIG_LOCKDEP_SUPPORT=y -CONFIG_STACKTRACE_SUPPORT=y -CONFIG_HAVE_LATENCYTOP_SUPPORT=y -CONFIG_MMU=y -CONFIG_NEED_DMA_MAP_STATE=y -CONFIG_NEED_SG_DMA_LENGTH=y -CONFIG_GENERIC_ISA_DMA=y -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y -CONFIG_GENERIC_HWEIGHT=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_ARCH_HAS_CPU_RELAX=y -CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y -CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y -CONFIG_ARCH_HIBERNATION_POSSIBLE=y -CONFIG_ARCH_SUSPEND_POSSIBLE=y -CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y -CONFIG_ARCH_WANT_GENERAL_HUGETLB=y -CONFIG_ZONE_DMA32=y -CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y -CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y -CONFIG_X86_64_SMP=y -CONFIG_X86_HT=y -CONFIG_ARCH_HWEIGHT_CFLAGS="-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" -CONFIG_ARCH_SUPPORTS_UPROBES=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" -CONFIG_IRQ_WORK=y -CONFIG_BUILDTIME_EXTABLE_SORT=y - -# -# General setup -# -CONFIG_INIT_ENV_ARG_LIMIT=32 -CONFIG_CROSS_COMPILE="" -# CONFIG_COMPILE_TEST is not set -CONFIG_LOCALVERSION="" -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_HAVE_KERNEL_GZIP=y -CONFIG_HAVE_KERNEL_BZIP2=y -CONFIG_HAVE_KERNEL_LZMA=y -CONFIG_HAVE_KERNEL_XZ=y -CONFIG_HAVE_KERNEL_LZO=y -CONFIG_HAVE_KERNEL_LZ4=y -# CONFIG_KERNEL_GZIP is not set -# CONFIG_KERNEL_BZIP2 is not set -# CONFIG_KERNEL_LZMA is not set -CONFIG_KERNEL_XZ=y -# CONFIG_KERNEL_LZO is not set -# CONFIG_KERNEL_LZ4 is not set 
-CONFIG_DEFAULT_HOSTNAME="calculate" -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_SYSVIPC_SYSCTL=y -CONFIG_POSIX_MQUEUE=y -CONFIG_POSIX_MQUEUE_SYSCTL=y -CONFIG_FHANDLE=y -CONFIG_USELIB=y -CONFIG_AUDIT=y -CONFIG_HAVE_ARCH_AUDITSYSCALL=y -CONFIG_AUDITSYSCALL=y -CONFIG_AUDIT_WATCH=y -CONFIG_AUDIT_TREE=y - -# -# IRQ subsystem -# -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_IRQ_SHOW=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_IRQ_DOMAIN=y -CONFIG_IRQ_FORCED_THREADING=y -CONFIG_SPARSE_IRQ=y -CONFIG_CLOCKSOURCE_WATCHDOG=y -CONFIG_ARCH_CLOCKSOURCE_DATA=y -CONFIG_GENERIC_TIME_VSYSCALL=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y -CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y -CONFIG_GENERIC_CMOS_UPDATE=y - -# -# Timers subsystem -# -CONFIG_TICK_ONESHOT=y -CONFIG_NO_HZ_COMMON=y -# CONFIG_HZ_PERIODIC is not set -CONFIG_NO_HZ_IDLE=y -# CONFIG_NO_HZ_FULL is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y - -# -# CPU/Task time and stats accounting -# -CONFIG_TICK_CPU_ACCOUNTING=y -# CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set -# CONFIG_IRQ_TIME_ACCOUNTING is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y - -# -# RCU Subsystem -# -CONFIG_TREE_PREEMPT_RCU=y -CONFIG_PREEMPT_RCU=y -CONFIG_RCU_STALL_COMMON=y -# CONFIG_RCU_USER_QS is not set -CONFIG_RCU_FANOUT=32 -CONFIG_RCU_FANOUT_LEAF=16 -# CONFIG_RCU_FANOUT_EXACT is not set -# CONFIG_RCU_FAST_NO_HZ is not set -# CONFIG_TREE_RCU_TRACE is not set -# CONFIG_RCU_BOOST is not set -# CONFIG_RCU_NOCB_CPU is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=15 -CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y -CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y -CONFIG_ARCH_SUPPORTS_INT128=y -CONFIG_ARCH_WANTS_PROT_NUMA_PROT_NONE=y -CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set -# CONFIG_CGROUP_FREEZER is not set -# CONFIG_CGROUP_DEVICE is not set -# CONFIG_CPUSETS is not set 
-CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -# CONFIG_MEMCG_SWAP_ENABLED is not set -# CONFIG_MEMCG_KMEM is not set -# CONFIG_CGROUP_PERF is not set -CONFIG_CGROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_BLK_CGROUP=y -# CONFIG_DEBUG_BLK_CGROUP is not set -# CONFIG_CHECKPOINT_RESTORE is not set -CONFIG_NAMESPACES=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y -CONFIG_USER_NS=y -CONFIG_PID_NS=y -CONFIG_NET_NS=y -CONFIG_SCHED_AUTOGROUP=y -CONFIG_MM_OWNER=y -# CONFIG_SYSFS_DEPRECATED is not set -# CONFIG_RELAY is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="/usr/share/v86d/initramfs " -CONFIG_INITRAMFS_ROOT_UID=0 -CONFIG_INITRAMFS_ROOT_GID=0 -CONFIG_RD_GZIP=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y -# CONFIG_INITRAMFS_COMPRESSION_NONE is not set -# CONFIG_INITRAMFS_COMPRESSION_GZIP is not set -# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set -# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_XZ=y -# CONFIG_INITRAMFS_COMPRESSION_LZO is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_ANON_INODES=y -CONFIG_HAVE_UID16=y -CONFIG_SYSCTL_EXCEPTION_TRACE=y -CONFIG_HAVE_PCSPKR_PLATFORM=y -# CONFIG_EXPERT is not set -CONFIG_UID16=y -CONFIG_SYSFS_SYSCALL=y -# CONFIG_SYSCTL_SYSCALL is not set -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_ALL is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_PCSPKR_PLATFORM=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y -CONFIG_EVENTFD=y -CONFIG_SHMEM=y -CONFIG_AIO=y -CONFIG_PCI_QUIRKS=y -# CONFIG_EMBEDDED is not set -CONFIG_HAVE_PERF_EVENTS=y - -# -# Kernel Performance Events And Counters -# -CONFIG_PERF_EVENTS=y -# CONFIG_DEBUG_PERF_USE_VMALLOC is not set -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_COMPAT_BRK is not set -# CONFIG_SLAB is not set -CONFIG_SLUB=y 
-CONFIG_SLUB_CPU_PARTIAL=y -# CONFIG_SYSTEM_TRUSTED_KEYRING is not set -# CONFIG_PROFILING is not set -CONFIG_HAVE_OPROFILE=y -CONFIG_OPROFILE_NMI_TIMER=y -# CONFIG_KPROBES is not set -# CONFIG_JUMP_LABEL is not set -# CONFIG_UPROBES is not set -# CONFIG_HAVE_64BIT_ALIGNED_ACCESS is not set -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_ARCH_USE_BUILTIN_BSWAP=y -CONFIG_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_OPTPROBES=y -CONFIG_HAVE_KPROBES_ON_FTRACE=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -CONFIG_HAVE_DMA_ATTRS=y -CONFIG_GENERIC_SMP_IDLE_THREAD=y -CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y -CONFIG_HAVE_DMA_API_DEBUG=y -CONFIG_HAVE_HW_BREAKPOINT=y -CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y -CONFIG_HAVE_USER_RETURN_NOTIFIER=y -CONFIG_HAVE_PERF_EVENTS_NMI=y -CONFIG_HAVE_PERF_REGS=y -CONFIG_HAVE_PERF_USER_STACK_DUMP=y -CONFIG_HAVE_ARCH_JUMP_LABEL=y -CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y -CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y -CONFIG_HAVE_CMPXCHG_LOCAL=y -CONFIG_HAVE_CMPXCHG_DOUBLE=y -CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y -CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y -CONFIG_HAVE_ARCH_SECCOMP_FILTER=y -CONFIG_SECCOMP_FILTER=y -CONFIG_HAVE_CC_STACKPROTECTOR=y -# CONFIG_CC_STACKPROTECTOR is not set -CONFIG_CC_STACKPROTECTOR_NONE=y -# CONFIG_CC_STACKPROTECTOR_REGULAR is not set -# CONFIG_CC_STACKPROTECTOR_STRONG is not set -CONFIG_HAVE_CONTEXT_TRACKING=y -CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y -CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y -CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y -CONFIG_HAVE_ARCH_SOFT_DIRTY=y -CONFIG_MODULES_USE_ELF_RELA=y -CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y -CONFIG_OLD_SIGSUSPEND3=y -CONFIG_COMPAT_OLD_SIGACTION=y - -# -# GCOV-based kernel profiling -# -# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set -CONFIG_SLABINFO=y -CONFIG_RT_MUTEXES=y -CONFIG_BASE_SMALL=0 -CONFIG_MODULES=y -# CONFIG_MODULE_FORCE_LOAD is not set -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -# CONFIG_MODVERSIONS is not set -# 
CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_MODULE_SIG is not set -CONFIG_STOP_MACHINE=y -CONFIG_BLOCK=y -CONFIG_BLK_DEV_BSG=y -CONFIG_BLK_DEV_BSGLIB=y -CONFIG_BLK_DEV_INTEGRITY=y -# CONFIG_BLK_DEV_THROTTLING is not set -# CONFIG_BLK_CMDLINE_PARSER is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_AIX_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -CONFIG_EFI_PARTITION=y -# CONFIG_SYSV68_PARTITION is not set -# CONFIG_CMDLINE_PARTITION is not set -CONFIG_BLOCK_COMPAT=y - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_CFQ_GROUP_IOSCHED is not set -CONFIG_IOSCHED_BFQ=y -CONFIG_CGROUP_BFQIO=y -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_BFQ is not set -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_PREEMPT_NOTIFIERS=y -CONFIG_UNINLINE_SPIN_UNLOCK=y -CONFIG_MUTEX_SPIN_ON_OWNER=y -CONFIG_FREEZER=y - -# -# Processor type and features -# -CONFIG_ZONE_DMA=y -CONFIG_SMP=y -CONFIG_X86_MPPARSE=y -# CONFIG_X86_EXTENDED_PLATFORM is not set -# CONFIG_X86_INTEL_LPSS is not set -CONFIG_X86_SUPPORTS_MEMORY_FAILURE=y -CONFIG_SCHED_OMIT_FRAME_POINTER=y -# CONFIG_HYPERVISOR_GUEST is not set -CONFIG_NO_BOOTMEM=y -# CONFIG_MEMTEST is not set -# CONFIG_MK8 is not set -# CONFIG_MPSC is not set -# CONFIG_MCORE2 is not set -# CONFIG_MATOM is not set -CONFIG_GENERIC_CPU=y -CONFIG_X86_INTERNODE_CACHE_SHIFT=6 -CONFIG_X86_L1_CACHE_SHIFT=6 
-CONFIG_X86_TSC=y -CONFIG_X86_CMPXCHG64=y -CONFIG_X86_CMOV=y -CONFIG_X86_MINIMUM_CPU_FAMILY=64 -CONFIG_X86_DEBUGCTLMSR=y -CONFIG_CPU_SUP_INTEL=y -CONFIG_CPU_SUP_AMD=y -CONFIG_CPU_SUP_CENTAUR=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -CONFIG_DMI=y -CONFIG_GART_IOMMU=y -# CONFIG_CALGARY_IOMMU is not set -CONFIG_SWIOTLB=y -CONFIG_IOMMU_HELPER=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS=64 -CONFIG_SCHED_SMT=y -CONFIG_SCHED_MC=y -# CONFIG_PREEMPT_NONE is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -CONFIG_PREEMPT=y -CONFIG_PREEMPT_COUNT=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -# CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS is not set -CONFIG_X86_MCE=y -CONFIG_X86_MCE_INTEL=y -CONFIG_X86_MCE_AMD=y -CONFIG_X86_MCE_THRESHOLD=y -# CONFIG_X86_MCE_INJECT is not set -CONFIG_X86_THERMAL_VECTOR=y -CONFIG_I8K=m -CONFIG_MICROCODE=y -CONFIG_MICROCODE_INTEL=y -CONFIG_MICROCODE_AMD=y -CONFIG_MICROCODE_OLD_INTERFACE=y -CONFIG_MICROCODE_INTEL_EARLY=y -CONFIG_MICROCODE_AMD_EARLY=y -CONFIG_MICROCODE_EARLY=y -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -CONFIG_ARCH_PHYS_ADDR_T_64BIT=y -CONFIG_ARCH_DMA_ADDR_T_64BIT=y -CONFIG_DIRECT_GBPAGES=y -# CONFIG_NUMA is not set -CONFIG_ARCH_SPARSEMEM_ENABLE=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_PROC_KCORE_TEXT=y -CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_SPARSEMEM_MANUAL=y -CONFIG_SPARSEMEM=y -CONFIG_HAVE_MEMORY_PRESENT=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER=y -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_HAVE_MEMBLOCK=y -CONFIG_HAVE_MEMBLOCK_NODE_MAP=y -CONFIG_ARCH_DISCARD_MEMBLOCK=y -# CONFIG_HAVE_BOOTMEM_INFO_NODE is not set -# CONFIG_MEMORY_HOTPLUG is not set -CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y -# CONFIG_COMPACTION is not set -CONFIG_PHYS_ADDR_T_64BIT=y -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BOUNCE=y -CONFIG_NEED_BOUNCE_POOL=y 
-CONFIG_VIRT_TO_BUS=y -CONFIG_MMU_NOTIFIER=y -CONFIG_KSM=y -CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 -CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y -# CONFIG_MEMORY_FAILURE is not set -# CONFIG_TRANSPARENT_HUGEPAGE is not set -CONFIG_CROSS_MEMORY_ATTACH=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -# CONFIG_CMA is not set -# CONFIG_ZBUD is not set -# CONFIG_ZSWAP is not set -CONFIG_ZSMALLOC=y -# CONFIG_PGTABLE_MAPPING is not set -CONFIG_GENERIC_EARLY_IOREMAP=y -# CONFIG_X86_CHECK_BIOS_CORRUPTION is not set -CONFIG_X86_RESERVE_LOW=64 -CONFIG_MTRR=y -CONFIG_MTRR_SANITIZER=y -CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 -CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 -CONFIG_X86_PAT=y -CONFIG_ARCH_USES_PG_UNCACHED=y -CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y -CONFIG_EFI=y -CONFIG_EFI_STUB=y -CONFIG_EFI_MIXED=y -CONFIG_SECCOMP=y -# CONFIG_HZ_100 is not set -# CONFIG_HZ_250 is not set -# CONFIG_HZ_300 is not set -CONFIG_HZ_1000=y -CONFIG_HZ=1000 -CONFIG_SCHED_HRTICK=y -CONFIG_KEXEC=y -# CONFIG_CRASH_DUMP is not set -# CONFIG_KEXEC_JUMP is not set -CONFIG_PHYSICAL_START=0x1000000 -CONFIG_RELOCATABLE=y -CONFIG_PHYSICAL_ALIGN=0x1000000 -CONFIG_HOTPLUG_CPU=y -# CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set -# CONFIG_DEBUG_HOTPLUG_CPU0 is not set -CONFIG_COMPAT_VDSO=y -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y - -# -# Power management and ACPI options -# -CONFIG_ARCH_HIBERNATION_HEADER=y -CONFIG_SUSPEND=y -CONFIG_SUSPEND_FREEZER=y -CONFIG_HIBERNATE_CALLBACKS=y -CONFIG_HIBERNATION=y -CONFIG_PM_STD_PARTITION="" -CONFIG_TOI_CORE=y - -# -# Image Storage (you need at least one allocator) -# -CONFIG_TOI_FILE=y -CONFIG_TOI_SWAP=y - -# -# General Options -# -CONFIG_TOI_CRYPTO=y -# CONFIG_TOI_USERUI is not set -CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT=-2 -# CONFIG_TOI_KEEP_IMAGE is not set -# CONFIG_TOI_INCREMENTAL is not set - -# -# No incremental image support available without Keep Image support. 
-# -CONFIG_TOI_REPLACE_SWSUSP=y -CONFIG_TOI_IGNORE_LATE_INITCALL=y -CONFIG_TOI_DEFAULT_WAIT=25 -CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE=2000 -CONFIG_TOI_CHECKSUM=y -CONFIG_TOI=y -CONFIG_TOI_ZRAM_SUPPORT=y -CONFIG_PM_SLEEP=y -CONFIG_PM_SLEEP_SMP=y -# CONFIG_PM_AUTOSLEEP is not set -# CONFIG_PM_WAKELOCKS is not set -CONFIG_PM_RUNTIME=y -CONFIG_PM=y -CONFIG_PM_DEBUG=y -CONFIG_PM_ADVANCED_DEBUG=y -# CONFIG_PM_TEST_SUSPEND is not set -CONFIG_PM_SLEEP_DEBUG=y -# CONFIG_PM_TRACE_RTC is not set -# CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set -CONFIG_ACPI=y -CONFIG_ACPI_SLEEP=y -# CONFIG_ACPI_PROCFS_POWER is not set -# CONFIG_ACPI_EC_DEBUGFS is not set -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_VIDEO=m -CONFIG_ACPI_FAN=m -CONFIG_ACPI_DOCK=y -CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_HOTPLUG_CPU=y -CONFIG_ACPI_PROCESSOR_AGGREGATOR=m -CONFIG_ACPI_THERMAL=m -# CONFIG_ACPI_CUSTOM_DSDT is not set -# CONFIG_ACPI_INITRD_TABLE_OVERRIDE is not set -# CONFIG_ACPI_DEBUG is not set -# CONFIG_ACPI_PCI_SLOT is not set -CONFIG_X86_PM_TIMER=y -CONFIG_ACPI_CONTAINER=y -CONFIG_ACPI_SBS=m -# CONFIG_ACPI_HED is not set -# CONFIG_ACPI_BGRT is not set -# CONFIG_ACPI_REDUCED_HARDWARE_ONLY is not set -# CONFIG_ACPI_APEI is not set -# CONFIG_ACPI_EXTLOG is not set -CONFIG_SFI=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y -# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=m -CONFIG_CPU_FREQ_GOV_ONDEMAND=y -CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m - -# -# x86 CPU frequency scaling drivers -# -CONFIG_X86_INTEL_PSTATE=y -CONFIG_X86_PCC_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ=m -CONFIG_X86_ACPI_CPUFREQ_CPB=y -CONFIG_X86_POWERNOW_K8=m 
-CONFIG_X86_AMD_FREQ_SENSITIVITY=m -CONFIG_X86_SPEEDSTEP_CENTRINO=m -CONFIG_X86_P4_CLOCKMOD=m - -# -# shared options -# -CONFIG_X86_SPEEDSTEP_LIB=m - -# -# CPU Idle -# -CONFIG_CPU_IDLE=y -# CONFIG_CPU_IDLE_MULTIPLE_DRIVERS is not set -CONFIG_CPU_IDLE_GOV_LADDER=y -CONFIG_CPU_IDLE_GOV_MENU=y -# CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED is not set -# CONFIG_INTEL_IDLE is not set - -# -# Memory power savings -# -# CONFIG_I7300_IDLE is not set - -# -# Bus options (PCI etc.) -# -CONFIG_PCI=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCIEPORTBUS=y -# CONFIG_HOTPLUG_PCI_PCIE is not set -CONFIG_PCIEAER=y -CONFIG_PCIE_ECRC=y -# CONFIG_PCIEAER_INJECT is not set -CONFIG_PCIEASPM=y -# CONFIG_PCIEASPM_DEBUG is not set -CONFIG_PCIEASPM_DEFAULT=y -# CONFIG_PCIEASPM_POWERSAVE is not set -# CONFIG_PCIEASPM_PERFORMANCE is not set -CONFIG_PCIE_PME=y -CONFIG_PCI_MSI=y -# CONFIG_PCI_DEBUG is not set -# CONFIG_PCI_REALLOC_ENABLE_AUTO is not set -# CONFIG_PCI_STUB is not set -CONFIG_HT_IRQ=y -# CONFIG_PCI_IOV is not set -# CONFIG_PCI_PRI is not set -# CONFIG_PCI_PASID is not set -CONFIG_PCI_IOAPIC=y -CONFIG_PCI_LABEL=y - -# -# PCI host controller drivers -# -CONFIG_ISA_DMA_API=y -CONFIG_AMD_NB=y -CONFIG_PCCARD=m -CONFIG_PCMCIA=m -CONFIG_PCMCIA_LOAD_CIS=y -CONFIG_CARDBUS=y - -# -# PC-card bridges -# -CONFIG_YENTA=m -CONFIG_YENTA_O2=y -CONFIG_YENTA_RICOH=y -CONFIG_YENTA_TI=y -CONFIG_YENTA_ENE_TUNE=y -CONFIG_YENTA_TOSHIBA=y -CONFIG_PD6729=m -CONFIG_I82092=m -CONFIG_PCCARD_NONSTATIC=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_HOTPLUG_PCI_ACPI_IBM=m -CONFIG_HOTPLUG_PCI_CPCI=y -CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m -CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m -CONFIG_HOTPLUG_PCI_SHPC=m -# CONFIG_RAPIDIO is not set -# CONFIG_X86_SYSFB is not set - -# -# Executable file formats / Emulations -# -CONFIG_BINFMT_ELF=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_SCRIPT=y -# 
CONFIG_HAVE_AOUT is not set -CONFIG_BINFMT_MISC=y -CONFIG_COREDUMP=y -CONFIG_IA32_EMULATION=y -# CONFIG_IA32_AOUT is not set -# CONFIG_X86_X32 is not set -CONFIG_COMPAT=y -CONFIG_COMPAT_FOR_U64_ALIGNMENT=y -CONFIG_SYSVIPC_COMPAT=y -CONFIG_KEYS_COMPAT=y -CONFIG_X86_DEV_DMA_OPS=y -CONFIG_NET=y -CONFIG_COMPAT_NETLINK_MESSAGES=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_DIAG is not set -CONFIG_UNIX=y -# CONFIG_UNIX_DIAG is not set -CONFIG_XFRM=y -CONFIG_XFRM_ALGO=m -# CONFIG_XFRM_USER is not set -# CONFIG_XFRM_SUB_POLICY is not set -# CONFIG_XFRM_MIGRATE is not set -# CONFIG_XFRM_STATISTICS is not set -CONFIG_XFRM_IPCOMP=m -CONFIG_NET_KEY=m -# CONFIG_NET_KEY_MIGRATE is not set -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -# CONFIG_IP_FIB_TRIE_STATS is not set -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_CLASSID=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -# CONFIG_NET_IPGRE_DEMUX is not set -CONFIG_NET_IP_TUNNEL=m -CONFIG_IP_MROUTE=y -# CONFIG_IP_MROUTE_MULTIPLE_TABLES is not set -# CONFIG_IP_PIMSM_V1 is not set -# CONFIG_IP_PIMSM_V2 is not set -CONFIG_SYN_COOKIES=y -# CONFIG_NET_IPVTI is not set -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_TUNNEL=m -CONFIG_INET_TUNNEL=m -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_INET_XFRM_MODE_BEET=y -CONFIG_INET_LRO=m -# CONFIG_INET_DIAG is not set -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -CONFIG_IPV6=m -CONFIG_IPV6_ROUTER_PREF=y -# CONFIG_IPV6_ROUTE_INFO is not set -# CONFIG_IPV6_OPTIMISTIC_DAD is not set -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_TUNNEL=m -CONFIG_INET6_TUNNEL=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m 
-CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -# CONFIG_IPV6_SIT_6RD is not set -CONFIG_IPV6_NDISC_NODETYPE=y -CONFIG_IPV6_TUNNEL=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -# CONFIG_IPV6_MROUTE is not set -# CONFIG_NETWORK_SECMARK is not set -CONFIG_NET_PTP_CLASSIFY=y -# CONFIG_NETWORK_PHY_TIMESTAMPING is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_NETFILTER_ADVANCED=y -CONFIG_BRIDGE_NETFILTER=y - -# -# Core Netfilter Configuration -# -CONFIG_NETFILTER_NETLINK=m -CONFIG_NETFILTER_NETLINK_ACCT=m -CONFIG_NETFILTER_NETLINK_QUEUE=m -CONFIG_NETFILTER_NETLINK_LOG=m -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_MARK=y -CONFIG_NF_CONNTRACK_ZONES=y -CONFIG_NF_CONNTRACK_PROCFS=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_LABELS=y -CONFIG_NF_CT_PROTO_DCCP=m -CONFIG_NF_CT_PROTO_GRE=m -CONFIG_NF_CT_PROTO_SCTP=m -CONFIG_NF_CT_PROTO_UDPLITE=m -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_BROADCAST=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_CT_NETLINK_HELPER=m -CONFIG_NETFILTER_NETLINK_QUEUE_CT=y -CONFIG_NF_NAT=m -CONFIG_NF_NAT_NEEDED=y -CONFIG_NF_NAT_PROTO_DCCP=m -CONFIG_NF_NAT_PROTO_UDPLITE=m -CONFIG_NF_NAT_PROTO_SCTP=m -CONFIG_NF_NAT_AMANDA=m -CONFIG_NF_NAT_FTP=m -CONFIG_NF_NAT_IRC=m -CONFIG_NF_NAT_SIP=m -CONFIG_NF_NAT_TFTP=m -CONFIG_NETFILTER_SYNPROXY=m -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NFT_EXTHDR=m -CONFIG_NFT_META=m -CONFIG_NFT_CT=m -CONFIG_NFT_RBTREE=m -CONFIG_NFT_HASH=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_NAT=m -CONFIG_NFT_QUEUE=m -CONFIG_NFT_REJECT=m -CONFIG_NFT_REJECT_INET=m -CONFIG_NFT_COMPAT=m 
-CONFIG_NETFILTER_XTABLES=m - -# -# Xtables combined modules -# -CONFIG_NETFILTER_XT_MARK=m -CONFIG_NETFILTER_XT_CONNMARK=m -CONFIG_NETFILTER_XT_SET=m - -# -# Xtables targets -# -# CONFIG_NETFILTER_XT_TARGET_AUDIT is not set -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LED=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NETMAP=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_TARGET_RATEEST=m -CONFIG_NETFILTER_XT_TARGET_REDIRECT=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m - -# -# Xtables matches -# -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CGROUP=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ECN=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_HL=m -CONFIG_NETFILTER_XT_MATCH_IPCOMP=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_L2TP=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m 
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_SOCKET=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_SET=m -CONFIG_IP_SET_MAX=256 -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPMARK=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -# CONFIG_IP_VS is not set - -# -# IP: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV4=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_CONNTRACK_PROC_COMPAT=y -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_NFT_REJECT_IPV4=m -CONFIG_NF_TABLES_ARP=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_SYNPROXY=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT_IPV4=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NF_NAT_SNMP_BASIC=m -CONFIG_NF_NAT_PROTO_GRE=m -CONFIG_NF_NAT_PPTP=m -CONFIG_NF_NAT_H323=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m 
-CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m - -# -# IPv6: Netfilter Configuration -# -CONFIG_NF_DEFRAG_IPV6=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_REJECT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_SYNPROXY=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_NF_NAT_IPV6=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m -# CONFIG_BRIDGE_NF_EBTABLES is not set -# CONFIG_IP_DCCP is not set -CONFIG_IP_SCTP=m -# CONFIG_SCTP_DBG_OBJCNT is not set -CONFIG_SCTP_DEFAULT_COOKIE_HMAC_MD5=y -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_SHA1 is not set -# CONFIG_SCTP_DEFAULT_COOKIE_HMAC_NONE is not set -CONFIG_SCTP_COOKIE_HMAC_MD5=y -CONFIG_SCTP_COOKIE_HMAC_SHA1=y -CONFIG_RDS=m -CONFIG_RDS_TCP=m -# CONFIG_RDS_DEBUG is not set -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_L2TP is not set -CONFIG_STP=m -CONFIG_BRIDGE=m -CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_BRIDGE_VLAN_FILTERING=y -CONFIG_HAVE_NET_DSA=y -CONFIG_NET_DSA=m -CONFIG_NET_DSA_TAG_DSA=y -CONFIG_NET_DSA_TAG_EDSA=y -CONFIG_NET_DSA_TAG_TRAILER=y -CONFIG_VLAN_8021Q=m -# CONFIG_VLAN_8021Q_GVRP is not set -# CONFIG_VLAN_8021Q_MVRP is not set -# CONFIG_DECNET is not set -CONFIG_LLC=m -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_PHONET is not set -# CONFIG_IEEE802154 is not set -CONFIG_6LOWPAN_IPHC=m -CONFIG_NET_SCHED=y - -# -# Queueing/Scheduling -# -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m 
-CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_HHF=m -CONFIG_NET_SCH_PIE=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m - -# -# Classification -# -CONFIG_NET_CLS=y -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=m -CONFIG_NET_CLS_BPF=m -CONFIG_NET_EMATCH=y -CONFIG_NET_EMATCH_STACK=32 -CONFIG_NET_EMATCH_CMP=m -CONFIG_NET_EMATCH_NBYTE=m -CONFIG_NET_EMATCH_U32=m -CONFIG_NET_EMATCH_META=m -CONFIG_NET_EMATCH_TEXT=m -CONFIG_NET_EMATCH_IPSET=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_CLS_IND=y -CONFIG_NET_SCH_FIFO=y -# CONFIG_DCB is not set -CONFIG_DNS_RESOLVER=y -CONFIG_BATMAN_ADV=m -CONFIG_BATMAN_ADV_BLA=y -CONFIG_BATMAN_ADV_DAT=y -CONFIG_BATMAN_ADV_NC=y -CONFIG_BATMAN_ADV_MCAST=y -# CONFIG_BATMAN_ADV_DEBUG is not set -CONFIG_OPENVSWITCH=m -CONFIG_OPENVSWITCH_VXLAN=y -CONFIG_VSOCKETS=m -# CONFIG_NETLINK_MMAP is not set -# CONFIG_NETLINK_DIAG is not set -CONFIG_NET_MPLS_GSO=m -CONFIG_HSR=m -CONFIG_RPS=y -CONFIG_RFS_ACCEL=y -CONFIG_XPS=y -# CONFIG_CGROUP_NET_PRIO is not set -CONFIG_CGROUP_NET_CLASSID=y -CONFIG_NET_RX_BUSY_POLL=y -CONFIG_BQL=y -# CONFIG_BPF_JIT is not set -CONFIG_NET_FLOW_LIMIT=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# 
CONFIG_HAMRADIO is not set -# CONFIG_CAN is not set -# CONFIG_IRDA is not set -CONFIG_BT=m -CONFIG_BT_6LOWPAN=y -CONFIG_BT_RFCOMM=m -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_HIDP=m - -# -# Bluetooth device drivers -# -CONFIG_BT_HCIBTUSB=m -# CONFIG_BT_HCIBTSDIO is not set -# CONFIG_BT_HCIUART is not set -CONFIG_BT_HCIBCM203X=m -CONFIG_BT_HCIBPA10X=m -CONFIG_BT_HCIBFUSB=m -CONFIG_BT_HCIDTL1=m -CONFIG_BT_HCIBT3C=m -CONFIG_BT_HCIBLUECARD=m -CONFIG_BT_HCIBTUART=m -# CONFIG_BT_HCIVHCI is not set -CONFIG_BT_MRVL=m -CONFIG_BT_MRVL_SDIO=m -CONFIG_BT_ATH3K=m -# CONFIG_AF_RXRPC is not set -CONFIG_FIB_RULES=y -CONFIG_WIRELESS=y -CONFIG_WIRELESS_EXT=y -CONFIG_WEXT_CORE=y -CONFIG_WEXT_PROC=y -CONFIG_WEXT_SPY=y -CONFIG_WEXT_PRIV=y -CONFIG_CFG80211=m -# CONFIG_NL80211_TESTMODE is not set -# CONFIG_CFG80211_DEVELOPER_WARNINGS is not set -# CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_CFG80211_DEFAULT_PS=y -# CONFIG_CFG80211_INTERNAL_REGDB is not set -CONFIG_CFG80211_WEXT=y -CONFIG_LIB80211=m -CONFIG_LIB80211_CRYPT_WEP=m -CONFIG_LIB80211_CRYPT_CCMP=m -CONFIG_LIB80211_CRYPT_TKIP=m -# CONFIG_LIB80211_DEBUG is not set -CONFIG_MAC80211=m -CONFIG_MAC80211_HAS_RC=y -CONFIG_MAC80211_RC_MINSTREL=y -CONFIG_MAC80211_RC_MINSTREL_HT=y -CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y -CONFIG_MAC80211_RC_DEFAULT="minstrel_ht" -CONFIG_MAC80211_MESH=y -CONFIG_MAC80211_LEDS=y -# CONFIG_MAC80211_MESSAGE_TRACING is not set -# CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_WIMAX=m -CONFIG_WIMAX_DEBUG_LEVEL=8 -CONFIG_RFKILL=m -CONFIG_RFKILL_LEDS=y -CONFIG_RFKILL_INPUT=y -# CONFIG_NET_9P is not set -# CONFIG_CAIF is not set -# CONFIG_CEPH_LIB is not set -# CONFIG_NFC is not set -CONFIG_HAVE_BPF_JIT=y - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -CONFIG_FW_LOADER=y -CONFIG_FIRMWARE_IN_KERNEL=y 
-CONFIG_EXTRA_FIRMWARE="" -CONFIG_FW_LOADER_USER_HELPER=y -# CONFIG_DEBUG_DRIVER is not set -# CONFIG_DEBUG_DEVRES is not set -# CONFIG_SYS_HYPERVISOR is not set -# CONFIG_GENERIC_CPU_DEVICES is not set -CONFIG_GENERIC_CPU_AUTOPROBE=y -CONFIG_REGMAP=y -CONFIG_REGMAP_I2C=m -CONFIG_REGMAP_MMIO=y -CONFIG_REGMAP_IRQ=y -CONFIG_DMA_SHARED_BUFFER=y - -# -# Bus devices -# -CONFIG_CONNECTOR=y -CONFIG_PROC_EVENTS=y -# CONFIG_MTD is not set -CONFIG_PARPORT=y -CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y -CONFIG_PARPORT_PC=y -# CONFIG_PARPORT_SERIAL is not set -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_PC_PCMCIA is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_AX88796 is not set -# CONFIG_PARPORT_1284 is not set -CONFIG_PARPORT_NOT_PC=y -CONFIG_PNP=y -CONFIG_PNP_DEBUG_MESSAGES=y - -# -# Protocols -# -CONFIG_PNPACPI=y -CONFIG_BLK_DEV=y -# CONFIG_BLK_DEV_NULL_BLK is not set -CONFIG_BLK_DEV_FD=m -# CONFIG_PARIDE is not set -CONFIG_BLK_DEV_PCIESSD_MTIP32XX=m -CONFIG_ZRAM=m -CONFIG_ZRAM_LZ4_COMPRESS=y -# CONFIG_ZRAM_DEBUG is not set -CONFIG_BLK_CPQ_CISS_DA=m -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_DRBD is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_NVME is not set -# CONFIG_BLK_DEV_SKD is not set -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=4096 -# CONFIG_BLK_DEV_XIP is not set -CONFIG_CDROM_PKTCDVD=m -CONFIG_CDROM_PKTCDVD_BUFFERS=8 -# CONFIG_CDROM_PKTCDVD_WCACHE is not set -# CONFIG_ATA_OVER_ETH is not set -CONFIG_VIRTIO_BLK=m -# CONFIG_BLK_DEV_HD is not set -# CONFIG_BLK_DEV_RBD is not set -CONFIG_BLK_DEV_RSXX=m - -# -# Misc devices -# -CONFIG_SENSORS_LIS3LV02D=m -# CONFIG_AD525X_DPOT is not set -# CONFIG_DUMMY_IRQ is not set 
-CONFIG_IBM_ASM=m -CONFIG_PHANTOM=m -CONFIG_SGI_IOC4=m -CONFIG_TIFM_CORE=m -CONFIG_TIFM_7XX1=m -# CONFIG_ICS932S401 is not set -# CONFIG_ATMEL_SSC is not set -CONFIG_ENCLOSURE_SERVICES=m -# CONFIG_HP_ILO is not set -# CONFIG_APDS9802ALS is not set -# CONFIG_ISL29003 is not set -# CONFIG_ISL29020 is not set -# CONFIG_SENSORS_TSL2550 is not set -# CONFIG_SENSORS_BH1780 is not set -# CONFIG_SENSORS_BH1770 is not set -# CONFIG_SENSORS_APDS990X is not set -# CONFIG_HMC6352 is not set -# CONFIG_DS1682 is not set -# CONFIG_BMP085_I2C is not set -# CONFIG_PCH_PHUB is not set -# CONFIG_USB_SWITCH_FSA9480 is not set -# CONFIG_SRAM is not set -# CONFIG_C2PORT is not set - -# -# EEPROM support -# -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set -# CONFIG_EEPROM_MAX6875 is not set -CONFIG_EEPROM_93CX6=m -CONFIG_CB710_CORE=m -# CONFIG_CB710_DEBUG is not set -CONFIG_CB710_DEBUG_ASSUMPTIONS=y - -# -# Texas Instruments shared transport line discipline -# -CONFIG_SENSORS_LIS3_I2C=m - -# -# Altera FPGA firmware download module -# -CONFIG_ALTERA_STAPL=m -CONFIG_INTEL_MEI=m -CONFIG_INTEL_MEI_ME=m -CONFIG_INTEL_MEI_TXE=m -# CONFIG_VMWARE_VMCI is not set - -# -# Intel MIC Host Driver -# -# CONFIG_INTEL_MIC_HOST is not set - -# -# Intel MIC Card Driver -# -# CONFIG_INTEL_MIC_CARD is not set -CONFIG_GENWQE=m -# CONFIG_ECHO is not set -CONFIG_HAVE_IDE=y -# CONFIG_IDE is not set - -# -# SCSI device support -# -CONFIG_SCSI_MOD=y -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_SCSI_DMA=y -CONFIG_SCSI_TGT=m -CONFIG_SCSI_NETLINK=y -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -# CONFIG_CHR_DEV_OSST is not set -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=m -# CONFIG_CHR_DEV_SCH is not set -# CONFIG_SCSI_ENCLOSURE is not set -CONFIG_SCSI_MULTI_LUN=y -# CONFIG_SCSI_CONSTANTS is not set -# CONFIG_SCSI_LOGGING is not set -# CONFIG_SCSI_SCAN_ASYNC is not set - -# -# SCSI Transports -# 
-CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m -# CONFIG_SCSI_FC_TGT_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SAS_LIBSAS=m -# CONFIG_SCSI_SAS_ATA is not set -CONFIG_SCSI_SAS_HOST_SMP=y -# CONFIG_SCSI_SRP_ATTRS is not set -CONFIG_SCSI_LOWLEVEL=y -CONFIG_ISCSI_TCP=m -CONFIG_ISCSI_BOOT_SYSFS=m -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_SCSI_BNX2X_FCOE=m -CONFIG_BE2ISCSI=m -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_HPSA=m -CONFIG_SCSI_3W_9XXX=m -CONFIG_SCSI_3W_SAS=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_RESET_DELAY_MS=5000 -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=32 -CONFIG_AIC79XX_RESET_DELAY_MS=5000 -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_AIC79XX_REG_PRETTY_PRINT=y -CONFIG_SCSI_AIC94XX=m -# CONFIG_AIC94XX_DEBUG is not set -CONFIG_SCSI_MVSAS=m -CONFIG_SCSI_MVSAS_DEBUG=y -# CONFIG_SCSI_MVSAS_TASKLET is not set -CONFIG_SCSI_MVUMI=m -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_ARCMSR=m -CONFIG_SCSI_ESAS2R=m -CONFIG_MEGARAID_NEWGEN=y -CONFIG_MEGARAID_MM=m -CONFIG_MEGARAID_MAILBOX=m -CONFIG_MEGARAID_LEGACY=m -CONFIG_MEGARAID_SAS=m -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_MPT2SAS_MAX_SGE=128 -CONFIG_SCSI_MPT2SAS_LOGGING=y -CONFIG_SCSI_MPT3SAS=m -CONFIG_SCSI_MPT3SAS_MAX_SGE=128 -CONFIG_SCSI_MPT3SAS_LOGGING=y -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -CONFIG_SCSI_UFSHCD_PLATFORM=m -CONFIG_SCSI_HPTIOP=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_FLASHPOINT is not set -CONFIG_VMWARE_PVSCSI=m -CONFIG_LIBFC=m -CONFIG_LIBFCOE=m -CONFIG_FCOE=m -CONFIG_FCOE_FNIC=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -CONFIG_SCSI_EATA_LINKED_COMMANDS=y -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_FUTURE_DOMAIN=m 
-CONFIG_SCSI_GDTH=m -CONFIG_SCSI_ISCI=m -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -CONFIG_SCSI_IZIP_EPP16=y -CONFIG_SCSI_IZIP_SLOW_CTR=y -CONFIG_SCSI_STEX=m -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -CONFIG_SCSI_SYM53C8XX_MMIO=y -CONFIG_SCSI_IPR=m -CONFIG_SCSI_IPR_TRACE=y -CONFIG_SCSI_IPR_DUMP=y -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA_FC=m -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_DC395x=m -CONFIG_SCSI_DC390T=m -CONFIG_SCSI_DEBUG=m -CONFIG_SCSI_PMCRAID=m -CONFIG_SCSI_PM8001=m -CONFIG_SCSI_SRP=m -CONFIG_SCSI_BFA_FC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_CHELSIO_FCOE=m -# CONFIG_SCSI_LOWLEVEL_PCMCIA is not set -# CONFIG_SCSI_DH is not set -# CONFIG_SCSI_OSD_INITIATOR is not set -CONFIG_ATA=y -# CONFIG_ATA_NONSTANDARD is not set -# CONFIG_ATA_VERBOSE_ERROR is not set -CONFIG_ATA_ACPI=y -# CONFIG_SATA_ZPODD is not set -CONFIG_SATA_PMP=y - -# -# Controllers with non-SFF native interface -# -CONFIG_SATA_AHCI=y -# CONFIG_SATA_AHCI_PLATFORM is not set -CONFIG_SATA_INIC162X=m -# CONFIG_SATA_ACARD_AHCI is not set -CONFIG_SATA_SIL24=m -CONFIG_ATA_SFF=y - -# -# SFF controllers with custom DMA interface -# -CONFIG_PDC_ADMA=m -CONFIG_SATA_QSTOR=m -CONFIG_SATA_SX4=m -CONFIG_ATA_BMDMA=y - -# -# SATA SFF controllers with BMDMA -# -CONFIG_ATA_PIIX=m -CONFIG_SATA_MV=m -CONFIG_SATA_NV=m -CONFIG_SATA_PROMISE=m -CONFIG_SATA_SIL=m -CONFIG_SATA_SIS=m -CONFIG_SATA_SVW=m -CONFIG_SATA_ULI=m -CONFIG_SATA_VIA=m -CONFIG_SATA_VITESSE=m - -# -# PATA SFF controllers with BMDMA -# -CONFIG_PATA_ALI=m -CONFIG_PATA_AMD=m -CONFIG_PATA_ARTOP=m -CONFIG_PATA_ATIIXP=m -CONFIG_PATA_ATP867X=m -CONFIG_PATA_CMD64X=m -CONFIG_PATA_CYPRESS=m -CONFIG_PATA_EFAR=m -CONFIG_PATA_HPT366=m -CONFIG_PATA_HPT37X=m -CONFIG_PATA_HPT3X2N=m -CONFIG_PATA_HPT3X3=m -CONFIG_PATA_HPT3X3_DMA=y -CONFIG_PATA_IT8213=m -CONFIG_PATA_IT821X=m 
-CONFIG_PATA_JMICRON=m -CONFIG_PATA_MARVELL=m -CONFIG_PATA_NETCELL=m -CONFIG_PATA_NINJA32=m -CONFIG_PATA_NS87415=m -CONFIG_PATA_OLDPIIX=m -CONFIG_PATA_OPTIDMA=m -CONFIG_PATA_PDC2027X=m -CONFIG_PATA_PDC_OLD=m -CONFIG_PATA_RADISYS=m -CONFIG_PATA_RDC=m -CONFIG_PATA_SCH=m -CONFIG_PATA_SERVERWORKS=m -CONFIG_PATA_SIL680=m -CONFIG_PATA_SIS=m -CONFIG_PATA_TOSHIBA=m -CONFIG_PATA_TRIFLEX=m -CONFIG_PATA_VIA=m -CONFIG_PATA_WINBOND=m - -# -# PIO-only SFF controllers -# -CONFIG_PATA_CMD640_PCI=m -CONFIG_PATA_MPIIX=m -CONFIG_PATA_NS87410=m -CONFIG_PATA_OPTI=m -CONFIG_PATA_PCMCIA=m -CONFIG_PATA_RZ1000=m - -# -# Generic fallback / legacy drivers -# -CONFIG_PATA_ACPI=m -CONFIG_ATA_GENERIC=m -CONFIG_PATA_LEGACY=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=m -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -# CONFIG_MD_MULTIPATH is not set -# CONFIG_MD_FAULTY is not set -CONFIG_BCACHE=m -# CONFIG_BCACHE_DEBUG is not set -# CONFIG_BCACHE_CLOSURES_DEBUG is not set -CONFIG_BLK_DEV_DM_BUILTIN=y -CONFIG_BLK_DEV_DM=m -# CONFIG_DM_DEBUG is not set -CONFIG_DM_BUFIO=m -CONFIG_DM_BIO_PRISON=m -CONFIG_DM_PERSISTENT_DATA=m -# CONFIG_DM_DEBUG_BLOCK_STACK_TRACING is not set -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_CACHE=m -CONFIG_DM_CACHE_MQ=m -CONFIG_DM_CACHE_CLEANER=m -CONFIG_DM_ERA=m -CONFIG_DM_MIRROR=m -# CONFIG_DM_LOG_USERSPACE is not set -CONFIG_DM_RAID=m -# CONFIG_DM_ZERO is not set -# CONFIG_DM_MULTIPATH is not set -# CONFIG_DM_DELAY is not set -# CONFIG_DM_UEVENT is not set -# CONFIG_DM_FLAKEY is not set -# CONFIG_DM_VERITY is not set -# CONFIG_DM_SWITCH is not set -# CONFIG_TARGET_CORE is not set -CONFIG_FUSION=y -CONFIG_FUSION_SPI=m -CONFIG_FUSION_FC=m -CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=128 -CONFIG_FUSION_CTL=m -# CONFIG_FUSION_LOGGING is not set - -# -# IEEE 1394 (FireWire) support -# -CONFIG_FIREWIRE=m -CONFIG_FIREWIRE_OHCI=m -CONFIG_FIREWIRE_SBP2=m -# CONFIG_FIREWIRE_NET is not set -# 
CONFIG_FIREWIRE_NOSY is not set -# CONFIG_I2O is not set -CONFIG_MACINTOSH_DRIVERS=y -CONFIG_MAC_EMUMOUSEBTN=y -CONFIG_NETDEVICES=y -CONFIG_MII=m -CONFIG_NET_CORE=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -# CONFIG_EQUALIZER is not set -# CONFIG_NET_FC is not set -CONFIG_IFB=m -CONFIG_NET_TEAM=m -CONFIG_NET_TEAM_MODE_BROADCAST=m -CONFIG_NET_TEAM_MODE_ROUNDROBIN=m -CONFIG_NET_TEAM_MODE_RANDOM=m -CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=m -CONFIG_NET_TEAM_MODE_LOADBALANCE=m -# CONFIG_MACVLAN is not set -CONFIG_VXLAN=m -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -CONFIG_TUN=m -# CONFIG_VETH is not set -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -CONFIG_SUNGEM_PHY=m -# CONFIG_ARCNET is not set - -# -# CAIF transport drivers -# -CONFIG_VHOST_NET=m -CONFIG_VHOST_RING=m -CONFIG_VHOST=m - -# -# Distributed Switch Architecture drivers -# -CONFIG_NET_DSA_MV88E6XXX=m -CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MV88E6XXX_NEED_PPU=y -CONFIG_NET_DSA_MV88E6131=m -CONFIG_NET_DSA_MV88E6123_61_65=m -CONFIG_ETHERNET=y -CONFIG_MDIO=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_3C589=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_ADAPTEC=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_NET_VENDOR_ALTEON=y -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_ALTERA_TSE=m -CONFIG_NET_VENDOR_AMD=y -CONFIG_AMD8111_ETH=m -CONFIG_PCNET32=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_NET_VENDOR_ARC=y -CONFIG_NET_VENDOR_ATHEROS=y -CONFIG_ATL2=m -CONFIG_ATL1=m -CONFIG_ATL1E=m -CONFIG_ATL1C=m -CONFIG_ALX=m -CONFIG_NET_VENDOR_BROADCOM=y -CONFIG_B44=m -CONFIG_B44_PCI_AUTOSELECT=y -CONFIG_B44_PCICORE_AUTOSELECT=y -CONFIG_B44_PCI=y -CONFIG_BNX2=m -CONFIG_CNIC=m -CONFIG_TIGON3=m -CONFIG_BNX2X=m -CONFIG_NET_VENDOR_BROCADE=y -CONFIG_BNA=m -CONFIG_NET_CALXEDA_XGMAC=m -CONFIG_NET_VENDOR_CHELSIO=y -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_CHELSIO_T4=m -CONFIG_CHELSIO_T4VF=m -CONFIG_NET_VENDOR_CISCO=y -CONFIG_ENIC=m 
-CONFIG_CX_ECAT=m -CONFIG_DNET=m -CONFIG_NET_VENDOR_DEC=y -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_DE2104X_DSL=0 -CONFIG_TULIP=m -CONFIG_TULIP_MWI=y -CONFIG_TULIP_MMIO=y -CONFIG_TULIP_NAPI=y -CONFIG_TULIP_NAPI_HW_MITIGATION=y -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -CONFIG_ULI526X=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_VENDOR_DLINK=y -CONFIG_DL2K=m -CONFIG_SUNDANCE=m -CONFIG_SUNDANCE_MMIO=y -CONFIG_NET_VENDOR_EMULEX=y -CONFIG_BE2NET=m -CONFIG_BE2NET_VXLAN=y -CONFIG_NET_VENDOR_EXAR=y -CONFIG_S2IO=m -CONFIG_VXGE=m -# CONFIG_VXGE_DEBUG_TRACE_ALL is not set -CONFIG_NET_VENDOR_FUJITSU=y -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_NET_VENDOR_HP=y -CONFIG_HP100=m -CONFIG_NET_VENDOR_INTEL=y -CONFIG_E100=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_IGB=m -CONFIG_IGB_HWMON=y -CONFIG_IGB_DCA=y -CONFIG_IGBVF=m -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_IXGBE_HWMON=y -CONFIG_IXGBE_DCA=y -CONFIG_IXGBEVF=m -CONFIG_I40E=m -# CONFIG_I40E_VXLAN is not set -CONFIG_I40EVF=m -CONFIG_NET_VENDOR_I825XX=y -CONFIG_IP1000=m -CONFIG_JME=m -CONFIG_NET_VENDOR_MARVELL=y -CONFIG_MVMDIO=m -CONFIG_SKGE=m -CONFIG_SKGE_GENESIS=y -CONFIG_SKY2=m -CONFIG_NET_VENDOR_MELLANOX=y -CONFIG_MLX4_EN=m -CONFIG_MLX4_EN_VXLAN=y -CONFIG_MLX4_CORE=m -CONFIG_MLX4_DEBUG=y -# CONFIG_MLX5_CORE is not set -CONFIG_NET_VENDOR_MICREL=y -CONFIG_KS8842=m -CONFIG_KS8851_MLL=m -CONFIG_KSZ884X_PCI=m -CONFIG_NET_VENDOR_MYRI=y -CONFIG_MYRI10GE=m -CONFIG_MYRI10GE_DCA=y -CONFIG_FEALNX=m -CONFIG_NET_VENDOR_NATSEMI=y -CONFIG_NATSEMI=m -CONFIG_NS83820=m -CONFIG_NET_VENDOR_8390=y -CONFIG_PCMCIA_AXNET=m -CONFIG_NE2K_PCI=m -CONFIG_PCMCIA_PCNET=m -CONFIG_NET_VENDOR_NVIDIA=y -CONFIG_FORCEDETH=m -CONFIG_NET_VENDOR_OKI=y -CONFIG_PCH_GBE=m -CONFIG_ETHOC=m -CONFIG_NET_PACKET_ENGINE=y -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_NET_VENDOR_QLOGIC=y -CONFIG_QLA3XXX=m -CONFIG_QLCNIC=m -# CONFIG_QLCNIC_VXLAN is not set -CONFIG_QLGE=m -CONFIG_NETXEN_NIC=m -CONFIG_NET_VENDOR_REALTEK=y -CONFIG_ATP=m -CONFIG_8139CP=m -CONFIG_8139TOO=m 
-CONFIG_8139TOO_PIO=y -CONFIG_8139TOO_TUNE_TWISTER=y -CONFIG_8139TOO_8129=y -CONFIG_8139_OLD_RX_RESET=y -CONFIG_R8169=m -CONFIG_SH_ETH=m -CONFIG_NET_VENDOR_RDC=y -CONFIG_R6040=m -CONFIG_NET_VENDOR_SAMSUNG=y -CONFIG_SXGBE_ETH=m -CONFIG_NET_VENDOR_SEEQ=y -CONFIG_NET_VENDOR_SILAN=y -CONFIG_SC92031=m -CONFIG_NET_VENDOR_SIS=y -CONFIG_SIS900=m -CONFIG_SIS190=m -CONFIG_SFC=m -CONFIG_SFC_MCDI_MON=y -CONFIG_NET_VENDOR_SMSC=y -CONFIG_PCMCIA_SMC91C92=m -CONFIG_EPIC100=m -CONFIG_SMSC911X=m -# CONFIG_SMSC911X_ARCH_HOOKS is not set -CONFIG_SMSC9420=m -CONFIG_NET_VENDOR_STMICRO=y -CONFIG_STMMAC_ETH=m -CONFIG_STMMAC_PLATFORM=y -# CONFIG_STMMAC_PCI is not set -CONFIG_STMMAC_DA=y -CONFIG_NET_VENDOR_SUN=y -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -# CONFIG_NIU is not set -CONFIG_NET_VENDOR_TEHUTI=y -CONFIG_TEHUTI=m -CONFIG_NET_VENDOR_TI=y -CONFIG_TLAN=m -CONFIG_NET_VENDOR_VIA=y -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_NET_VENDOR_WIZNET=y -CONFIG_WIZNET_W5100=m -CONFIG_WIZNET_W5300=m -# CONFIG_WIZNET_BUS_DIRECT is not set -# CONFIG_WIZNET_BUS_INDIRECT is not set -CONFIG_WIZNET_BUS_ANY=y -CONFIG_NET_VENDOR_XIRCOM=y -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_NET_SB1000 is not set -CONFIG_PHYLIB=m - -# -# MII PHY device drivers -# -CONFIG_AT803X_PHY=m -# CONFIG_AMD_PHY is not set -# CONFIG_MARVELL_PHY is not set -# CONFIG_DAVICOM_PHY is not set -# CONFIG_QSEMI_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_CICADA_PHY is not set -# CONFIG_VITESSE_PHY is not set -CONFIG_SMSC_PHY=m -CONFIG_BROADCOM_PHY=m -CONFIG_BCM7XXX_PHY=m -CONFIG_BCM87XX_PHY=m -# CONFIG_ICPLUS_PHY is not set -# CONFIG_REALTEK_PHY is not set -# CONFIG_NATIONAL_PHY is not set -# CONFIG_STE10XP is not set -# CONFIG_LSI_ET1011C_PHY is not set -# CONFIG_MICREL_PHY is not set -CONFIG_MDIO_BITBANG=m -# CONFIG_PLIP is not set -CONFIG_PPP=y -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_MPPE=m 
-CONFIG_PPP_MULTILINK=y -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_SLIP=m -CONFIG_SLHC=y -# CONFIG_SLIP_COMPRESSED is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set - -# -# USB Network Adapters -# -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_RTL8152=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_AX8817X=m -CONFIG_USB_NET_AX88179_178A=m -CONFIG_USB_NET_CDCETHER=m -CONFIG_USB_NET_CDC_EEM=m -CONFIG_USB_NET_CDC_NCM=m -CONFIG_USB_NET_HUAWEI_CDC_NCM=m -CONFIG_USB_NET_CDC_MBIM=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_SR9700=m -CONFIG_USB_NET_SR9800=m -CONFIG_USB_NET_SMSC75XX=m -CONFIG_USB_NET_SMSC95XX=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_NET1080=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_NET_CDC_SUBSET=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -CONFIG_USB_NET_ZAURUS=m -# CONFIG_USB_NET_CX82310_ETH is not set -CONFIG_USB_NET_KALMIA=m -CONFIG_USB_NET_QMI_WWAN=m -CONFIG_USB_HSO=m -CONFIG_USB_NET_INT51X1=m -CONFIG_USB_IPHETH=m -CONFIG_USB_SIERRA_NET=m -CONFIG_USB_VL600=m -CONFIG_WLAN=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_LIBERTAS_THINFIRM=m -# CONFIG_LIBERTAS_THINFIRM_DEBUG is not set -CONFIG_LIBERTAS_THINFIRM_USB=m -CONFIG_AIRO=m -CONFIG_ATMEL=m -CONFIG_PCI_ATMEL=m -CONFIG_PCMCIA_ATMEL=m -CONFIG_AT76C50X_USB=m -CONFIG_AIRO_CS=m -CONFIG_PCMCIA_WL3501=m -CONFIG_PRISM54=m -CONFIG_USB_ZD1201=m -CONFIG_USB_NET_RNDIS_WLAN=m -CONFIG_RTL8180=m -CONFIG_RTL8187=m -CONFIG_RTL8187_LEDS=y -CONFIG_ADM8211=m -CONFIG_MAC80211_HWSIM=m -CONFIG_MWL8K=m -CONFIG_ATH_COMMON=m -CONFIG_ATH_CARDS=m -# CONFIG_ATH_DEBUG is not set -CONFIG_ATH5K=m -# CONFIG_ATH5K_DEBUG is not set -CONFIG_ATH5K_PCI=y -CONFIG_ATH9K_HW=m -CONFIG_ATH9K_COMMON=m -CONFIG_ATH9K_BTCOEX_SUPPORT=y -CONFIG_ATH9K=m -CONFIG_ATH9K_PCI=y -CONFIG_ATH9K_AHB=y -# CONFIG_ATH9K_WOW is not set 
-CONFIG_ATH9K_RFKILL=y -CONFIG_ATH9K_HTC=m -# CONFIG_CARL9170 is not set -CONFIG_ATH6KL=m -CONFIG_ATH6KL_SDIO=m -CONFIG_ATH6KL_USB=m -# CONFIG_ATH6KL_DEBUG is not set -CONFIG_AR5523=m -CONFIG_WIL6210=m -CONFIG_WIL6210_ISR_COR=y -CONFIG_ATH10K=m -CONFIG_ATH10K_PCI=m -# CONFIG_ATH10K_DEBUG is not set -# CONFIG_ATH10K_DEBUGFS is not set -CONFIG_WCN36XX=m -# CONFIG_WCN36XX_DEBUGFS is not set -CONFIG_B43=m -CONFIG_B43_BCMA=y -CONFIG_B43_SSB=y -CONFIG_B43_PCI_AUTOSELECT=y -CONFIG_B43_PCICORE_AUTOSELECT=y -CONFIG_B43_PCMCIA=y -CONFIG_B43_SDIO=y -CONFIG_B43_BCMA_PIO=y -CONFIG_B43_PIO=y -CONFIG_B43_PHY_N=y -CONFIG_B43_PHY_LP=y -CONFIG_B43_PHY_HT=y -CONFIG_B43_LEDS=y -CONFIG_B43_HWRNG=y -# CONFIG_B43_DEBUG is not set -CONFIG_B43LEGACY=m -CONFIG_B43LEGACY_PCI_AUTOSELECT=y -CONFIG_B43LEGACY_PCICORE_AUTOSELECT=y -CONFIG_B43LEGACY_LEDS=y -CONFIG_B43LEGACY_HWRNG=y -CONFIG_B43LEGACY_DEBUG=y -CONFIG_B43LEGACY_DMA=y -CONFIG_B43LEGACY_PIO=y -CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y -# CONFIG_B43LEGACY_DMA_MODE is not set -# CONFIG_B43LEGACY_PIO_MODE is not set -CONFIG_BRCMUTIL=m -CONFIG_BRCMSMAC=m -CONFIG_BRCMFMAC=m -CONFIG_BRCMFMAC_SDIO=y -CONFIG_BRCMFMAC_USB=y -# CONFIG_BRCM_TRACING is not set -# CONFIG_BRCMDBG is not set -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_HOSTAP_PLX=m -CONFIG_HOSTAP_PCI=m -CONFIG_HOSTAP_CS=m -CONFIG_IPW2100=m -CONFIG_IPW2100_MONITOR=y -# CONFIG_IPW2100_DEBUG is not set -CONFIG_IPW2200=m -CONFIG_IPW2200_MONITOR=y -CONFIG_IPW2200_RADIOTAP=y -CONFIG_IPW2200_PROMISCUOUS=y -CONFIG_IPW2200_QOS=y -# CONFIG_IPW2200_DEBUG is not set -CONFIG_LIBIPW=m -# CONFIG_LIBIPW_DEBUG is not set -CONFIG_IWLWIFI=m -CONFIG_IWLDVM=m -CONFIG_IWLMVM=m -CONFIG_IWLWIFI_OPMODE_MODULAR=y -CONFIG_IWLWIFI_BCAST_FILTERING=y - -# -# Debugging Options -# -# CONFIG_IWLWIFI_DEBUG is not set -CONFIG_IWLEGACY=m -CONFIG_IWL4965=m -CONFIG_IWL3945=m - -# -# iwl3945 / iwl4965 Debugging Options -# -# CONFIG_IWLEGACY_DEBUG is not set -CONFIG_LIBERTAS=m 
-CONFIG_LIBERTAS_USB=m -CONFIG_LIBERTAS_CS=m -CONFIG_LIBERTAS_SDIO=m -# CONFIG_LIBERTAS_DEBUG is not set -# CONFIG_LIBERTAS_MESH is not set -CONFIG_HERMES=m -CONFIG_HERMES_PRISM=y -CONFIG_HERMES_CACHE_FW_ON_INIT=y -CONFIG_PLX_HERMES=m -CONFIG_TMD_HERMES=m -CONFIG_NORTEL_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_PCMCIA_SPECTRUM=m -CONFIG_ORINOCO_USB=m -CONFIG_P54_COMMON=m -CONFIG_P54_USB=m -CONFIG_P54_PCI=m -CONFIG_P54_LEDS=y -CONFIG_RT2X00=m -CONFIG_RT2400PCI=m -CONFIG_RT2500PCI=m -CONFIG_RT61PCI=m -CONFIG_RT2800PCI=m -CONFIG_RT2800PCI_RT33XX=y -CONFIG_RT2800PCI_RT35XX=y -CONFIG_RT2800PCI_RT53XX=y -CONFIG_RT2800PCI_RT3290=y -CONFIG_RT2500USB=m -CONFIG_RT73USB=m -CONFIG_RT2800USB=m -CONFIG_RT2800USB_RT33XX=y -CONFIG_RT2800USB_RT35XX=y -CONFIG_RT2800USB_RT3573=y -CONFIG_RT2800USB_RT53XX=y -CONFIG_RT2800USB_RT55XX=y -CONFIG_RT2800USB_UNKNOWN=y -CONFIG_RT2800_LIB=m -CONFIG_RT2800_LIB_MMIO=m -CONFIG_RT2X00_LIB_MMIO=m -CONFIG_RT2X00_LIB_PCI=m -CONFIG_RT2X00_LIB_USB=m -CONFIG_RT2X00_LIB=m -CONFIG_RT2X00_LIB_FIRMWARE=y -CONFIG_RT2X00_LIB_CRYPTO=y -CONFIG_RT2X00_LIB_LEDS=y -# CONFIG_RT2X00_DEBUG is not set -CONFIG_RTL_CARDS=m -CONFIG_RTL8192CE=m -CONFIG_RTL8192SE=m -CONFIG_RTL8192DE=m -CONFIG_RTL8723AE=m -CONFIG_RTL8723BE=m -CONFIG_RTL8188EE=m -CONFIG_RTL8192CU=m -CONFIG_RTLWIFI=m -CONFIG_RTLWIFI_PCI=m -CONFIG_RTLWIFI_USB=m -# CONFIG_RTLWIFI_DEBUG is not set -CONFIG_RTL8192C_COMMON=m -CONFIG_RTL8723_COMMON=m -CONFIG_RTLBTCOEXIST=m -# CONFIG_WL_TI is not set -CONFIG_ZD1211RW=m -# CONFIG_ZD1211RW_DEBUG is not set -CONFIG_MWIFIEX=m -CONFIG_MWIFIEX_SDIO=m -CONFIG_MWIFIEX_PCIE=m -CONFIG_MWIFIEX_USB=m -CONFIG_CW1200=m -CONFIG_CW1200_WLAN_SDIO=m -CONFIG_RSI_91X=m -# CONFIG_RSI_DEBUGFS is not set -CONFIG_RSI_SDIO=m -CONFIG_RSI_USB=m - -# -# WiMAX Wireless Broadband devices -# -CONFIG_WIMAX_I2400M=m -CONFIG_WIMAX_I2400M_USB=m -CONFIG_WIMAX_I2400M_DEBUG_LEVEL=8 -# CONFIG_WAN is not set -CONFIG_VMXNET3=m -# CONFIG_ISDN is not set - -# -# Input device support -# 
-CONFIG_INPUT=y -CONFIG_INPUT_FF_MEMLESS=y -CONFIG_INPUT_POLLDEV=m -CONFIG_INPUT_SPARSEKMAP=m -CONFIG_INPUT_MATRIXKMAP=m - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -CONFIG_INPUT_MOUSEDEV_PSAUX=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -# CONFIG_KEYBOARD_ADP5588 is not set -# CONFIG_KEYBOARD_ADP5589 is not set -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_QT1070 is not set -# CONFIG_KEYBOARD_QT2160 is not set -# CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_TCA6416 is not set -# CONFIG_KEYBOARD_TCA8418 is not set -# CONFIG_KEYBOARD_LM8323 is not set -# CONFIG_KEYBOARD_LM8333 is not set -# CONFIG_KEYBOARD_MAX7359 is not set -# CONFIG_KEYBOARD_MCS is not set -# CONFIG_KEYBOARD_MPR121 is not set -# CONFIG_KEYBOARD_NEWTON is not set -# CONFIG_KEYBOARD_OPENCORES is not set -# CONFIG_KEYBOARD_STOWAWAY is not set -# CONFIG_KEYBOARD_SUNKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -CONFIG_MOUSE_PS2_ALPS=y -CONFIG_MOUSE_PS2_LOGIPS2PP=y -CONFIG_MOUSE_PS2_SYNAPTICS=y -CONFIG_MOUSE_PS2_CYPRESS=y -CONFIG_MOUSE_PS2_LIFEBOOK=y -CONFIG_MOUSE_PS2_TRACKPOINT=y -CONFIG_MOUSE_PS2_ELANTECH=y -CONFIG_MOUSE_PS2_SENTELIC=y -CONFIG_MOUSE_PS2_TOUCHKIT=y -# CONFIG_MOUSE_SERIAL is not set -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_BCM5974=m -CONFIG_MOUSE_CYAPA=m -# CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_MOUSE_SYNAPTICS_I2C is not set -CONFIG_MOUSE_SYNAPTICS_USB=m -CONFIG_INPUT_JOYSTICK=y -CONFIG_JOYSTICK_ANALOG=m -CONFIG_JOYSTICK_A3D=m -CONFIG_JOYSTICK_ADI=m -CONFIG_JOYSTICK_COBRA=m -CONFIG_JOYSTICK_GF2K=m -CONFIG_JOYSTICK_GRIP=m -CONFIG_JOYSTICK_GRIP_MP=m -CONFIG_JOYSTICK_GUILLEMOT=m -CONFIG_JOYSTICK_INTERACT=m -CONFIG_JOYSTICK_SIDEWINDER=m -CONFIG_JOYSTICK_TMDC=m -CONFIG_JOYSTICK_IFORCE=m -CONFIG_JOYSTICK_IFORCE_USB=y -CONFIG_JOYSTICK_IFORCE_232=y 
-CONFIG_JOYSTICK_WARRIOR=m -CONFIG_JOYSTICK_MAGELLAN=m -CONFIG_JOYSTICK_SPACEORB=m -CONFIG_JOYSTICK_SPACEBALL=m -CONFIG_JOYSTICK_STINGER=m -CONFIG_JOYSTICK_TWIDJOY=m -CONFIG_JOYSTICK_ZHENHUA=m -CONFIG_JOYSTICK_DB9=m -CONFIG_JOYSTICK_GAMECON=m -CONFIG_JOYSTICK_TURBOGRAFX=m -CONFIG_JOYSTICK_AS5011=m -CONFIG_JOYSTICK_JOYDUMP=m -CONFIG_JOYSTICK_XPAD=m -CONFIG_JOYSTICK_XPAD_FF=y -CONFIG_JOYSTICK_XPAD_LEDS=y -# CONFIG_JOYSTICK_WALKERA0701 is not set -CONFIG_INPUT_TABLET=y -CONFIG_TABLET_USB_ACECAD=m -CONFIG_TABLET_USB_AIPTEK=m -CONFIG_TABLET_USB_GTCO=m -# CONFIG_TABLET_USB_HANWANG is not set -CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_WACOM=m -# CONFIG_INPUT_TOUCHSCREEN is not set -CONFIG_INPUT_MISC=y -# CONFIG_INPUT_AD714X is not set -CONFIG_INPUT_BMA150=m -CONFIG_INPUT_PCSPKR=m -# CONFIG_INPUT_MMA8450 is not set -# CONFIG_INPUT_MPU3050 is not set -# CONFIG_INPUT_APANEL is not set -# CONFIG_INPUT_ATLAS_BTNS is not set -# CONFIG_INPUT_ATI_REMOTE2 is not set -# CONFIG_INPUT_KEYSPAN_REMOTE is not set -# CONFIG_INPUT_KXTJ9 is not set -# CONFIG_INPUT_POWERMATE is not set -CONFIG_INPUT_YEALINK=m -# CONFIG_INPUT_CM109 is not set -# CONFIG_INPUT_RETU_PWRBUTTON is not set -CONFIG_INPUT_UINPUT=m -# CONFIG_INPUT_PCF8574 is not set -# CONFIG_INPUT_ADXL34X is not set -# CONFIG_INPUT_IMS_PCU is not set -# CONFIG_INPUT_CMA3000 is not set -# CONFIG_INPUT_IDEAPAD_SLIDEBAR is not set - -# -# Hardware I/O ports -# -CONFIG_SERIO=y -CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y -CONFIG_SERIO_I8042=y -CONFIG_SERIO_SERPORT=m -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PARKBD is not set -# CONFIG_SERIO_PCIPS2 is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set -# CONFIG_SERIO_ALTERA_PS2 is not set -# CONFIG_SERIO_PS2MULT is not set -# CONFIG_SERIO_ARC_PS2 is not set -CONFIG_GAMEPORT=m -# CONFIG_GAMEPORT_NS558 is not set -# CONFIG_GAMEPORT_L4 is not set -CONFIG_GAMEPORT_EMU10K1=m -# CONFIG_GAMEPORT_FM801 is not set - -# -# Character devices -# -CONFIG_TTY=y -CONFIG_VT=y 
-CONFIG_CONSOLE_TRANSLATIONS=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_CONSOLE_SLEEP=y -CONFIG_HW_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_UNIX98_PTYS=y -# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set -# CONFIG_LEGACY_PTYS is not set -CONFIG_SERIAL_NONSTANDARD=y -# CONFIG_ROCKETPORT is not set -# CONFIG_CYCLADES is not set -# CONFIG_MOXA_INTELLIO is not set -# CONFIG_MOXA_SMARTIO is not set -# CONFIG_SYNCLINK is not set -# CONFIG_SYNCLINKMP is not set -# CONFIG_SYNCLINK_GT is not set -# CONFIG_NOZOMI is not set -# CONFIG_ISI is not set -# CONFIG_N_HDLC is not set -# CONFIG_N_GSM is not set -# CONFIG_TRACE_SINK is not set -CONFIG_DEVKMEM=y - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_DEPRECATED_OPTIONS=y -CONFIG_SERIAL_8250_PNP=y -# CONFIG_SERIAL_8250_CONSOLE is not set -CONFIG_FIX_EARLYCON_MEM=y -CONFIG_SERIAL_8250_DMA=y -CONFIG_SERIAL_8250_PCI=y -CONFIG_SERIAL_8250_CS=m -CONFIG_SERIAL_8250_NR_UARTS=4 -CONFIG_SERIAL_8250_RUNTIME_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set -# CONFIG_SERIAL_8250_DW is not set - -# -# Non-8250 serial port support -# -# CONFIG_SERIAL_MFD_HSU is not set -CONFIG_SERIAL_CORE=y -# CONFIG_SERIAL_JSM is not set -# CONFIG_SERIAL_SCCNXP is not set -# CONFIG_SERIAL_ALTERA_JTAGUART is not set -# CONFIG_SERIAL_ALTERA_UART is not set -# CONFIG_SERIAL_PCH_UART is not set -# CONFIG_SERIAL_ARC is not set -# CONFIG_SERIAL_RP2 is not set -# CONFIG_SERIAL_FSL_LPUART is not set -CONFIG_PRINTER=y -# CONFIG_LP_CONSOLE is not set -# CONFIG_PPDEV is not set -# CONFIG_VIRTIO_CONSOLE is not set -# CONFIG_IPMI_HANDLER is not set -CONFIG_HW_RANDOM=m -# CONFIG_HW_RANDOM_TIMERIOMEM is not set -CONFIG_HW_RANDOM_INTEL=m -CONFIG_HW_RANDOM_AMD=m -CONFIG_HW_RANDOM_VIA=m -# CONFIG_HW_RANDOM_VIRTIO is not set -CONFIG_NVRAM=m -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set - -# -# PCMCIA character devices -# -# CONFIG_SYNCLINK_CS is not set -# CONFIG_CARDMAN_4000 is not set -# CONFIG_CARDMAN_4040 is not set -# CONFIG_IPWIRELESS is not 
set -CONFIG_MWAVE=m -# CONFIG_RAW_DRIVER is not set -# CONFIG_HPET is not set -# CONFIG_HANGCHECK_TIMER is not set -# CONFIG_TCG_TPM is not set -# CONFIG_TELCLOCK is not set -CONFIG_DEVPORT=y -CONFIG_I2C=m -CONFIG_I2C_BOARDINFO=y -CONFIG_I2C_COMPAT=y -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_MUX=m - -# -# Multiplexer I2C Chip support -# -# CONFIG_I2C_MUX_PCA9541 is not set -# CONFIG_I2C_MUX_PCA954x is not set -CONFIG_I2C_HELPER_AUTO=y -CONFIG_I2C_SMBUS=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCA=m - -# -# I2C Hardware Bus support -# - -# -# PC SMBus host controller drivers -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD756_S4882=m -CONFIG_I2C_AMD8111=m -CONFIG_I2C_I801=m -CONFIG_I2C_ISCH=m -CONFIG_I2C_ISMT=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_NFORCE2=m -CONFIG_I2C_NFORCE2_S4985=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m - -# -# ACPI drivers -# -CONFIG_I2C_SCMI=m - -# -# I2C system bus drivers (mostly embedded / system-on-chip) -# -# CONFIG_I2C_DESIGNWARE_PLATFORM is not set -# CONFIG_I2C_DESIGNWARE_PCI is not set -CONFIG_I2C_EG20T=m -CONFIG_I2C_OCORES=m -CONFIG_I2C_PCA_PLATFORM=m -# CONFIG_I2C_PXA_PCI is not set -CONFIG_I2C_SIMTEC=m -CONFIG_I2C_XILINX=m - -# -# External I2C/SMBus adapter drivers -# -CONFIG_I2C_DIOLAN_U2C=m -CONFIG_I2C_PARPORT=m -CONFIG_I2C_PARPORT_LIGHT=m -# CONFIG_I2C_ROBOTFUZZ_OSIF is not set -CONFIG_I2C_TAOS_EVM=m -CONFIG_I2C_TINY_USB=m -CONFIG_I2C_VIPERBOARD=m - -# -# Other I2C/SMBus bus drivers -# -CONFIG_I2C_STUB=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_SPI is not set -CONFIG_SPMI=m -# CONFIG_HSI is not set - -# -# PPS support -# -CONFIG_PPS=m -# CONFIG_PPS_DEBUG is not set - -# -# PPS clients support -# -# CONFIG_PPS_CLIENT_KTIMER is not set -# CONFIG_PPS_CLIENT_LDISC is not set -# CONFIG_PPS_CLIENT_PARPORT is not set -# CONFIG_PPS_CLIENT_GPIO is not set - 
-# -# PPS generators support -# - -# -# PTP clock support -# -CONFIG_PTP_1588_CLOCK=m - -# -# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. -# -CONFIG_PTP_1588_CLOCK_PCH=m -CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y -# CONFIG_GPIOLIB is not set -# CONFIG_W1 is not set -CONFIG_POWER_SUPPLY=y -# CONFIG_POWER_SUPPLY_DEBUG is not set -# CONFIG_PDA_POWER is not set -# CONFIG_TEST_POWER is not set -# CONFIG_BATTERY_DS2780 is not set -# CONFIG_BATTERY_DS2781 is not set -# CONFIG_BATTERY_DS2782 is not set -# CONFIG_BATTERY_SBS is not set -# CONFIG_BATTERY_BQ27x00 is not set -# CONFIG_BATTERY_MAX17040 is not set -# CONFIG_BATTERY_MAX17042 is not set -CONFIG_CHARGER_ISP1704=m -# CONFIG_CHARGER_MAX8903 is not set -# CONFIG_CHARGER_LP8727 is not set -# CONFIG_CHARGER_BQ2415X is not set -# CONFIG_CHARGER_SMB347 is not set -# CONFIG_POWER_RESET is not set -# CONFIG_POWER_AVS is not set -CONFIG_HWMON=m -CONFIG_HWMON_VID=m -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Native drivers -# -CONFIG_SENSORS_ABITUGURU=m -CONFIG_SENSORS_ABITUGURU3=m -CONFIG_SENSORS_AD7414=m -CONFIG_SENSORS_AD7418=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1026=m -CONFIG_SENSORS_ADM1029=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_ADT7X10=m -CONFIG_SENSORS_ADT7410=m -CONFIG_SENSORS_ADT7411=m -CONFIG_SENSORS_ADT7462=m -CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_ADT7475=m -CONFIG_SENSORS_ASC7621=m -CONFIG_SENSORS_K8TEMP=m -CONFIG_SENSORS_K10TEMP=m -CONFIG_SENSORS_FAM15H_POWER=m -CONFIG_SENSORS_APPLESMC=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_ATXP1=m -CONFIG_SENSORS_DS620=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_I5K_AMB=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m -CONFIG_SENSORS_F75375S=m -CONFIG_SENSORS_FSCHMD=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_G760A=m -CONFIG_SENSORS_G762=m -CONFIG_SENSORS_HIH6130=m -CONFIG_SENSORS_CORETEMP=m -CONFIG_SENSORS_IT87=m -# CONFIG_SENSORS_JC42 is not set 
-CONFIG_SENSORS_LINEAGE=m -CONFIG_SENSORS_LTC2945=m -CONFIG_SENSORS_LTC4151=m -CONFIG_SENSORS_LTC4215=m -CONFIG_SENSORS_LTC4222=m -CONFIG_SENSORS_LTC4245=m -CONFIG_SENSORS_LTC4260=m -CONFIG_SENSORS_LTC4261=m -CONFIG_SENSORS_MAX16065=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX1668=m -CONFIG_SENSORS_MAX197=m -CONFIG_SENSORS_MAX6639=m -CONFIG_SENSORS_MAX6642=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_MAX6697=m -CONFIG_SENSORS_HTU21=m -CONFIG_SENSORS_MCP3021=m -CONFIG_SENSORS_LM63=m -CONFIG_SENSORS_LM73=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_LM95234=m -CONFIG_SENSORS_LM95241=m -CONFIG_SENSORS_LM95245=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m -CONFIG_SENSORS_NTC_THERMISTOR=m -CONFIG_SENSORS_NCT6775=m -CONFIG_SENSORS_PCF8591=m -CONFIG_PMBUS=m -CONFIG_SENSORS_PMBUS=m -CONFIG_SENSORS_ADM1275=m -CONFIG_SENSORS_LM25066=m -CONFIG_SENSORS_LTC2978=m -CONFIG_SENSORS_MAX16064=m -CONFIG_SENSORS_MAX34440=m -CONFIG_SENSORS_MAX8688=m -CONFIG_SENSORS_UCD9000=m -CONFIG_SENSORS_UCD9200=m -CONFIG_SENSORS_ZL6100=m -CONFIG_SENSORS_SHT21=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_EMC1403=m -CONFIG_SENSORS_EMC2103=m -CONFIG_SENSORS_EMC6W201=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m -CONFIG_SENSORS_SCH56XX_COMMON=m -CONFIG_SENSORS_SCH5627=m -CONFIG_SENSORS_SCH5636=m -CONFIG_SENSORS_SMM665=m -CONFIG_SENSORS_ADC128D818=m -CONFIG_SENSORS_ADS1015=m -CONFIG_SENSORS_ADS7828=m -CONFIG_SENSORS_AMC6821=m -CONFIG_SENSORS_INA209=m -CONFIG_SENSORS_INA2XX=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_TMP102=m -CONFIG_SENSORS_TMP401=m -CONFIG_SENSORS_TMP421=m -CONFIG_SENSORS_VIA_CPUTEMP=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83791D=m 
-CONFIG_SENSORS_W83792D=m -CONFIG_SENSORS_W83793=m -CONFIG_SENSORS_W83795=m -CONFIG_SENSORS_W83795_FANCTRL=y -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m - -# -# ACPI drivers -# -CONFIG_SENSORS_ACPI_POWER=m -CONFIG_SENSORS_ATK0110=m -CONFIG_THERMAL=y -CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y -# CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set -# CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set -# CONFIG_THERMAL_GOV_FAIR_SHARE is not set -CONFIG_THERMAL_GOV_STEP_WISE=y -CONFIG_THERMAL_GOV_USER_SPACE=y -# CONFIG_THERMAL_EMULATION is not set -CONFIG_INTEL_POWERCLAMP=m -CONFIG_X86_PKG_TEMP_THERMAL=m -CONFIG_ACPI_INT3403_THERMAL=m - -# -# Texas Instruments thermal drivers -# -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_CORE=y -# CONFIG_WATCHDOG_NOWAYOUT is not set - -# -# Watchdog Device Drivers -# -CONFIG_SOFT_WATCHDOG=m -CONFIG_XILINX_WATCHDOG=m -CONFIG_DW_WATCHDOG=m -CONFIG_RETU_WATCHDOG=m -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM1535_WDT=m -CONFIG_ALIM7101_WDT=m -# CONFIG_F71808E_WDT is not set -CONFIG_SP5100_TCO=m -CONFIG_SBC_FITPC2_WATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_IBMASR=m -CONFIG_WAFER_WDT=m -CONFIG_I6300ESB_WDT=m -CONFIG_IE6XX_WDT=m -CONFIG_ITCO_WDT=m -CONFIG_ITCO_VENDOR_SUPPORT=y -CONFIG_IT8712F_WDT=m -CONFIG_IT87_WDT=m -CONFIG_HP_WATCHDOG=m -# CONFIG_HPWDT_NMI_DECODING is not set -CONFIG_SC1200_WDT=m -CONFIG_PC87413_WDT=m -CONFIG_NV_TCO=m -CONFIG_60XX_WDT=m -CONFIG_SBC8360_WDT=m -CONFIG_CPU5_WDT=m -CONFIG_SMSC_SCH311X_WDT=m -CONFIG_SMSC37B787_WDT=m -CONFIG_VIA_WDT=m -CONFIG_W83627HF_WDT=m -CONFIG_W83697HF_WDT=m -CONFIG_W83697UG_WDT=m -CONFIG_W83877F_WDT=m -CONFIG_W83977F_WDT=m -CONFIG_MACHZ_WDT=m -CONFIG_SBC_EPX_C3_WATCHDOG=m - -# -# PCI-based Watchdog Cards -# -CONFIG_PCIPCWATCHDOG=m -CONFIG_WDTPCI=m - -# -# USB-based Watchdog Cards -# -CONFIG_USBPCWATCHDOG=m -CONFIG_SSB_POSSIBLE=y - -# -# Sonics Silicon Backplane -# -CONFIG_SSB=m -CONFIG_SSB_SPROM=y 
-CONFIG_SSB_BLOCKIO=y -CONFIG_SSB_PCIHOST_POSSIBLE=y -CONFIG_SSB_PCIHOST=y -CONFIG_SSB_B43_PCI_BRIDGE=y -CONFIG_SSB_PCMCIAHOST_POSSIBLE=y -CONFIG_SSB_PCMCIAHOST=y -CONFIG_SSB_SDIOHOST_POSSIBLE=y -CONFIG_SSB_SDIOHOST=y -# CONFIG_SSB_DEBUG is not set -CONFIG_SSB_DRIVER_PCICORE_POSSIBLE=y -CONFIG_SSB_DRIVER_PCICORE=y -CONFIG_BCMA_POSSIBLE=y - -# -# Broadcom specific AMBA -# -CONFIG_BCMA=m -CONFIG_BCMA_BLOCKIO=y -CONFIG_BCMA_HOST_PCI_POSSIBLE=y -CONFIG_BCMA_HOST_PCI=y -CONFIG_BCMA_HOST_SOC=y -# CONFIG_BCMA_DRIVER_GMAC_CMN is not set -# CONFIG_BCMA_DEBUG is not set - -# -# Multifunction device drivers -# -CONFIG_MFD_CORE=m -# CONFIG_MFD_CS5535 is not set -CONFIG_MFD_BCM590XX=m -# CONFIG_MFD_CROS_EC is not set -# CONFIG_MFD_MC13XXX_I2C is not set -# CONFIG_HTC_PASIC3 is not set -CONFIG_LPC_ICH=m -CONFIG_LPC_SCH=m -# CONFIG_MFD_JANZ_CMODIO is not set -# CONFIG_MFD_KEMPLD is not set -CONFIG_MFD_VIPERBOARD=m -CONFIG_MFD_RETU=m -# CONFIG_MFD_PCF50633 is not set -# CONFIG_MFD_RDC321X is not set -# CONFIG_MFD_RTSX_PCI is not set -CONFIG_MFD_RTSX_USB=m -# CONFIG_MFD_SI476X_CORE is not set -# CONFIG_MFD_SM501 is not set -# CONFIG_ABX500_CORE is not set -CONFIG_MFD_SYSCON=y -# CONFIG_MFD_TI_AM335X_TSCADC is not set -# CONFIG_MFD_LP3943 is not set -# CONFIG_TPS6105X is not set -# CONFIG_TPS6507X is not set -# CONFIG_MFD_TPS65217 is not set -CONFIG_MFD_TPS65218=m -CONFIG_MFD_WL1273_CORE=m -# CONFIG_MFD_LM3533 is not set -# CONFIG_MFD_TMIO is not set -# CONFIG_MFD_VX855 is not set -# CONFIG_MFD_ARIZONA_I2C is not set -# CONFIG_REGULATOR is not set -CONFIG_MEDIA_SUPPORT=m - -# -# Multimedia core support -# -CONFIG_MEDIA_CAMERA_SUPPORT=y -CONFIG_MEDIA_ANALOG_TV_SUPPORT=y -CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y -# CONFIG_MEDIA_RADIO_SUPPORT is not set -# CONFIG_MEDIA_RC_SUPPORT is not set -# CONFIG_MEDIA_CONTROLLER is not set -CONFIG_VIDEO_DEV=m -CONFIG_VIDEO_V4L2=m -# CONFIG_VIDEO_ADV_DEBUG is not set -# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set -CONFIG_VIDEO_TUNER=m 
-CONFIG_VIDEOBUF_GEN=m -CONFIG_VIDEOBUF_DMA_SG=m -CONFIG_VIDEOBUF_VMALLOC=m -CONFIG_VIDEOBUF_DVB=m -CONFIG_VIDEOBUF2_CORE=m -CONFIG_VIDEOBUF2_MEMOPS=m -CONFIG_VIDEOBUF2_VMALLOC=m -CONFIG_DVB_CORE=m -CONFIG_DVB_NET=y -# CONFIG_TTPCI_EEPROM is not set -CONFIG_DVB_MAX_ADAPTERS=8 -# CONFIG_DVB_DYNAMIC_MINORS is not set - -# -# Media drivers -# -CONFIG_MEDIA_USB_SUPPORT=y - -# -# Webcam devices -# -CONFIG_USB_VIDEO_CLASS=m -CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y -CONFIG_USB_GSPCA=m -CONFIG_USB_M5602=m -CONFIG_USB_STV06XX=m -CONFIG_USB_GL860=m -CONFIG_USB_GSPCA_BENQ=m -CONFIG_USB_GSPCA_CONEX=m -CONFIG_USB_GSPCA_CPIA1=m -CONFIG_USB_GSPCA_ETOMS=m -CONFIG_USB_GSPCA_FINEPIX=m -CONFIG_USB_GSPCA_JEILINJ=m -CONFIG_USB_GSPCA_JL2005BCD=m -CONFIG_USB_GSPCA_KINECT=m -# CONFIG_USB_GSPCA_KONICA is not set -CONFIG_USB_GSPCA_MARS=m -CONFIG_USB_GSPCA_MR97310A=m -CONFIG_USB_GSPCA_NW80X=m -CONFIG_USB_GSPCA_OV519=m -CONFIG_USB_GSPCA_OV534=m -CONFIG_USB_GSPCA_OV534_9=m -CONFIG_USB_GSPCA_PAC207=m -CONFIG_USB_GSPCA_PAC7302=m -CONFIG_USB_GSPCA_PAC7311=m -CONFIG_USB_GSPCA_SE401=m -CONFIG_USB_GSPCA_SN9C2028=m -CONFIG_USB_GSPCA_SN9C20X=m -CONFIG_USB_GSPCA_SONIXB=m -CONFIG_USB_GSPCA_SONIXJ=m -CONFIG_USB_GSPCA_SPCA500=m -CONFIG_USB_GSPCA_SPCA501=m -CONFIG_USB_GSPCA_SPCA505=m -CONFIG_USB_GSPCA_SPCA506=m -CONFIG_USB_GSPCA_SPCA508=m -CONFIG_USB_GSPCA_SPCA561=m -CONFIG_USB_GSPCA_SPCA1528=m -CONFIG_USB_GSPCA_SQ905=m -CONFIG_USB_GSPCA_SQ905C=m -CONFIG_USB_GSPCA_SQ930X=m -CONFIG_USB_GSPCA_STK014=m -CONFIG_USB_GSPCA_STK1135=m -CONFIG_USB_GSPCA_STV0680=m -CONFIG_USB_GSPCA_SUNPLUS=m -CONFIG_USB_GSPCA_T613=m -CONFIG_USB_GSPCA_TOPRO=m -CONFIG_USB_GSPCA_TV8532=m -CONFIG_USB_GSPCA_VC032X=m -CONFIG_USB_GSPCA_VICAM=m -# CONFIG_USB_GSPCA_XIRLINK_CIT is not set -CONFIG_USB_GSPCA_ZC3XX=m -CONFIG_USB_PWC=m -# CONFIG_USB_PWC_DEBUG is not set -CONFIG_USB_PWC_INPUT_EVDEV=y -CONFIG_VIDEO_CPIA2=m -CONFIG_USB_ZR364XX=m -CONFIG_USB_STKWEBCAM=m -CONFIG_USB_S2255=m -CONFIG_VIDEO_USBTV=m - -# -# Analog TV USB devices -# 
-CONFIG_VIDEO_PVRUSB2=m -CONFIG_VIDEO_PVRUSB2_SYSFS=y -CONFIG_VIDEO_PVRUSB2_DVB=y -# CONFIG_VIDEO_PVRUSB2_DEBUGIFC is not set -CONFIG_VIDEO_HDPVR=m -CONFIG_VIDEO_USBVISION=m -CONFIG_VIDEO_STK1160_COMMON=m -CONFIG_VIDEO_STK1160_AC97=y -CONFIG_VIDEO_STK1160=m - -# -# Analog/digital TV USB devices -# -CONFIG_VIDEO_AU0828=m -CONFIG_VIDEO_AU0828_V4L2=y - -# -# Digital TV USB devices -# -CONFIG_DVB_USB_V2=m -CONFIG_DVB_USB_AF9015=m -CONFIG_DVB_USB_AF9035=m -CONFIG_DVB_USB_ANYSEE=m -CONFIG_DVB_USB_AU6610=m -CONFIG_DVB_USB_AZ6007=m -CONFIG_DVB_USB_CE6230=m -CONFIG_DVB_USB_EC168=m -CONFIG_DVB_USB_GL861=m -CONFIG_DVB_USB_MXL111SF=m -CONFIG_DVB_USB_RTL28XXU=m -# CONFIG_DVB_TTUSB_BUDGET is not set -# CONFIG_DVB_TTUSB_DEC is not set -# CONFIG_SMS_USB_DRV is not set -# CONFIG_DVB_B2C2_FLEXCOP_USB is not set - -# -# Webcam, TV (analog/digital) USB devices -# -CONFIG_VIDEO_EM28XX=m -CONFIG_VIDEO_EM28XX_V4L2=m -CONFIG_VIDEO_EM28XX_ALSA=m -# CONFIG_VIDEO_EM28XX_DVB is not set -CONFIG_MEDIA_PCI_SUPPORT=y - -# -# Media capture support -# -CONFIG_VIDEO_MEYE=m - -# -# Media capture/analog TV support -# -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_DC30=m -CONFIG_VIDEO_ZORAN_ZR36060=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZORAN_LML33R10=m -CONFIG_VIDEO_ZORAN_AVS6EYES=m -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_MXB=m - -# -# Media capture/analog/hybrid TV support -# -# CONFIG_VIDEO_CX25821 is not set -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_SAA7134_ALSA=m -CONFIG_VIDEO_SAA7134_DVB=m -CONFIG_VIDEO_SAA7164=m - -# -# Media digital TV PCI Adapters -# -# CONFIG_DVB_AV7110 is not set -# CONFIG_DVB_BUDGET_CORE is not set -# CONFIG_DVB_B2C2_FLEXCOP_PCI is not set -# CONFIG_DVB_PLUTO2 is not set -# CONFIG_DVB_PT1 is not set -# CONFIG_DVB_NGENE is not set -# CONFIG_DVB_DDBRIDGE is not set -# CONFIG_V4L_PLATFORM_DRIVERS is not set -# CONFIG_V4L_MEM2MEM_DRIVERS is not set -# CONFIG_V4L_TEST_DRIVERS is not set - -# -# 
Supported MMC/SDIO adapters -# -CONFIG_SMS_SDIO_DRV=m -# CONFIG_MEDIA_PARPORT_SUPPORT is not set - -# -# Supported FireWire (IEEE 1394) Adapters -# -# CONFIG_DVB_FIREDTV is not set -CONFIG_MEDIA_COMMON_OPTIONS=y - -# -# common driver options -# -CONFIG_VIDEO_CX2341X=m -CONFIG_VIDEO_TVEEPROM=m -CONFIG_CYPRESS_FIRMWARE=m -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_SMS_SIANO_MDTV=m - -# -# Media ancillary drivers (tuners, sensors, i2c, frontends) -# -CONFIG_MEDIA_SUBDRV_AUTOSELECT=y -CONFIG_MEDIA_ATTACH=y - -# -# Audio decoders, processors and mixers -# -CONFIG_VIDEO_TDA9840=m -CONFIG_VIDEO_TEA6415C=m -CONFIG_VIDEO_TEA6420=m -CONFIG_VIDEO_MSP3400=m -CONFIG_VIDEO_CS53L32A=m -CONFIG_VIDEO_WM8775=m - -# -# RDS decoders -# -CONFIG_VIDEO_SAA6588=m - -# -# Video decoders -# -CONFIG_VIDEO_BT819=m -CONFIG_VIDEO_BT856=m -CONFIG_VIDEO_BT866=m -CONFIG_VIDEO_KS0127=m -CONFIG_VIDEO_SAA7110=m -CONFIG_VIDEO_SAA711X=m -CONFIG_VIDEO_TVP5150=m -CONFIG_VIDEO_VPX3220=m - -# -# Video and audio decoders -# -CONFIG_VIDEO_CX25840=m - -# -# Video encoders -# -CONFIG_VIDEO_SAA7185=m -CONFIG_VIDEO_ADV7170=m -CONFIG_VIDEO_ADV7175=m - -# -# Camera sensor devices -# -CONFIG_VIDEO_MT9V011=m - -# -# Flash devices -# - -# -# Video improvement chips -# - -# -# Audio/Video compression chips -# -CONFIG_VIDEO_SAA6752HS=m - -# -# Miscellaneous helper chips -# - -# -# Sensors used on soc_camera driver -# -CONFIG_MEDIA_TUNER=m -CONFIG_MEDIA_TUNER_SIMPLE=m -CONFIG_MEDIA_TUNER_TDA8290=m -CONFIG_MEDIA_TUNER_TDA827X=m -CONFIG_MEDIA_TUNER_TDA18271=m -CONFIG_MEDIA_TUNER_TDA9887=m -CONFIG_MEDIA_TUNER_MT20XX=m -CONFIG_MEDIA_TUNER_MT2060=m -CONFIG_MEDIA_TUNER_MT2063=m -CONFIG_MEDIA_TUNER_QT1010=m -CONFIG_MEDIA_TUNER_XC2028=m -CONFIG_MEDIA_TUNER_XC5000=m -CONFIG_MEDIA_TUNER_XC4000=m -CONFIG_MEDIA_TUNER_MXL5005S=m -CONFIG_MEDIA_TUNER_MXL5007T=m -CONFIG_MEDIA_TUNER_MC44S803=m -CONFIG_MEDIA_TUNER_TDA18218=m -CONFIG_MEDIA_TUNER_FC0011=m -CONFIG_MEDIA_TUNER_FC0012=m -CONFIG_MEDIA_TUNER_FC0013=m 
-CONFIG_MEDIA_TUNER_TDA18212=m -CONFIG_MEDIA_TUNER_E4000=m -CONFIG_MEDIA_TUNER_FC2580=m -CONFIG_MEDIA_TUNER_TUA9001=m -CONFIG_MEDIA_TUNER_IT913X=m -CONFIG_MEDIA_TUNER_R820T=m - -# -# Multistandard (satellite) frontends -# - -# -# Multistandard (cable + terrestrial) frontends -# -CONFIG_DVB_DRXK=m - -# -# DVB-S (satellite) frontends -# -CONFIG_DVB_MT312=m -CONFIG_DVB_ZL10036=m -CONFIG_DVB_ZL10039=m -CONFIG_DVB_STV6110=m -CONFIG_DVB_STV0900=m -CONFIG_DVB_TDA10086=m -CONFIG_DVB_TDA826X=m -CONFIG_DVB_CX24116=m - -# -# DVB-T (terrestrial) frontends -# -CONFIG_DVB_TDA1004X=m -CONFIG_DVB_MT352=m -CONFIG_DVB_ZL10353=m -CONFIG_DVB_TDA10048=m -CONFIG_DVB_AF9013=m -CONFIG_DVB_EC100=m -CONFIG_DVB_CXD2820R=m -CONFIG_DVB_RTL2830=m -CONFIG_DVB_RTL2832=m - -# -# DVB-C (cable) frontends -# -CONFIG_DVB_TDA10023=m - -# -# ATSC (North American/Korean Terrestrial/Cable DTV) frontends -# -CONFIG_DVB_NXT200X=m -CONFIG_DVB_LGDT330X=m -CONFIG_DVB_LGDT3305=m -CONFIG_DVB_LG2160=m -CONFIG_DVB_S5H1409=m -CONFIG_DVB_AU8522=m -CONFIG_DVB_AU8522_DTV=m -CONFIG_DVB_AU8522_V4L=m -CONFIG_DVB_S5H1411=m - -# -# ISDB-T (terrestrial) frontends -# - -# -# Digital terrestrial only tuners/PLL -# -CONFIG_DVB_PLL=m - -# -# SEC control devices for DVB-S -# -CONFIG_DVB_LNBP21=m -CONFIG_DVB_ISL6405=m -CONFIG_DVB_ISL6421=m -CONFIG_DVB_ISL6423=m -CONFIG_DVB_AF9033=m - -# -# Tools to develop new frontends -# -# CONFIG_DVB_DUMMY_FE is not set - -# -# Graphics support -# -CONFIG_AGP=y -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_SIS=m -CONFIG_AGP_VIA=m -CONFIG_INTEL_GTT=m -CONFIG_VGA_ARB=y -CONFIG_VGA_ARB_MAX_GPUS=16 -CONFIG_VGA_SWITCHEROO=y - -# -# Direct Rendering Manager -# -CONFIG_DRM=m -CONFIG_DRM_USB=m -CONFIG_DRM_KMS_HELPER=m -CONFIG_DRM_KMS_FB_HELPER=y -# CONFIG_DRM_LOAD_EDID_FIRMWARE is not set -CONFIG_DRM_TTM=m - -# -# I2C encoder or helper chips -# -CONFIG_DRM_I2C_CH7006=m -CONFIG_DRM_I2C_SIL164=m -CONFIG_DRM_I2C_NXP_TDA998X=m -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -# 
CONFIG_DRM_RADEON_UMS is not set -CONFIG_DRM_NOUVEAU=m -CONFIG_NOUVEAU_DEBUG=5 -CONFIG_NOUVEAU_DEBUG_DEFAULT=3 -CONFIG_DRM_NOUVEAU_BACKLIGHT=y -CONFIG_DRM_I915=m -CONFIG_DRM_I915_KMS=y -CONFIG_DRM_I915_FBDEV=y -CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT=y -# CONFIG_DRM_I915_UMS is not set -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_DRM_VIA=m -CONFIG_DRM_SAVAGE=m -# CONFIG_DRM_VMWGFX is not set -CONFIG_DRM_GMA500=m -CONFIG_DRM_GMA600=y -CONFIG_DRM_GMA3600=y -CONFIG_DRM_UDL=m -# CONFIG_DRM_AST is not set -# CONFIG_DRM_MGAG200 is not set -# CONFIG_DRM_CIRRUS_QEMU is not set -# CONFIG_DRM_QXL is not set -# CONFIG_DRM_BOCHS is not set -CONFIG_DRM_PTN3460=m - -# -# Frame buffer Devices -# -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -# CONFIG_FB_DDC is not set -# CONFIG_FB_BOOT_VESA_SUPPORT is not set -CONFIG_FB_CFB_FILLRECT=y -CONFIG_FB_CFB_COPYAREA=y -CONFIG_FB_CFB_IMAGEBLIT=y -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -CONFIG_FB_SYS_FILLRECT=m -CONFIG_FB_SYS_COPYAREA=m -CONFIG_FB_SYS_IMAGEBLIT=m -# CONFIG_FB_FOREIGN_ENDIAN is not set -CONFIG_FB_SYS_FOPS=m -CONFIG_FB_DEFERRED_IO=y -# CONFIG_FB_SVGALIB is not set -# CONFIG_FB_MACMODES is not set -CONFIG_FB_BACKLIGHT=y -CONFIG_FB_MODE_HELPERS=y -# CONFIG_FB_TILEBLITTING is not set - -# -# Frame buffer hardware drivers -# -# CONFIG_FB_CIRRUS is not set -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ARC is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -# CONFIG_FB_VGA16 is not set -CONFIG_FB_UVESA=m -# CONFIG_FB_VESA is not set -CONFIG_FB_EFI=y -# CONFIG_FB_N411 is not set -# CONFIG_FB_HGA is not set -# CONFIG_FB_OPENCORES is not set -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_NVIDIA is not set -# CONFIG_FB_RIVA is not set -# CONFIG_FB_I740 is not set -# CONFIG_FB_LE80578 is not set -# CONFIG_FB_MATROX is not set -# CONFIG_FB_RADEON is not set -# CONFIG_FB_ATY128 is not set -# CONFIG_FB_ATY is not set -# CONFIG_FB_S3 is not set -# CONFIG_FB_SAVAGE is not set -# CONFIG_FB_SIS 
is not set -# CONFIG_FB_VIA is not set -# CONFIG_FB_NEOMAGIC is not set -# CONFIG_FB_KYRO is not set -# CONFIG_FB_3DFX is not set -# CONFIG_FB_VOODOO1 is not set -# CONFIG_FB_VT8623 is not set -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_ARK is not set -# CONFIG_FB_PM3 is not set -# CONFIG_FB_CARMINE is not set -# CONFIG_FB_TMIO is not set -# CONFIG_FB_SMSCUFX is not set -# CONFIG_FB_UDL is not set -# CONFIG_FB_GOLDFISH is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FB_METRONOME is not set -# CONFIG_FB_MB862XX is not set -# CONFIG_FB_BROADSHEET is not set -# CONFIG_FB_AUO_K190X is not set -# CONFIG_FB_SIMPLE is not set -# CONFIG_EXYNOS_VIDEO is not set -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_LCD_CLASS_DEVICE=m -# CONFIG_LCD_PLATFORM is not set -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=m -CONFIG_BACKLIGHT_APPLE=m -# CONFIG_BACKLIGHT_SAHARA is not set -# CONFIG_BACKLIGHT_ADP8860 is not set -CONFIG_BACKLIGHT_ADP8870=m -CONFIG_BACKLIGHT_LM3630A=m -CONFIG_BACKLIGHT_LM3639=m -CONFIG_BACKLIGHT_LP855X=m -CONFIG_BACKLIGHT_LV5207LP=m -CONFIG_BACKLIGHT_BD6107=m -# CONFIG_VGASTATE is not set -CONFIG_HDMI=y - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -# CONFIG_VGACON_SOFT_SCROLLBACK is not set -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set -CONFIG_FB_CON_DECOR=y -# CONFIG_LOGO is not set -CONFIG_SOUND=y -CONFIG_SOUND_OSS_CORE=y -CONFIG_SOUND_OSS_CORE_PRECLAIM=y -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_JACK=y -CONFIG_SND_SEQUENCER=m -# CONFIG_SND_SEQ_DUMMY is not set -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_PCM_OSS_PLUGINS=y -# CONFIG_SND_SEQUENCER_OSS is not set -# CONFIG_SND_HRTIMER is not set -CONFIG_SND_DYNAMIC_MINORS=y -CONFIG_SND_MAX_CARDS=32 -# CONFIG_SND_SUPPORT_OLD_API is not set -CONFIG_SND_VERBOSE_PROCFS=y -# 
CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set -CONFIG_SND_VMASTER=y -CONFIG_SND_KCTL_JACK=y -CONFIG_SND_DMA_SGBUF=y -CONFIG_SND_RAWMIDI_SEQ=m -CONFIG_SND_OPL3_LIB_SEQ=m -# CONFIG_SND_OPL4_LIB_SEQ is not set -# CONFIG_SND_SBAWE_SEQ is not set -CONFIG_SND_EMU10K1_SEQ=m -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_DRIVERS=y -# CONFIG_SND_PCSP is not set -# CONFIG_SND_DUMMY is not set -# CONFIG_SND_ALOOP is not set -# CONFIG_SND_VIRMIDI is not set -# CONFIG_SND_MTPAV is not set -# CONFIG_SND_MTS64 is not set -# CONFIG_SND_SERIAL_U16550 is not set -# CONFIG_SND_MPU401 is not set -# CONFIG_SND_PORTMAN2X4 is not set -CONFIG_SND_AC97_POWER_SAVE=y -CONFIG_SND_AC97_POWER_SAVE_DEFAULT=0 -CONFIG_SND_SB_COMMON=m -CONFIG_SND_PCI=y -CONFIG_SND_AD1889=m -CONFIG_SND_ALS300=m -CONFIG_SND_ALS4000=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ASIHPI=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_ATIIXP_MODEM=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AW2=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -CONFIG_SND_BT87X_OVERCLOCK=y -CONFIG_SND_CA0106=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_OXYGEN_LIB=m -CONFIG_SND_OXYGEN=m -CONFIG_SND_CS4281=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CTXFI=m -CONFIG_SND_DARLA20=m -CONFIG_SND_GINA20=m -CONFIG_SND_LAYLA20=m -CONFIG_SND_DARLA24=m -CONFIG_SND_GINA24=m -CONFIG_SND_LAYLA24=m -CONFIG_SND_MONA=m -CONFIG_SND_MIA=m -CONFIG_SND_ECHO3G=m -CONFIG_SND_INDIGO=m -CONFIG_SND_INDIGOIO=m -CONFIG_SND_INDIGODJ=m -CONFIG_SND_INDIGOIOX=m -CONFIG_SND_INDIGODJX=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_EMU10K1X=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_ES1968_INPUT=y -CONFIG_SND_FM801=m -CONFIG_SND_HDSP=m -CONFIG_SND_HDSPM=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_KORG1212=m -CONFIG_SND_LOLA=m -CONFIG_SND_LX6464ES=m -CONFIG_SND_MAESTRO3=m 
-CONFIG_SND_MAESTRO3_INPUT=y -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_PCXHR=m -CONFIG_SND_RIPTIDE=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VIA82XX_MODEM=m -CONFIG_SND_VIRTUOSO=m -CONFIG_SND_VX222=m -CONFIG_SND_YMFPCI=m - -# -# HD-Audio -# -CONFIG_SND_HDA=m -CONFIG_SND_HDA_INTEL=m -CONFIG_SND_HDA_DSP_LOADER=y -CONFIG_SND_HDA_PREALLOC_SIZE=2048 -CONFIG_SND_HDA_HWDEP=y -CONFIG_SND_HDA_RECONFIG=y -CONFIG_SND_HDA_INPUT_BEEP=y -CONFIG_SND_HDA_INPUT_BEEP_MODE=1 -CONFIG_SND_HDA_INPUT_JACK=y -CONFIG_SND_HDA_PATCH_LOADER=y -CONFIG_SND_HDA_CODEC_REALTEK=m -CONFIG_SND_HDA_CODEC_ANALOG=m -CONFIG_SND_HDA_CODEC_SIGMATEL=m -CONFIG_SND_HDA_CODEC_VIA=m -CONFIG_SND_HDA_CODEC_HDMI=m -CONFIG_SND_HDA_I915=y -CONFIG_SND_HDA_CODEC_CIRRUS=m -CONFIG_SND_HDA_CODEC_CONEXANT=m -CONFIG_SND_HDA_CODEC_CA0110=m -CONFIG_SND_HDA_CODEC_CA0132=m -CONFIG_SND_HDA_CODEC_CA0132_DSP=y -CONFIG_SND_HDA_CODEC_CMEDIA=m -CONFIG_SND_HDA_CODEC_SI3054=m -CONFIG_SND_HDA_GENERIC=m -CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 -CONFIG_SND_USB=y -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_UA101=m -CONFIG_SND_USB_USX2Y=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_SND_USB_US122L=m -CONFIG_SND_USB_6FIRE=m -CONFIG_SND_USB_HIFACE=m -CONFIG_SND_FIREWIRE=y -CONFIG_SND_FIREWIRE_LIB=m -# CONFIG_SND_DICE is not set -CONFIG_SND_FIREWIRE_SPEAKERS=m -CONFIG_SND_ISIGHT=m -CONFIG_SND_SCS1X=m -CONFIG_SND_PCMCIA=y -# CONFIG_SND_VXPOCKET is not set -# CONFIG_SND_PDAUDIOCF is not set -# CONFIG_SND_SOC is not set -# CONFIG_SOUND_PRIME is not set -CONFIG_AC97_BUS=m - -# -# HID support -# -CONFIG_HID=y -# CONFIG_HID_BATTERY_STRENGTH is not set -CONFIG_HIDRAW=y -CONFIG_UHID=m -CONFIG_HID_GENERIC=y - -# -# Special HID drivers -# -CONFIG_HID_A4TECH=y -CONFIG_HID_ACRUX=m -CONFIG_HID_ACRUX_FF=y -CONFIG_HID_APPLE=y -# CONFIG_HID_APPLEIR is not set -# CONFIG_HID_AUREAL is not set -CONFIG_HID_BELKIN=y -CONFIG_HID_CHERRY=y 
-CONFIG_HID_CHICONY=y -CONFIG_HID_PRODIKEYS=m -CONFIG_HID_CYPRESS=y -CONFIG_HID_DRAGONRISE=m -CONFIG_DRAGONRISE_FF=y -CONFIG_HID_EMS_FF=m -CONFIG_HID_ELECOM=m -CONFIG_HID_ELO=m -CONFIG_HID_EZKEY=y -CONFIG_HID_HOLTEK=m -CONFIG_HOLTEK_FF=y -CONFIG_HID_HUION=m -CONFIG_HID_KEYTOUCH=m -CONFIG_HID_KYE=y -CONFIG_HID_UCLOGIC=m -CONFIG_HID_WALTOP=m -CONFIG_HID_GYRATION=m -CONFIG_HID_ICADE=m -CONFIG_HID_TWINHAN=m -CONFIG_HID_KENSINGTON=y -CONFIG_HID_LCPOWER=m -CONFIG_HID_LENOVO_TPKBD=m -CONFIG_HID_LOGITECH=y -CONFIG_HID_LOGITECH_DJ=m -CONFIG_LOGITECH_FF=y -CONFIG_LOGIRUMBLEPAD2_FF=y -CONFIG_LOGIG940_FF=y -CONFIG_LOGIWHEELS_FF=y -CONFIG_HID_MAGICMOUSE=m -CONFIG_HID_MICROSOFT=y -CONFIG_HID_MONTEREY=y -CONFIG_HID_MULTITOUCH=m -CONFIG_HID_NTRIG=m -CONFIG_HID_ORTEK=m -CONFIG_HID_PANTHERLORD=m -CONFIG_PANTHERLORD_FF=y -CONFIG_HID_PETALYNX=m -CONFIG_HID_PICOLCD=m -CONFIG_HID_PICOLCD_FB=y -CONFIG_HID_PICOLCD_BACKLIGHT=y -CONFIG_HID_PICOLCD_LCD=y -CONFIG_HID_PICOLCD_LEDS=y -CONFIG_HID_PRIMAX=m -CONFIG_HID_ROCCAT=m -CONFIG_HID_SAITEK=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -# CONFIG_SONY_FF is not set -CONFIG_HID_SPEEDLINK=m -CONFIG_HID_STEELSERIES=m -CONFIG_HID_SUNPLUS=m -CONFIG_HID_GREENASIA=m -CONFIG_GREENASIA_FF=y -CONFIG_HID_SMARTJOYPLUS=y -CONFIG_SMARTJOYPLUS_FF=y -CONFIG_HID_TIVO=m -CONFIG_HID_TOPSEED=y -CONFIG_HID_THINGM=m -CONFIG_HID_THRUSTMASTER=y -CONFIG_THRUSTMASTER_FF=y -CONFIG_HID_WACOM=m -CONFIG_HID_WIIMOTE=m -CONFIG_HID_XINMO=m -CONFIG_HID_ZEROPLUS=y -CONFIG_ZEROPLUS_FF=y -CONFIG_HID_ZYDACRON=m -CONFIG_HID_SENSOR_HUB=m - -# -# USB HID support -# -CONFIG_USB_HID=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y - -# -# I2C HID support -# -CONFIG_I2C_HID=m -CONFIG_USB_OHCI_LITTLE_ENDIAN=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_COMMON=y -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB=y -# CONFIG_USB_DEBUG is not set -CONFIG_USB_ANNOUNCE_NEW_DEVICES=y - -# -# Miscellaneous USB options -# -CONFIG_USB_DEFAULT_PERSIST=y -# CONFIG_USB_DYNAMIC_MINORS is not set -CONFIG_USB_OTG=y 
-CONFIG_USB_OTG_WHITELIST=y -# CONFIG_USB_OTG_BLACKLIST_HUB is not set -CONFIG_USB_MON=y -# CONFIG_USB_WUSB_CBAF is not set - -# -# USB Host Controller Drivers -# -# CONFIG_USB_C67X00_HCD is not set -CONFIG_USB_XHCI_HCD=m -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y -CONFIG_USB_EHCI_PCI=y -CONFIG_USB_EHCI_HCD_PLATFORM=y -# CONFIG_USB_OXU210HP_HCD is not set -CONFIG_USB_ISP116X_HCD=m -# CONFIG_USB_ISP1760_HCD is not set -# CONFIG_USB_ISP1362_HCD is not set -# CONFIG_USB_FUSBH200_HCD is not set -# CONFIG_USB_FOTG210_HCD is not set -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_OHCI_HCD_PCI=m -# CONFIG_USB_OHCI_HCD_SSB is not set -CONFIG_USB_OHCI_HCD_PLATFORM=m -CONFIG_USB_UHCI_HCD=m -# CONFIG_USB_SL811_HCD is not set -# CONFIG_USB_R8A66597_HCD is not set -CONFIG_USB_HCD_BCMA=m -CONFIG_USB_HCD_SSB=m -# CONFIG_USB_HCD_TEST_MODE is not set -# CONFIG_USB_RENESAS_USBHS is not set - -# -# USB Device Class drivers -# -CONFIG_USB_ACM=m -# CONFIG_USB_PRINTER is not set -CONFIG_USB_WDM=m -# CONFIG_USB_TMC is not set - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may -# - -# -# also be needed; see USB_STORAGE Help for more info -# -CONFIG_USB_STORAGE=y -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_REALTEK=m -CONFIG_REALTEK_AUTOPM=y -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_USBAT is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_STORAGE_ALAUDA is not set -# CONFIG_USB_STORAGE_ONETOUCH is not set -# CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_CYPRESS_ATACB is not set -CONFIG_USB_STORAGE_ENE_UB6250=m -CONFIG_USB_UAS=m - -# -# USB Imaging devices -# -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set -CONFIG_USB_MUSB_HDRC=m -# CONFIG_USB_MUSB_HOST is not set -# CONFIG_USB_MUSB_GADGET is not set 
-CONFIG_USB_MUSB_DUAL_ROLE=y -CONFIG_USB_MUSB_TUSB6010=m -CONFIG_USB_MUSB_UX500=m -# CONFIG_USB_UX500_DMA is not set -CONFIG_MUSB_PIO_ONLY=y -# CONFIG_USB_DWC3 is not set -# CONFIG_USB_DWC2 is not set -CONFIG_USB_CHIPIDEA=m -CONFIG_USB_CHIPIDEA_UDC=y -CONFIG_USB_CHIPIDEA_HOST=y -# CONFIG_USB_CHIPIDEA_DEBUG is not set - -# -# USB port drivers -# -# CONFIG_USB_USS720 is not set -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_SIMPLE=m -CONFIG_USB_SERIAL_AIRCABLE=m -CONFIG_USB_SERIAL_ARK3116=m -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_CH341=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_CP210X=m -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_F81232=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_IUU=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -CONFIG_USB_SERIAL_KEYSPAN_MPR=y -CONFIG_USB_SERIAL_KEYSPAN_USA28=y -CONFIG_USB_SERIAL_KEYSPAN_USA28X=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -CONFIG_USB_SERIAL_KEYSPAN_USA19=y -CONFIG_USB_SERIAL_KEYSPAN_USA18X=y -CONFIG_USB_SERIAL_KEYSPAN_USA19W=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_METRO=m -CONFIG_USB_SERIAL_MOS7720=m -CONFIG_USB_SERIAL_MOS7715_PARPORT=y -CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MXUPORT=m -CONFIG_USB_SERIAL_NAVMAN=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_OTI6858=m -CONFIG_USB_SERIAL_QCAUX=m -CONFIG_USB_SERIAL_QUALCOMM=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_SAFE=m -CONFIG_USB_SERIAL_SAFE_PADDED=y 
-CONFIG_USB_SERIAL_SIERRAWIRELESS=m -CONFIG_USB_SERIAL_SYMBOL=m -CONFIG_USB_SERIAL_TI=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_WWAN=m -CONFIG_USB_SERIAL_OPTION=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_SERIAL_OPTICON=m -CONFIG_USB_SERIAL_XSENS_MT=m -CONFIG_USB_SERIAL_WISHBONE=m -CONFIG_USB_SERIAL_ZTE=m -CONFIG_USB_SERIAL_SSU100=m -CONFIG_USB_SERIAL_QT2=m -CONFIG_USB_SERIAL_DEBUG=m - -# -# USB Miscellaneous drivers -# -# CONFIG_USB_EMI62 is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_ADUTUX is not set -# CONFIG_USB_SEVSEG is not set -# CONFIG_USB_RIO500 is not set -# CONFIG_USB_LEGOTOWER is not set -# CONFIG_USB_LCD is not set -# CONFIG_USB_LED is not set -# CONFIG_USB_CYPRESS_CY7C63 is not set -# CONFIG_USB_CYTHERM is not set -# CONFIG_USB_IDMOUSE is not set -# CONFIG_USB_FTDI_ELAN is not set -# CONFIG_USB_APPLEDISPLAY is not set -# CONFIG_USB_SISUSBVGA is not set -# CONFIG_USB_LD is not set -# CONFIG_USB_TRANCEVIBRATOR is not set -# CONFIG_USB_IOWARRIOR is not set -# CONFIG_USB_TEST is not set -# CONFIG_USB_EHSET_TEST_FIXTURE is not set -# CONFIG_USB_ISIGHTFW is not set -# CONFIG_USB_YUREX is not set -CONFIG_USB_EZUSB_FX2=m -# CONFIG_USB_HSIC_USB3503 is not set - -# -# USB Physical Layer drivers -# -CONFIG_USB_PHY=y -CONFIG_USB_OTG_FSM=m -# CONFIG_NOP_USB_XCEIV is not set -# CONFIG_SAMSUNG_USB2PHY is not set -# CONFIG_SAMSUNG_USB3PHY is not set -# CONFIG_USB_ISP1301 is not set -# CONFIG_USB_RCAR_PHY is not set -CONFIG_USB_GADGET=m -# CONFIG_USB_GADGET_DEBUG is not set -# CONFIG_USB_GADGET_DEBUG_FILES is not set -CONFIG_USB_GADGET_VBUS_DRAW=2 -CONFIG_USB_GADGET_STORAGE_NUM_BUFFERS=2 - -# -# USB Peripheral Controller -# -# CONFIG_USB_FOTG210_UDC is not set -CONFIG_USB_GR_UDC=m -CONFIG_USB_R8A66597=m -CONFIG_USB_PXA27X=m -CONFIG_USB_S3C_HSOTG=m -CONFIG_USB_MV_UDC=m -CONFIG_USB_MV_U3D=m -CONFIG_USB_M66592=m -CONFIG_USB_AMD5536UDC=m -CONFIG_USB_NET2272=m -CONFIG_USB_NET2272_DMA=y -CONFIG_USB_NET2280=m -CONFIG_USB_GOKU=m 
-CONFIG_USB_EG20T=m -CONFIG_USB_DUMMY_HCD=m -CONFIG_USB_LIBCOMPOSITE=m -CONFIG_USB_U_ETHER=m -CONFIG_USB_F_NCM=m -CONFIG_USB_F_ECM=m -CONFIG_USB_F_EEM=m -CONFIG_USB_F_SUBSET=m -CONFIG_USB_F_RNDIS=m -CONFIG_USB_CONFIGFS=m -# CONFIG_USB_CONFIGFS_SERIAL is not set -# CONFIG_USB_CONFIGFS_ACM is not set -# CONFIG_USB_CONFIGFS_OBEX is not set -# CONFIG_USB_CONFIGFS_NCM is not set -# CONFIG_USB_CONFIGFS_ECM is not set -# CONFIG_USB_CONFIGFS_ECM_SUBSET is not set -# CONFIG_USB_CONFIGFS_RNDIS is not set -# CONFIG_USB_CONFIGFS_EEM is not set -# CONFIG_USB_CONFIGFS_MASS_STORAGE is not set -# CONFIG_USB_CONFIGFS_F_LB_SS is not set -# CONFIG_USB_CONFIGFS_F_FS is not set -# CONFIG_USB_ZERO is not set -# CONFIG_USB_AUDIO is not set -CONFIG_USB_ETH=m -CONFIG_USB_ETH_RNDIS=y -CONFIG_USB_ETH_EEM=y -CONFIG_USB_G_NCM=m -# CONFIG_USB_GADGETFS is not set -# CONFIG_USB_FUNCTIONFS is not set -# CONFIG_USB_MASS_STORAGE is not set -# CONFIG_USB_G_SERIAL is not set -# CONFIG_USB_MIDI_GADGET is not set -# CONFIG_USB_G_PRINTER is not set -# CONFIG_USB_CDC_COMPOSITE is not set -# CONFIG_USB_G_ACM_MS is not set -# CONFIG_USB_G_MULTI is not set -# CONFIG_USB_G_HID is not set -# CONFIG_USB_G_DBGP is not set -# CONFIG_USB_G_WEBCAM is not set -# CONFIG_UWB is not set -CONFIG_MMC=m -# CONFIG_MMC_DEBUG is not set -# CONFIG_MMC_CLKGATE is not set - -# -# MMC/SD/SDIO Card Drivers -# -CONFIG_MMC_BLOCK=m -CONFIG_MMC_BLOCK_MINORS=8 -CONFIG_MMC_BLOCK_BOUNCE=y -CONFIG_SDIO_UART=m -# CONFIG_MMC_TEST is not set - -# -# MMC/SD/SDIO Host Controller Drivers -# -CONFIG_MMC_SDHCI=m -CONFIG_MMC_SDHCI_PCI=m -CONFIG_MMC_RICOH_MMC=y -CONFIG_MMC_SDHCI_ACPI=m -# CONFIG_MMC_SDHCI_PLTFM is not set -CONFIG_MMC_WBSD=m -CONFIG_MMC_TIFM_SD=m -CONFIG_MMC_SDRICOH_CS=m -CONFIG_MMC_CB710=m -CONFIG_MMC_VIA_SDMMC=m -CONFIG_MMC_VUB300=m -# CONFIG_MMC_USHC is not set -CONFIG_MEMSTICK=m -# CONFIG_MEMSTICK_DEBUG is not set - -# -# MemoryStick drivers -# -# CONFIG_MEMSTICK_UNSAFE_RESUME is not set -CONFIG_MSPRO_BLOCK=m -CONFIG_MS_BLOCK=m 
- -# -# MemoryStick Host Controller Drivers -# -CONFIG_MEMSTICK_TIFM_MS=m -CONFIG_MEMSTICK_JMICRON_38X=m -CONFIG_MEMSTICK_R592=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y - -# -# LED drivers -# -CONFIG_LEDS_LM3530=m -# CONFIG_LEDS_LM3642 is not set -CONFIG_LEDS_PCA9532=m -CONFIG_LEDS_LP3944=m -CONFIG_LEDS_LP55XX_COMMON=m -CONFIG_LEDS_LP5521=m -CONFIG_LEDS_LP5523=m -CONFIG_LEDS_LP5562=m -CONFIG_LEDS_LP8501=m -CONFIG_LEDS_CLEVO_MAIL=m -CONFIG_LEDS_PCA955X=m -CONFIG_LEDS_PCA963X=m -CONFIG_LEDS_PCA9685=m -CONFIG_LEDS_BD2802=m -CONFIG_LEDS_INTEL_SS4200=m -CONFIG_LEDS_DELL_NETBOOKS=m -CONFIG_LEDS_TCA6507=m -CONFIG_LEDS_LM355x=m -CONFIG_LEDS_BLINKM=m - -# -# LED Triggers -# -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -# CONFIG_LEDS_TRIGGER_CPU is not set -CONFIG_LEDS_TRIGGER_DEFAULT_ON=m - -# -# iptables trigger is under Netfilter config (LED target) -# -CONFIG_LEDS_TRIGGER_TRANSIENT=m -CONFIG_LEDS_TRIGGER_CAMERA=m -# CONFIG_ACCESSIBILITY is not set -# CONFIG_INFINIBAND is not set -# CONFIG_EDAC is not set -CONFIG_RTC_LIB=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y -CONFIG_RTC_SYSTOHC=y -CONFIG_RTC_HCTOSYS_DEVICE="rtc0" -# CONFIG_RTC_DEBUG is not set - -# -# RTC interfaces -# -CONFIG_RTC_INTF_SYSFS=y -CONFIG_RTC_INTF_PROC=y -CONFIG_RTC_INTF_DEV=y -# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set -# CONFIG_RTC_DRV_TEST is not set - -# -# I2C RTC drivers -# -# CONFIG_RTC_DRV_DS1307 is not set -# CONFIG_RTC_DRV_DS1374 is not set -# CONFIG_RTC_DRV_DS1672 is not set -# CONFIG_RTC_DRV_DS3232 is not set -# CONFIG_RTC_DRV_MAX6900 is not set -# CONFIG_RTC_DRV_RS5C372 is not set -# CONFIG_RTC_DRV_ISL1208 is not set -# CONFIG_RTC_DRV_ISL12022 is not set -# CONFIG_RTC_DRV_ISL12057 is not set -# CONFIG_RTC_DRV_X1205 is not set -# CONFIG_RTC_DRV_PCF2127 is not set -# CONFIG_RTC_DRV_PCF8523 is not set -# CONFIG_RTC_DRV_PCF8563 is not set -# CONFIG_RTC_DRV_PCF8583 is not set -# 
CONFIG_RTC_DRV_M41T80 is not set -# CONFIG_RTC_DRV_BQ32K is not set -# CONFIG_RTC_DRV_S35390A is not set -# CONFIG_RTC_DRV_FM3130 is not set -# CONFIG_RTC_DRV_RX8581 is not set -# CONFIG_RTC_DRV_RX8025 is not set -# CONFIG_RTC_DRV_EM3027 is not set -# CONFIG_RTC_DRV_RV3029C2 is not set - -# -# SPI RTC drivers -# - -# -# Platform RTC drivers -# -CONFIG_RTC_DRV_CMOS=y -# CONFIG_RTC_DRV_DS1286 is not set -# CONFIG_RTC_DRV_DS1511 is not set -# CONFIG_RTC_DRV_DS1553 is not set -# CONFIG_RTC_DRV_DS1742 is not set -# CONFIG_RTC_DRV_STK17TA8 is not set -# CONFIG_RTC_DRV_M48T86 is not set -# CONFIG_RTC_DRV_M48T35 is not set -# CONFIG_RTC_DRV_M48T59 is not set -# CONFIG_RTC_DRV_MSM6242 is not set -# CONFIG_RTC_DRV_BQ4802 is not set -# CONFIG_RTC_DRV_RP5C01 is not set -# CONFIG_RTC_DRV_V3020 is not set -# CONFIG_RTC_DRV_DS2404 is not set - -# -# on-CPU RTC drivers -# -# CONFIG_RTC_DRV_MOXART is not set - -# -# HID Sensor RTC drivers -# -# CONFIG_RTC_DRV_HID_SENSOR_TIME is not set -CONFIG_DMADEVICES=y -# CONFIG_DMADEVICES_DEBUG is not set - -# -# DMA Devices -# -CONFIG_INTEL_MID_DMAC=m -CONFIG_INTEL_IOATDMA=m -CONFIG_DW_DMAC_CORE=m -CONFIG_DW_DMAC=m -CONFIG_DW_DMAC_PCI=m -CONFIG_PCH_DMA=m -CONFIG_DMA_ENGINE=y -CONFIG_DMA_ACPI=y - -# -# DMA Clients -# -# CONFIG_ASYNC_TX_DMA is not set -# CONFIG_DMATEST is not set -CONFIG_DMA_ENGINE_RAID=y -CONFIG_DCA=m -# CONFIG_AUXDISPLAY is not set -CONFIG_UIO=m -# CONFIG_UIO_CIF is not set -# CONFIG_UIO_PDRV_GENIRQ is not set -# CONFIG_UIO_DMEM_GENIRQ is not set -# CONFIG_UIO_AEC is not set -# CONFIG_UIO_SERCOS3 is not set -CONFIG_UIO_PCI_GENERIC=m -# CONFIG_UIO_NETX is not set -# CONFIG_UIO_MF624 is not set -CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO=m - -# -# Virtio drivers -# -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_MMIO=m -# CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set - -# -# Microsoft Hyper-V guest support -# -CONFIG_STAGING=y -# CONFIG_ET131X is not set -# CONFIG_SLICOSS is not set -# CONFIG_USBIP_CORE is not set -# 
CONFIG_W35UND is not set -# CONFIG_PRISM2_USB is not set -# CONFIG_COMEDI is not set -# CONFIG_PANEL is not set -CONFIG_RTL8192U=m -CONFIG_RTLLIB=m -CONFIG_RTLLIB_CRYPTO_CCMP=m -CONFIG_RTLLIB_CRYPTO_TKIP=m -CONFIG_RTLLIB_CRYPTO_WEP=m -CONFIG_RTL8192E=m -CONFIG_R8712U=m -CONFIG_R8188EU=m -CONFIG_88EU_AP_MODE=y -CONFIG_88EU_P2P=y -CONFIG_R8723AU=m -CONFIG_8723AU_AP_MODE=y -CONFIG_8723AU_P2P=y -CONFIG_8723AU_BT_COEXIST=y -CONFIG_R8821AE=m -CONFIG_RTS5139=m -# CONFIG_RTS5139_DEBUG is not set -CONFIG_RTS5208=m -# CONFIG_RTS5208_DEBUG is not set -# CONFIG_TRANZPORT is not set -# CONFIG_IDE_PHISON is not set -# CONFIG_LINE6_USB is not set -# CONFIG_USB_SERIAL_QUATECH2 is not set -# CONFIG_VT6655 is not set -# CONFIG_VT6656 is not set -# CONFIG_DX_SEP is not set -# CONFIG_WLAGS49_H2 is not set -# CONFIG_WLAGS49_H25 is not set -# CONFIG_CRYSTALHD is not set -# CONFIG_FB_XGI is not set -# CONFIG_ACPI_QUICKSTART is not set -# CONFIG_USB_ENESTORAGE is not set -# CONFIG_BCM_WIMAX is not set -# CONFIG_FT1000 is not set - -# -# Speakup console speech -# -# CONFIG_SPEAKUP is not set -CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4=m -# CONFIG_STAGING_MEDIA is not set - -# -# Android -# -# CONFIG_ANDROID is not set -# CONFIG_USB_WPAN_HCD is not set -CONFIG_WIMAX_GDM72XX=m -CONFIG_WIMAX_GDM72XX_QOS=y -CONFIG_WIMAX_GDM72XX_K_MODE=y -CONFIG_WIMAX_GDM72XX_WIMAX2=y -CONFIG_WIMAX_GDM72XX_USB=y -# CONFIG_WIMAX_GDM72XX_SDIO is not set -CONFIG_WIMAX_GDM72XX_USB_PM=y -CONFIG_LTE_GDM724X=m -CONFIG_NET_VENDOR_SILICOM=y -# CONFIG_SBYPASS is not set -# CONFIG_BPCTL is not set -# CONFIG_CED1401 is not set -# CONFIG_DGRP is not set -# CONFIG_FIREWIRE_SERIAL is not set -# CONFIG_LUSTRE_FS is not set -# CONFIG_XILLYBUS is not set -# CONFIG_DGNC is not set -# CONFIG_DGAP is not set -CONFIG_GS_FPGABOOT=m -CONFIG_X86_PLATFORM_DEVICES=y -CONFIG_ACER_WMI=m -CONFIG_ACERHDF=m -CONFIG_ALIENWARE_WMI=m -CONFIG_ASUS_LAPTOP=m -CONFIG_DELL_LAPTOP=m -CONFIG_DELL_WMI=m -CONFIG_DELL_WMI_AIO=m -CONFIG_FUJITSU_LAPTOP=m -# 
CONFIG_FUJITSU_LAPTOP_DEBUG is not set -CONFIG_FUJITSU_TABLET=m -CONFIG_AMILO_RFKILL=m -CONFIG_HP_ACCEL=m -CONFIG_HP_WIRELESS=m -CONFIG_HP_WMI=m -CONFIG_MSI_LAPTOP=m -CONFIG_PANASONIC_LAPTOP=m -CONFIG_COMPAL_LAPTOP=m -CONFIG_SONY_LAPTOP=m -CONFIG_SONYPI_COMPAT=y -CONFIG_IDEAPAD_LAPTOP=m -CONFIG_THINKPAD_ACPI=m -CONFIG_THINKPAD_ACPI_ALSA_SUPPORT=y -# CONFIG_THINKPAD_ACPI_DEBUGFACILITIES is not set -# CONFIG_THINKPAD_ACPI_DEBUG is not set -CONFIG_THINKPAD_ACPI_UNSAFE_LEDS=y -CONFIG_THINKPAD_ACPI_VIDEO=y -CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y -CONFIG_SENSORS_HDAPS=m -CONFIG_INTEL_MENLOW=m -CONFIG_EEEPC_LAPTOP=m -CONFIG_ASUS_WMI=m -CONFIG_ASUS_NB_WMI=m -CONFIG_EEEPC_WMI=m -CONFIG_ACPI_WMI=m -CONFIG_MSI_WMI=m -CONFIG_TOPSTAR_LAPTOP=m -CONFIG_ACPI_TOSHIBA=m -CONFIG_TOSHIBA_BT_RFKILL=m -CONFIG_ACPI_CMPC=m -CONFIG_INTEL_IPS=m -CONFIG_IBM_RTL=m -CONFIG_XO15_EBOOK=m -CONFIG_SAMSUNG_LAPTOP=m -CONFIG_MXM_WMI=m -CONFIG_INTEL_OAKTRAIL=m -CONFIG_SAMSUNG_Q10=m -CONFIG_APPLE_GMUX=m -CONFIG_INTEL_RST=m -CONFIG_INTEL_SMARTCONNECT=m -CONFIG_PVPANIC=m -# CONFIG_CHROME_PLATFORMS is not set - -# -# Hardware Spinlock drivers -# -CONFIG_CLKEVT_I8253=y -CONFIG_I8253_LOCK=y -CONFIG_CLKBLD_I8253=y -# CONFIG_SH_TIMER_CMT is not set -# CONFIG_SH_TIMER_MTU2 is not set -# CONFIG_SH_TIMER_TMU is not set -# CONFIG_EM_TIMER_STI is not set -# CONFIG_MAILBOX is not set -CONFIG_IOMMU_SUPPORT=y -# CONFIG_AMD_IOMMU is not set -# CONFIG_INTEL_IOMMU is not set -# CONFIG_IRQ_REMAP is not set - -# -# Remoteproc drivers -# -# CONFIG_STE_MODEM_RPROC is not set - -# -# Rpmsg drivers -# -CONFIG_PM_DEVFREQ=y - -# -# DEVFREQ Governors -# -# CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND is not set -# CONFIG_DEVFREQ_GOV_PERFORMANCE is not set -# CONFIG_DEVFREQ_GOV_POWERSAVE is not set -# CONFIG_DEVFREQ_GOV_USERSPACE is not set - -# -# DEVFREQ Drivers -# -# CONFIG_EXTCON is not set -# CONFIG_MEMORY is not set -# CONFIG_IIO is not set -# CONFIG_NTB is not set -# CONFIG_VME_BUS is not set -# CONFIG_PWM is not set -# CONFIG_IPACK_BUS 
is not set -CONFIG_RESET_CONTROLLER=y -# CONFIG_FMC is not set - -# -# PHY Subsystem -# -CONFIG_GENERIC_PHY=y -# CONFIG_BCM_KONA_USB2_PHY is not set -CONFIG_PHY_SAMSUNG_USB2=m -# CONFIG_POWERCAP is not set -# CONFIG_MCB is not set - -# -# Firmware Drivers -# -# CONFIG_EDD is not set -CONFIG_FIRMWARE_MEMMAP=y -# CONFIG_DELL_RBU is not set -CONFIG_DCDBAS=m -CONFIG_DMIID=y -# CONFIG_DMI_SYSFS is not set -CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y -# CONFIG_ISCSI_IBFT_FIND is not set -# CONFIG_GOOGLE_FIRMWARE is not set - -# -# EFI (Extensible Firmware Interface) Support -# -CONFIG_EFI_VARS=m -CONFIG_EFI_RUNTIME_MAP=y - -# -# File systems -# -CONFIG_DCACHE_WORD_ACCESS=y -CONFIG_EXT2_FS=m -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_DEFAULTS_TO_ORDERED=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=m -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -# CONFIG_EXT4_DEBUG is not set -CONFIG_FS_XIP=y -CONFIG_JBD=m -CONFIG_JBD2=m -# CONFIG_JBD2_DEBUG is not set -CONFIG_FS_MBCACHE=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -# CONFIG_JFS_DEBUG is not set -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=m -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -# CONFIG_XFS_WARN is not set -# CONFIG_XFS_DEBUG is not set -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_OCFS2_FS_O2CB=m -CONFIG_OCFS2_FS_USERSPACE_CLUSTER=m -CONFIG_OCFS2_DEBUG_MASKLOG=y -# CONFIG_OCFS2_DEBUG_FS is not set -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -# CONFIG_BTRFS_FS_CHECK_INTEGRITY is not set -# CONFIG_BTRFS_FS_RUN_SANITY_TESTS is not set -# CONFIG_BTRFS_DEBUG is not set -# CONFIG_BTRFS_ASSERT is not set -CONFIG_NILFS2_FS=m 
-CONFIG_FS_POSIX_ACL=y -CONFIG_EXPORTFS=y -CONFIG_FILE_LOCKING=y -CONFIG_FSNOTIFY=y -CONFIG_DNOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_FANOTIFY=y -CONFIG_QUOTA=y -# CONFIG_QUOTA_NETLINK_INTERFACE is not set -CONFIG_PRINT_QUOTA_WARNING=y -# CONFIG_QUOTA_DEBUG is not set -CONFIG_QUOTA_TREE=m -# CONFIG_QFMT_V1 is not set -# CONFIG_QFMT_V2 is not set -CONFIG_QUOTACTL=y -CONFIG_QUOTACTL_COMPAT=y -CONFIG_AUTOFS4_FS=y -CONFIG_FUSE_FS=m -# CONFIG_CUSE is not set - -# -# Caches -# -CONFIG_FSCACHE=m -CONFIG_FSCACHE_STATS=y -# CONFIG_FSCACHE_HISTOGRAM is not set -# CONFIG_FSCACHE_DEBUG is not set -# CONFIG_FSCACHE_OBJECT_LIST is not set -CONFIG_CACHEFILES=m -# CONFIG_CACHEFILES_DEBUG is not set -# CONFIG_CACHEFILES_HISTOGRAM is not set - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=m -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_UDF_NLS=y - -# -# DOS/FAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=866 -CONFIG_FAT_DEFAULT_IOCHARSET="utf8" -CONFIG_NTFS_FS=m -# CONFIG_NTFS_DEBUG is not set -CONFIG_NTFS_RW=y - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_SYSCTL=y -CONFIG_PROC_PAGE_MONITOR=y -CONFIG_KERNFS=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_TMPFS_XATTR=y -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_CONFIGFS_FS=m -CONFIG_MISC_FILESYSTEMS=y -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -CONFIG_ECRYPT_FS=m -# CONFIG_ECRYPT_FS_MESSAGING is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_LOGFS is not set -# CONFIG_CRAMFS is not set -CONFIG_SQUASHFS=m -# CONFIG_SQUASHFS_FILE_CACHE is not set -CONFIG_SQUASHFS_FILE_DIRECT=y -# CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y -# CONFIG_SQUASHFS_XATTR is not set 
-CONFIG_SQUASHFS_ZLIB=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -# CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set -# CONFIG_SQUASHFS_EMBEDDED is not set -CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 -# CONFIG_VXFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_OMFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX6FS_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_PSTORE is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set -CONFIG_F2FS_FS=m -CONFIG_F2FS_FS_XATTR=y -CONFIG_F2FS_FS_POSIX_ACL=y -CONFIG_F2FS_FS_SECURITY=y -# CONFIG_F2FS_CHECK_FS is not set -CONFIG_EFIVAR_FS=m -CONFIG_AUFS_FS=y -CONFIG_AUFS_BRANCH_MAX_127=y -# CONFIG_AUFS_BRANCH_MAX_511 is not set -# CONFIG_AUFS_BRANCH_MAX_1023 is not set -# CONFIG_AUFS_BRANCH_MAX_32767 is not set -CONFIG_AUFS_SBILIST=y -# CONFIG_AUFS_HNOTIFY is not set -# CONFIG_AUFS_EXPORT is not set -CONFIG_AUFS_RDU=y -CONFIG_AUFS_SHWH=y -CONFIG_AUFS_BR_RAMFS=y -CONFIG_AUFS_BR_FUSE=y -CONFIG_AUFS_POLL=y -CONFIG_AUFS_BDEV_LOOP=y -# CONFIG_AUFS_DEBUG is not set -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NFS_FS=m -CONFIG_NFS_V2=m -CONFIG_NFS_V3=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -# CONFIG_NFS_SWAP is not set -# CONFIG_NFS_V4_1 is not set -CONFIG_NFS_FSCACHE=y -# CONFIG_NFS_USE_LEGACY_DNS is not set -CONFIG_NFS_USE_KERNEL_DNS=y -CONFIG_NFSD=m -CONFIG_NFSD_V2_ACL=y -CONFIG_NFSD_V3=y -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -# CONFIG_NFSD_FAULT_INJECTION is not set -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=m -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=m -CONFIG_SUNRPC_GSS=m -# CONFIG_SUNRPC_DEBUG is not set -# CONFIG_CEPH_FS is not set -CONFIG_CIFS=m -# CONFIG_CIFS_STATS is not set -# CONFIG_CIFS_WEAK_PW_HASH is not set -# CONFIG_CIFS_UPCALL is not set -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_ACL is not set -# CONFIG_CIFS_DEBUG is not set -# CONFIG_CIFS_DFS_UPCALL is not set -# CONFIG_CIFS_SMB2 is not set -# CONFIG_CIFS_FSCACHE is not set -# CONFIG_NCP_FS is 
not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -CONFIG_NLS_CODEPAGE_850=m -# CONFIG_NLS_CODEPAGE_852 is not set -CONFIG_NLS_CODEPAGE_855=m -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -CONFIG_NLS_CODEPAGE_866=m -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -CONFIG_NLS_ISO8859_5=m -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -# CONFIG_NLS_MAC_ROMAN is not set -# CONFIG_NLS_MAC_CELTIC is not set -# CONFIG_NLS_MAC_CENTEURO is not set -# CONFIG_NLS_MAC_CROATIAN is not set -# CONFIG_NLS_MAC_CYRILLIC is not set -# CONFIG_NLS_MAC_GAELIC is not set -# CONFIG_NLS_MAC_GREEK is not set -# CONFIG_NLS_MAC_ICELAND is not set -# CONFIG_NLS_MAC_INUIT is not set -# CONFIG_NLS_MAC_ROMANIAN is not set -# CONFIG_NLS_MAC_TURKISH is not set -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -# CONFIG_DLM_DEBUG is not set - -# -# Kernel hacking -# -CONFIG_TRACE_IRQFLAGS_SUPPORT=y - -# -# printk and dmesg options -# -# CONFIG_PRINTK_TIME is not set -CONFIG_DEFAULT_MESSAGE_LOGLEVEL=4 -# 
CONFIG_BOOT_PRINTK_DELAY is not set - -# -# Compile-time checks and compiler options -# -# CONFIG_DEBUG_INFO is not set -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -# CONFIG_STRIP_ASM_SYMS is not set -# CONFIG_READABLE_ASM is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -# CONFIG_DEBUG_SECTION_MISMATCH is not set -CONFIG_ARCH_WANT_FRAME_POINTERS=y -CONFIG_FRAME_POINTER=y -# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 -CONFIG_DEBUG_KERNEL=y - -# -# Memory Debugging -# -# CONFIG_DEBUG_PAGEALLOC is not set -# CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set -CONFIG_HAVE_DEBUG_KMEMLEAK=y -# CONFIG_DEBUG_KMEMLEAK is not set -# CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_VM is not set -# CONFIG_DEBUG_VIRTUAL is not set -CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_DEBUG_PER_CPU_MAPS is not set -CONFIG_HAVE_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACKOVERFLOW is not set -CONFIG_HAVE_ARCH_KMEMCHECK=y -# CONFIG_KMEMCHECK is not set -# CONFIG_DEBUG_SHIRQ is not set - -# -# Debug Lockups and Hangs -# -# CONFIG_LOCKUP_DETECTOR is not set -# CONFIG_DETECT_HUNG_TASK is not set -# CONFIG_PANIC_ON_OOPS is not set -CONFIG_PANIC_ON_OOPS_VALUE=0 -CONFIG_PANIC_TIMEOUT=0 -CONFIG_SCHED_DEBUG=y -# CONFIG_SCHEDSTATS is not set -CONFIG_TIMER_STATS=y -# CONFIG_DEBUG_PREEMPT is not set - -# -# Lock Debugging (spinlocks, mutexes, etc...) 
-# -# CONFIG_DEBUG_RT_MUTEXES is not set -# CONFIG_RT_MUTEX_TESTER is not set -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set -# CONFIG_DEBUG_LOCK_ALLOC is not set -# CONFIG_PROVE_LOCKING is not set -# CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_ATOMIC_SLEEP is not set -# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set -# CONFIG_LOCK_TORTURE_TEST is not set -# CONFIG_DEBUG_KOBJECT is not set -CONFIG_DEBUG_BUGVERBOSE=y -# CONFIG_DEBUG_LIST is not set -# CONFIG_DEBUG_SG is not set -# CONFIG_DEBUG_NOTIFIERS is not set -# CONFIG_DEBUG_CREDENTIALS is not set - -# -# RCU Debugging -# -# CONFIG_PROVE_RCU_DELAY is not set -# CONFIG_SPARSE_RCU_POINTER is not set -# CONFIG_TORTURE_TEST is not set -# CONFIG_RCU_TORTURE_TEST is not set -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -# CONFIG_RCU_CPU_STALL_VERBOSE is not set -# CONFIG_RCU_CPU_STALL_INFO is not set -# CONFIG_RCU_TRACE is not set -# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set -# CONFIG_NOTIFIER_ERROR_INJECTION is not set -# CONFIG_FAULT_INJECTION is not set -# CONFIG_LATENCYTOP is not set -CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS=y -# CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set -CONFIG_USER_STACKTRACE_SUPPORT=y -CONFIG_HAVE_FUNCTION_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y -CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y -CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y -CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y -CONFIG_HAVE_SYSCALL_TRACEPOINTS=y -CONFIG_HAVE_FENTRY=y -CONFIG_HAVE_C_RECORDMCOUNT=y -CONFIG_TRACING_SUPPORT=y -# CONFIG_FTRACE is not set - -# -# Runtime Testing -# -# CONFIG_TEST_LIST_SORT is not set -# CONFIG_BACKTRACE_SELF_TEST is not set -# CONFIG_RBTREE_TEST is not set -# CONFIG_INTERVAL_TREE_TEST is not set -# CONFIG_PERCPU_TEST is not set -# CONFIG_ATOMIC64_SELFTEST is not set -# CONFIG_ASYNC_RAID6_TEST is not set -# CONFIG_TEST_STRING_HELPERS is not set -# CONFIG_TEST_KSTRTOX is not set -# 
CONFIG_PROVIDE_OHCI1394_DMA_INIT is not set -# CONFIG_DMA_API_DEBUG is not set -# CONFIG_TEST_MODULE is not set -# CONFIG_TEST_USER_COPY is not set -# CONFIG_SAMPLES is not set -CONFIG_HAVE_ARCH_KGDB=y -# CONFIG_KGDB is not set -# CONFIG_STRICT_DEVMEM is not set -CONFIG_X86_VERBOSE_BOOTUP=y -CONFIG_EARLY_PRINTK=y -# CONFIG_EARLY_PRINTK_DBGP is not set -# CONFIG_EARLY_PRINTK_EFI is not set -# CONFIG_X86_PTDUMP is not set -CONFIG_DEBUG_RODATA=y -CONFIG_DEBUG_RODATA_TEST=y -# CONFIG_DEBUG_SET_MODULE_RONX is not set -# CONFIG_DEBUG_NX_TEST is not set -CONFIG_DOUBLEFAULT=y -# CONFIG_DEBUG_TLBFLUSH is not set -# CONFIG_IOMMU_DEBUG is not set -# CONFIG_IOMMU_STRESS is not set -CONFIG_HAVE_MMIOTRACE_SUPPORT=y -CONFIG_IO_DELAY_TYPE_0X80=0 -CONFIG_IO_DELAY_TYPE_0XED=1 -CONFIG_IO_DELAY_TYPE_UDELAY=2 -CONFIG_IO_DELAY_TYPE_NONE=3 -CONFIG_IO_DELAY_0X80=y -# CONFIG_IO_DELAY_0XED is not set -# CONFIG_IO_DELAY_UDELAY is not set -# CONFIG_IO_DELAY_NONE is not set -CONFIG_DEFAULT_IO_DELAY_TYPE=0 -# CONFIG_CPA_DEBUG is not set -# CONFIG_OPTIMIZE_INLINING is not set -# CONFIG_DEBUG_NMI_SELFTEST is not set -# CONFIG_X86_DEBUG_STATIC_CPU_HAS is not set - -# -# Security options -# -CONFIG_KEYS=y -# CONFIG_PERSISTENT_KEYRINGS is not set -# CONFIG_BIG_KEYS is not set -# CONFIG_ENCRYPTED_KEYS is not set -CONFIG_KEYS_DEBUG_PROC_KEYS=y -# CONFIG_SECURITY_DMESG_RESTRICT is not set -# CONFIG_SECURITY is not set -# CONFIG_SECURITYFS is not set -CONFIG_DEFAULT_SECURITY_DAC=y -CONFIG_DEFAULT_SECURITY="" -CONFIG_XOR_BLOCKS=m -CONFIG_ASYNC_CORE=m -CONFIG_ASYNC_MEMCPY=m -CONFIG_ASYNC_XOR=m -CONFIG_ASYNC_PQ=m -CONFIG_ASYNC_RAID6_RECOV=m -CONFIG_CRYPTO=y - -# -# Crypto core or helper -# -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_ALGAPI2=y -CONFIG_CRYPTO_AEAD=m -CONFIG_CRYPTO_AEAD2=y -CONFIG_CRYPTO_BLKCIPHER=m -CONFIG_CRYPTO_BLKCIPHER2=y -CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_HASH2=y -CONFIG_CRYPTO_RNG=m -CONFIG_CRYPTO_RNG2=y -CONFIG_CRYPTO_PCOMP2=y -CONFIG_CRYPTO_MANAGER=m -CONFIG_CRYPTO_MANAGER2=y -# 
CONFIG_CRYPTO_USER is not set -CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y -CONFIG_CRYPTO_GF128MUL=m -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_PCRYPT is not set -CONFIG_CRYPTO_WORKQUEUE=y -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m -# CONFIG_CRYPTO_TEST is not set -CONFIG_CRYPTO_ABLK_HELPER=m -CONFIG_CRYPTO_GLUE_HELPER_X86=m - -# -# Authenticated Encryption with Associated Data -# -CONFIG_CRYPTO_CCM=m -# CONFIG_CRYPTO_GCM is not set -CONFIG_CRYPTO_SEQIV=m - -# -# Block modes -# -CONFIG_CRYPTO_CBC=m -CONFIG_CRYPTO_CTR=m -# CONFIG_CRYPTO_CTS is not set -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m - -# -# Hash modes -# -CONFIG_CRYPTO_CMAC=m -CONFIG_CRYPTO_HMAC=m -# CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_VMAC is not set - -# -# Digest -# -CONFIG_CRYPTO_CRC32C=m -# CONFIG_CRYPTO_CRC32C_INTEL is not set -# CONFIG_CRYPTO_CRC32 is not set -CONFIG_CRYPTO_CRC32_PCLMUL=m -CONFIG_CRYPTO_CRCT10DIF=y -CONFIG_CRYPTO_CRCT10DIF_PCLMUL=m -# CONFIG_CRYPTO_GHASH is not set -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MD5=m -CONFIG_CRYPTO_MICHAEL_MIC=m -# CONFIG_CRYPTO_RMD128 is not set -CONFIG_CRYPTO_RMD160=m -# CONFIG_CRYPTO_RMD256 is not set -# CONFIG_CRYPTO_RMD320 is not set -CONFIG_CRYPTO_SHA1=m -# CONFIG_CRYPTO_SHA1_SSSE3 is not set -CONFIG_CRYPTO_SHA256_SSSE3=m -CONFIG_CRYPTO_SHA512_SSSE3=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -# CONFIG_CRYPTO_TGR192 is not set -CONFIG_CRYPTO_WP512=m -# CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set - -# -# Ciphers -# -CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_AES_X86_64=m -CONFIG_CRYPTO_AES_NI_INTEL=m -# CONFIG_CRYPTO_ANUBIS is not set -CONFIG_CRYPTO_ARC4=m -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_BLOWFISH_X86_64 is not set -# CONFIG_CRYPTO_CAMELLIA is not set -CONFIG_CRYPTO_CAMELLIA_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64=m -CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64=m -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set -# CONFIG_CRYPTO_CAST6 is 
not set -# CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set -CONFIG_CRYPTO_DES=m -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_SALSA20 is not set -# CONFIG_CRYPTO_SALSA20_X86_64 is not set -# CONFIG_CRYPTO_SEED is not set -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SERPENT_SSE2_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX_X86_64=m -CONFIG_CRYPTO_SERPENT_AVX2_X86_64=m -# CONFIG_CRYPTO_TEA is not set -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_TWOFISH_X86_64=m -CONFIG_CRYPTO_TWOFISH_X86_64_3WAY=m -CONFIG_CRYPTO_TWOFISH_AVX_X86_64=m - -# -# Compression -# -CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_ZLIB is not set -CONFIG_CRYPTO_LZO=y -CONFIG_CRYPTO_LZ4=y -CONFIG_CRYPTO_LZ4HC=y - -# -# Random Number Generation -# -# CONFIG_CRYPTO_ANSI_CPRNG is not set -# CONFIG_CRYPTO_USER_API_HASH is not set -# CONFIG_CRYPTO_USER_API_SKCIPHER is not set -CONFIG_CRYPTO_HW=y -# CONFIG_CRYPTO_DEV_PADLOCK is not set -CONFIG_CRYPTO_DEV_CCP=y -CONFIG_CRYPTO_DEV_CCP_DD=m -CONFIG_CRYPTO_DEV_CCP_CRYPTO=m -# CONFIG_ASYMMETRIC_KEY_TYPE is not set -CONFIG_HAVE_KVM=y -CONFIG_HAVE_KVM_IRQCHIP=y -CONFIG_HAVE_KVM_IRQ_ROUTING=y -CONFIG_HAVE_KVM_EVENTFD=y -CONFIG_KVM_APIC_ARCHITECTURE=y -CONFIG_KVM_MMIO=y -CONFIG_KVM_ASYNC_PF=y -CONFIG_HAVE_KVM_MSI=y -CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y -CONFIG_KVM_VFIO=y -CONFIG_VIRTUALIZATION=y -CONFIG_KVM=m -CONFIG_KVM_INTEL=m -CONFIG_KVM_AMD=m -# CONFIG_BINARY_PRINTF is not set - -# -# Library routines -# -CONFIG_RAID6_PQ=m -CONFIG_BITREVERSE=y -CONFIG_GENERIC_STRNCPY_FROM_USER=y -CONFIG_GENERIC_STRNLEN_USER=y -CONFIG_GENERIC_NET_UTILS=y -CONFIG_GENERIC_FIND_FIRST_BIT=y -CONFIG_GENERIC_PCI_IOMAP=y -CONFIG_GENERIC_IOMAP=y -CONFIG_GENERIC_IO=y -CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y -CONFIG_CRC_CCITT=m -CONFIG_CRC16=m -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC32_SELFTEST is not set -CONFIG_CRC32_SLICEBY8=y -# CONFIG_CRC32_SLICEBY4 is not set -# CONFIG_CRC32_SARWATE is not set -# CONFIG_CRC32_BIT 
is not set -CONFIG_CRC7=m -CONFIG_LIBCRC32C=m -CONFIG_CRC8=m -# CONFIG_AUDIT_ARCH_COMPAT_GENERIC is not set -# CONFIG_RANDOM32_SELFTEST is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_LZ4_COMPRESS=y -CONFIG_LZ4HC_COMPRESS=y -CONFIG_LZ4_DECOMPRESS=y -CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_POWERPC=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y -CONFIG_XZ_DEC_BCJ=y -# CONFIG_XZ_DEC_TEST is not set -CONFIG_DECOMPRESS_GZIP=y -CONFIG_DECOMPRESS_BZIP2=y -CONFIG_DECOMPRESS_LZMA=y -CONFIG_DECOMPRESS_XZ=y -CONFIG_DECOMPRESS_LZO=y -CONFIG_DECOMPRESS_LZ4=y -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m -CONFIG_ASSOCIATIVE_ARRAY=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT_MAP=y -CONFIG_HAS_DMA=y -CONFIG_CHECK_SIGNATURE=y -CONFIG_CPU_RMAP=y -CONFIG_DQL=y -CONFIG_NLATTR=y -CONFIG_ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE=y -CONFIG_AVERAGE=y -CONFIG_CORDIC=m -# CONFIG_DDR is not set -CONFIG_OID_REGISTRY=m -CONFIG_UCS2_STRING=y -CONFIG_FONT_SUPPORT=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/1_config-server b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/1_config-server deleted file mode 100644 index 56cd03b4f..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/1_config-server +++ /dev/null @@ -1,17 +0,0 @@ -# Calculate format=openrc name=.config os_linux_system==server -CONFIG_TREE_RCU=y -CONFIG_TREE_PREEMPT_RCU=n -CONFIG_PREEMPT_RCU=n -CONFIG_INLINE_SPIN_UNLOCK_IRQ=y -CONFIG_INLINE_READ_UNLOCK=y -CONFIG_INLINE_READ_UNLOCK_IRQ=y -CONFIG_INLINE_WRITE_UNLOCK=y -CONFIG_INLINE_WRITE_UNLOCK_IRQ=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT=n -CONFIG_HZ_100=y -CONFIG_HZ=1000 -CONFIG_HZ_1000=n -CONFIG_INPUT_JOYSTICK=n -CONFIG_INPUT_TABLET=n 
-CONFIG_DRM_I810=n diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/3_config-desktop-bfq-tuxonice b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/3_config-desktop-bfq-tuxonice deleted file mode 100644 index 4bc6252a4..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/3_config-desktop-bfq-tuxonice +++ /dev/null @@ -1,19 +0,0 @@ -# Calculate format=openrc os_linux_system==desktop name=.config -CONFIG_IOSCHED_BFQ=y -CONFIG_CGROUP_BFQIO=y -CONFIG_DEFAULT_BFQ=n -CONFIG_TOI_CORE=y -CONFIG_TOI_FILE=y -CONFIG_TOI_SWAP=y -CONFIG_TOI_CRYPTO=y -CONFIG_TOI_USERUI=n -CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT=-2 -CONFIG_TOI_KEEP_IMAGE=n -CONFIG_TOI_REPLACE_SWSUSP=y -CONFIG_TOI_IGNORE_LATE_INITCALL=y -CONFIG_TOI_DEFAULT_WAIT=25 -CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE=2000 -CONFIG_TOI_CHECKSUM=y -CONFIG_TOI=y -CONFIG_TOI_ZRAM_SUPPORT=y -CONFIG_TOI_INCREMENTAL=n diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/4300_aufs-3.patch b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/4300_aufs-3.patch deleted file mode 100644 index a429b3397..000000000 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.15/4300_aufs-3.patch +++ /dev/null @@ -1,31735 +0,0 @@ -# Calculate format=diff -diff --git Documentation/ABI/testing/debugfs-aufs Documentation/ABI/testing/debugfs-aufs -new file mode 100644 -index 0000000..99642d1 ---- /dev/null -+++ Documentation/ABI/testing/debugfs-aufs -@@ -0,0 +1,50 @@ -+What: /debug/aufs/si_/ -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ Under /debug/aufs, a directory named si_ is created -+ per aufs mount, where is a unique id generated -+ internally. -+ -+What: /debug/aufs/si_/plink -+Date: Apr 2013 -+Contact: J. R. Okajima -+Description: -+ It has three lines and shows the information about the -+ pseudo-link. 
The first line is a single number -+ representing a number of buckets. The second line is a -+ number of pseudo-links per buckets (separated by a -+ blank). The last line is a single number representing a -+ total number of psedo-links. -+ When the aufs mount option 'noplink' is specified, it -+ will show "1\n0\n0\n". -+ -+What: /debug/aufs/si_/xib -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ It shows the consumed blocks by xib (External Inode Number -+ Bitmap), its block size and file size. -+ When the aufs mount option 'noxino' is specified, it -+ will be empty. About XINO files, see the aufs manual. -+ -+What: /debug/aufs/si_/xino0, xino1 ... xinoN -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ It shows the consumed blocks by xino (External Inode Number -+ Translation Table), its link count, block size and file -+ size. -+ When the aufs mount option 'noxino' is specified, it -+ will be empty. About XINO files, see the aufs manual. -+ -+What: /debug/aufs/si_/xigen -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ It shows the consumed blocks by xigen (External Inode -+ Generation Table), its block size and file size. -+ If CONFIG_AUFS_EXPORT is disabled, this entry will not -+ be created. -+ When the aufs mount option 'noxino' is specified, it -+ will be empty. About XINO files, see the aufs manual. -diff --git Documentation/ABI/testing/sysfs-aufs Documentation/ABI/testing/sysfs-aufs -new file mode 100644 -index 0000000..82f9518 ---- /dev/null -+++ Documentation/ABI/testing/sysfs-aufs -@@ -0,0 +1,31 @@ -+What: /sys/fs/aufs/si_/ -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ Under /sys/fs/aufs, a directory named si_ is created -+ per aufs mount, where is a unique id generated -+ internally. -+ -+What: /sys/fs/aufs/si_/br0, br1 ... brN -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ It shows the abolute path of a member directory (which -+ is called branch) in aufs, and its permission. 
-+ -+What: /sys/fs/aufs/si_/brid0, brid1 ... bridN -+Date: July 2013 -+Contact: J. R. Okajima -+Description: -+ It shows the id of a member directory (which is called -+ branch) in aufs. -+ -+What: /sys/fs/aufs/si_/xi_path -+Date: March 2009 -+Contact: J. R. Okajima -+Description: -+ It shows the abolute path of XINO (External Inode Number -+ Bitmap, Translation Table and Generation Table) file -+ even if it is the default path. -+ When the aufs mount option 'noxino' is specified, it -+ will be empty. About XINO files, see the aufs manual. -diff --git Documentation/filesystems/aufs/README Documentation/filesystems/aufs/README -new file mode 100644 -index 0000000..950633f ---- /dev/null -+++ Documentation/filesystems/aufs/README -@@ -0,0 +1,368 @@ -+ -+Aufs3 -- advanced multi layered unification filesystem version 3.x -+http://aufs.sf.net -+Junjiro R. Okajima -+ -+ -+0. Introduction -+---------------------------------------- -+In the early days, aufs was entirely re-designed and re-implemented -+Unionfs Version 1.x series. After many original ideas, approaches, -+improvements and implementations, it becomes totally different from -+Unionfs while keeping the basic features. -+Recently, Unionfs Version 2.x series begin taking some of the same -+approaches to aufs1's. -+Unionfs is being developed by Professor Erez Zadok at Stony Brook -+University and his team. -+ -+Aufs3 supports linux-3.0 and later. -+If you want older kernel version support, try aufs2-2.6.git or -+aufs2-standalone.git repository, aufs1 from CVS on SourceForge. -+ -+Note: it becomes clear that "Aufs was rejected. Let's give it up." -+ According to Christoph Hellwig, linux rejects all union-type -+ filesystems but UnionMount. -+ -+ -+PS. Al Viro seems have a plan to merge aufs as well as overlayfs and -+ UnionMount, and he pointed out an issue around a directory mutex -+ lock and aufs addressed it. But it is still unsure whether aufs will -+ be merged (or any other union solution). -+ -+ -+ -+1. 
Features -+---------------------------------------- -+- unite several directories into a single virtual filesystem. The member -+ directory is called as a branch. -+- you can specify the permission flags to the branch, which are 'readonly', -+ 'readwrite' and 'whiteout-able.' -+- by upper writable branch, internal copyup and whiteout, files/dirs on -+ readonly branch are modifiable logically. -+- dynamic branch manipulation, add, del. -+- etc... -+ -+Also there are many enhancements in aufs1, such as: -+- readdir(3) in userspace. -+- keep inode number by external inode number table -+- keep the timestamps of file/dir in internal copyup operation -+- seekable directory, supporting NFS readdir. -+- whiteout is hardlinked in order to reduce the consumption of inodes -+ on branch -+- do not copyup, nor create a whiteout when it is unnecessary -+- revert a single systemcall when an error occurs in aufs -+- remount interface instead of ioctl -+- maintain /etc/mtab by an external command, /sbin/mount.aufs. -+- loopback mounted filesystem as a branch -+- kernel thread for removing the dir who has a plenty of whiteouts -+- support copyup sparse file (a file which has a 'hole' in it) -+- default permission flags for branches -+- selectable permission flags for ro branch, whether whiteout can -+ exist or not -+- export via NFS. -+- support /fs/aufs and /aufs. -+- support multiple writable branches, some policies to select one -+ among multiple writable branches. -+- a new semantics for link(2) and rename(2) to support multiple -+ writable branches. -+- no glibc changes are required. -+- pseudo hardlink (hardlink over branches) -+- allow a direct access manually to a file on branch, e.g. bypassing aufs. -+ including NFS or remote filesystem branch. -+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX. -+- and more... -+ -+Currently these features are dropped temporary from aufs3. -+See design/08plan.txt in detail. 
-+- test only the highest one for the directory permission (dirperm1) -+- copyup on open (coo=) -+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs -+ (robr) -+- statistics of aufs thread (/sys/fs/aufs/stat) -+- delegation mode (dlgt) -+ a delegation of the internal branch access to support task I/O -+ accounting, which also supports Linux Security Modules (LSM) mainly -+ for Suse AppArmor. -+- intent.open/create (file open in a single lookup) -+ -+Features or just an idea in the future (see also design/*.txt), -+- reorder the branch index without del/re-add. -+- permanent xino files for NFSD -+- an option for refreshing the opened files after add/del branches -+- 'move' policy for copy-up between two writable branches, after -+ checking free space. -+- light version, without branch manipulation. (unnecessary?) -+- copyup in userspace -+- inotify in userspace -+- readv/writev -+- xattr, acl -+ -+ -+2. Download -+---------------------------------------- -+There were three GIT trees for aufs3, aufs3-linux.git, -+aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in -+"aufs-util.git." -+While the aufs-util is always necessary, you need either of aufs3-linux -+or aufs3-standalone. -+ -+The aufs3-linux tree includes the whole linux mainline GIT tree, -+git://git.kernel.org/.../torvalds/linux.git. -+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot -+build aufs3 as an external kernel module. -+ -+On the other hand, the aufs3-standalone tree has only aufs source files -+and necessary patches, and you can select CONFIG_AUFS_FS=m. -+ -+You will find GIT branches whose name is in form of "aufs3.x" where "x" -+represents the linux kernel version, "linux-3.x". For instance, -+"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use -+"aufs3.x-rcN" branch. 
-+ -+o aufs3-linux tree -+$ git clone --reference /your/linux/git/tree \ -+ git://git.code.sf.net/p/aufs/aufs3-linux aufs-aufs3-linux \ -+ aufs3-linux.git -+- if you don't have linux GIT tree, then remove "--reference ..." -+$ cd aufs3-linux.git -+$ git checkout origin/aufs3.0 -+ -+o aufs3-standalone tree -+$ git clone git://git.code.sf.net/p/aufs/aufs3-standalone \ -+ aufs3-standalone.git -+$ cd aufs3-standalone.git -+$ git checkout origin/aufs3.0 -+ -+o aufs-util tree -+$ git clone git://git.code.sf.net/p/aufs/aufs-util \ -+ aufs-util.git -+$ cd aufs-util.git -+$ git checkout origin/aufs3.0 -+ -+Note: The 3.x-rcN branch is to be used with `rc' kernel versions ONLY. -+The minor version number, 'x' in '3.x', of aufs may not always -+follow the minor version number of the kernel. -+Because changes in the kernel that cause the use of a new -+minor version number do not always require changes to aufs-util. -+ -+Since aufs-util has its own minor version number, you may not be -+able to find a GIT branch in aufs-util for your kernel's -+exact minor version number. -+In this case, you should git-checkout the branch for the -+nearest lower number. -+ -+For (an unreleased) example: -+If you are using "linux-3.10" and the "aufs3.10" branch -+does not exist in aufs-util repository, then "aufs3.9", "aufs3.8" -+or something numerically smaller is the branch for your kernel. -+ -+Also you can view all branches by -+ $ git branch -a -+ -+ -+3. Configuration and Compilation -+---------------------------------------- -+Make sure you have git-checkout'ed the correct branch. -+ -+For aufs3-linux tree, -+- enable CONFIG_AUFS_FS. -+- set other aufs configurations if necessary. -+ -+For aufs3-standalone tree, -+There are several ways to build. -+ -+1. -+- apply ./aufs3-kbuild.patch to your kernel source files. -+- apply ./aufs3-base.patch too. -+- apply ./aufs3-mmap.patch too. -+- apply ./aufs3-standalone.patch too, if you have a plan to set -+ CONFIG_AUFS_FS=m. 
otherwise you don't need ./aufs3-standalone.patch. -+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your -+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild. -+- enable CONFIG_AUFS_FS, you can select either -+ =m or =y. -+- and build your kernel as usual. -+- install the built kernel. -+ Note: Since linux-3.9, every filesystem module requires an alias -+ "fs-". You should make sure that "fs-aufs" is listed in your -+ modules.aliases file if you set CONFIG_AUFS_FS=m. -+- install the header files too by "make headers_install" to the -+ directory where you specify. By default, it is $PWD/usr. -+ "make help" shows a brief note for headers_install. -+- and reboot your system. -+ -+2. -+- module only (CONFIG_AUFS_FS=m). -+- apply ./aufs3-base.patch to your kernel source files. -+- apply ./aufs3-mmap.patch too. -+- apply ./aufs3-standalone.patch too. -+- build your kernel, don't forget "make headers_install", and reboot. -+- edit ./config.mk and set other aufs configurations if necessary. -+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes -+ every aufs configurations. -+- build the module by simple "make". -+ Note: Since linux-3.9, every filesystem module requires an alias -+ "fs-". You should make sure that "fs-aufs" is listed in your -+ modules.aliases file. -+- you can specify ${KDIR} make variable which points to your kernel -+ source tree. -+- install the files -+ + run "make install" to install the aufs module, or copy the built -+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply). -+ + run "make install_headers" (instead of headers_install) to install -+ the modified aufs header file (you can specify DESTDIR which is -+ available in aufs standalone version's Makefile only), or copy -+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever -+ you like manually. By default, the target directory is $PWD/usr. 
-+- no need to apply aufs3-kbuild.patch, nor copying source files to your -+ kernel source tree. -+ -+Note: The header file aufs_type.h is necessary to build aufs-util -+ as well as "make headers_install" in the kernel source tree. -+ headers_install is subject to be forgotten, but it is essentially -+ necessary, not only for building aufs-util. -+ You may not meet problems without headers_install in some older -+ version though. -+ -+And then, -+- read README in aufs-util, build and install it -+- note that your distribution may contain an obsoleted version of -+ aufs_type.h in /usr/include/linux or something. When you build aufs -+ utilities, make sure that your compiler refers the correct aufs header -+ file which is built by "make headers_install." -+- if you want to use readdir(3) in userspace or pathconf(3) wrapper, -+ then run "make install_ulib" too. And refer to the aufs manual in -+ detail. -+ -+There several other patches in aufs3-standalone.git. They are all -+optional. When you meet some problems, they will help you. -+- aufs3-loopback.patch -+ Supports a nested loopback mount in a branch-fs. This patch is -+ unnecessary until aufs produces a message like "you may want to try -+ another patch for loopback file". -+- vfs-ino.patch -+ Modifies a system global kernel internal function get_next_ino() in -+ order to stop assigning 0 for an inode-number. Not directly related to -+ aufs, but recommended generally. -+- tmpfs-idr.patch -+ Keeps the tmpfs inode number as the lowest value. Effective to reduce -+ the size of aufs XINO files for tmpfs branch. Also it prevents the -+ duplication of inode number, which is important for backup tools and -+ other utilities. When you find aufs XINO files for tmpfs branch -+ growing too much, try this patch. -+ -+ -+4. Usage -+---------------------------------------- -+At first, make sure aufs-util are installed, and please read the aufs -+manual, aufs.5 in aufs-util.git tree. 
-+$ man -l aufs.5 -+ -+And then, -+$ mkdir /tmp/rw /tmp/aufs -+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs -+ -+Here is another example. The result is equivalent. -+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs -+ Or -+# mount -t aufs -o br:/tmp/rw none /tmp/aufs -+# mount -o remount,append:${HOME} /tmp/aufs -+ -+Then, you can see whole tree of your home dir through /tmp/aufs. If -+you modify a file under /tmp/aufs, the one on your home directory is -+not affected, instead the same named file will be newly created under -+/tmp/rw. And all of your modification to a file will be applied to -+the one under /tmp/rw. This is called the file based Copy on Write -+(COW) method. -+Aufs mount options are described in aufs.5. -+If you run chroot or something and make your aufs as a root directory, -+then you need to customize the shutdown script. See the aufs manual in -+detail. -+ -+Additionally, there are some sample usages of aufs which are a -+diskless system with network booting, and LiveCD over NFS. -+See sample dir in CVS tree on SourceForge. -+ -+ -+5. Contact -+---------------------------------------- -+When you have any problems or strange behaviour in aufs, please let me -+know with: -+- /proc/mounts (instead of the output of mount(8)) -+- /sys/module/aufs/* -+- /sys/fs/aufs/* (if you have them) -+- /debug/aufs/* (if you have them) -+- linux kernel version -+ if your kernel is not plain, for example modified by distributor, -+ the url where i can download its source is necessary too. -+- aufs version which was printed at loading the module or booting the -+ system, instead of the date you downloaded. 
-+- configuration (define/undefine CONFIG_AUFS_xxx) -+- kernel configuration or /proc/config.gz (if you have it) -+- behaviour which you think to be incorrect -+- actual operation, reproducible one is better -+- mailto: aufs-users at lists.sourceforge.net -+ -+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches, -+and Feature Requests) on SourceForge. Please join and write to -+aufs-users ML. -+ -+ -+6. Acknowledgements -+---------------------------------------- -+Thanks to everyone who have tried and are using aufs, whoever -+have reported a bug or any feedback. -+ -+Especially donators: -+Tomas Matejicek(slax.org) made a donation (much more than once). -+ Since Apr 2010, Tomas M (the author of Slax and Linux Live -+ scripts) is making "doubling" donations. -+ Unfortunately I cannot list all of the donators, but I really -+ appreciate. -+ It ends Aug 2010, but the ordinary donation URL is still available. -+ -+Dai Itasaka made a donation (2007/8). -+Chuck Smith made a donation (2008/4, 10 and 12). -+Henk Schoneveld made a donation (2008/9). -+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10). -+Francois Dupoux made a donation (2008/11). -+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public -+ aufs2 GIT tree (2009/2). -+William Grant made a donation (2009/3). -+Patrick Lane made a donation (2009/4). -+The Mail Archive (mail-archive.com) made donations (2009/5). -+Nippy Networks (Ed Wildgoose) made a donation (2009/7). -+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11). -+Pavel Pronskiy made a donation (2011/2). -+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy -+ Networks (Ed Wildgoose) made a donation for hardware (2011/3). -+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and -+11). -+Sam Liddicott made a donation (2011/9). -+Era Scarecrow made a donation (2013/4). -+Bor Ratajc made a donation (2013/4). -+Alessandro Gorreta made a donation (2013/4). 
-+POIRETTE Marc made a donation (2013/4). -+Alessandro Gorreta made a donation (2013/4). -+lauri kasvandik made a donation (2013/5). -+"pemasu from Finland" made a donation (2013/7). -+The Parted Magic Project made a donation (2013/9 and 11). -+Pavel Barta made a donation (2013/10). -+Nikolay Pertsev made a donation (2014/5). -+ -+Thank you very much. -+Donations are always, including future donations, very important and -+helpful for me to keep on developing aufs. -+ -+ -+7. -+---------------------------------------- -+If you are an experienced user, no explanation is needed. Aufs is -+just a linux filesystem. -+ -+ -+Enjoy! -+ -+# Local variables: ; -+# mode: text; -+# End: ; -diff --git Documentation/filesystems/aufs/design/01intro.txt Documentation/filesystems/aufs/design/01intro.txt -new file mode 100644 -index 0000000..d97a5a2 ---- /dev/null -+++ Documentation/filesystems/aufs/design/01intro.txt -@@ -0,0 +1,148 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Introduction -+---------------------------------------- -+ -+aufs [ei ju: ef es] | [a u f s] -+1. abbrev. for "advanced multi-layered unification filesystem". -+2. abbrev. for "another unionfs". -+3. abbrev. for "auf das" in German which means "on the" in English. -+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). -+ But "Filesystem aufs Filesystem" is hard to understand. -+ -+AUFS is a filesystem with features: -+- multi layered stackable unification filesystem, the member directory -+ is called as a branch. -+- branch permission and attribute, 'readonly', 'real-readonly', -+ 'readwrite', 'whiteout-able', 'link-able whiteout' and their -+ combination. -+- internal "file copy-on-write". -+- logical deletion, whiteout. -+- dynamic branch manipulation, adding, deleting and changing permission. -+- allow bypassing aufs, user's direct branch access. -+- external inode number translation table and bitmap which maintains the -+ persistent aufs inode number. 
-+- seekable directory, including NFS readdir. -+- file mapping, mmap and sharing pages. -+- pseudo-link, hardlink over branches. -+- loopback mounted filesystem as a branch. -+- several policies to select one among multiple writable branches. -+- revert a single systemcall when an error occurs in aufs. -+- and more... -+ -+ -+Multi Layered Stackable Unification Filesystem -+---------------------------------------------------------------------- -+Most people already knows what it is. -+It is a filesystem which unifies several directories and provides a -+merged single directory. When users access a file, the access will be -+passed/re-directed/converted (sorry, I am not sure which English word is -+correct) to the real file on the member filesystem. The member -+filesystem is called 'lower filesystem' or 'branch' and has a mode -+'readonly' and 'readwrite.' And the deletion for a file on the lower -+readonly branch is handled by creating 'whiteout' on the upper writable -+branch. -+ -+On LKML, there have been discussions about UnionMount (Jan Blunck, -+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took -+different approaches to implement the merged-view. -+The former tries putting it into VFS, and the latter implements as a -+separate filesystem. -+(If I misunderstand about these implementations, please let me know and -+I shall correct it. Because it is a long time ago when I read their -+source files last time). -+ -+UnionMount's approach will be able to small, but may be hard to share -+branches between several UnionMount since the whiteout in it is -+implemented in the inode on branch filesystem and always -+shared. According to Bharata's post, readdir does not seems to be -+finished yet. -+There are several missing features known in this implementations such as -+- for users, the inode number may change silently. eg. copy-up. -+- link(2) may break by copy-up. -+- read(2) may get an obsoleted filedata (fstat(2) too). 
-+- fcntl(F_SETLK) may be broken by copy-up. -+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after -+ open(O_RDWR). -+ -+Unionfs has a longer history. When I started implementing a stacking filesystem -+(Aug 2005), it already existed. It has virtual super_block, inode, -+dentry and file objects and they have an array pointing lower same kind -+objects. After contributing many patches for Unionfs, I re-started my -+project AUFS (Jun 2006). -+ -+In AUFS, the structure of filesystem resembles to Unionfs, but I -+implemented my own ideas, approaches and enhancements and it became -+totally different one. -+ -+Comparing DM snapshot and fs based implementation -+- the number of bytes to be copied between devices is much smaller. -+- the type of filesystem must be one and only. -+- the fs must be writable, no readonly fs, even for the lower original -+ device. so the compression fs will not be usable. but if we use -+ loopback mount, we may address this issue. -+ for instance, -+ mount /cdrom/squashfs.img /sq -+ losetup /sq/ext2.img -+ losetup /somewhere/cow -+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..." -+- it will be difficult (or needs more operations) to extract the -+ difference between the original device and COW. -+- DM snapshot-merge may help a lot when users try merging. in the -+ fs-layer union, users will use rsync(1). -+ -+ -+Several characters/aspects of aufs -+---------------------------------------------------------------------- -+ -+Aufs has several characters or aspects. -+1. a filesystem, callee of VFS helper -+2. sub-VFS, caller of VFS helper for branches -+3. a virtual filesystem which maintains persistent inode number -+4. reader/writer of files on branches such like an application -+ -+1. Callee of VFS Helper -+As an ordinary linux filesystem, aufs is a callee of VFS. For instance, -+unlink(2) from an application reaches sys_unlink() kernel function and -+then vfs_unlink() is called. 
vfs_unlink() is one of VFS helper and it -+calls filesystem specific unlink operation. Actually aufs implements the -+unlink operation but it behaves like a redirector. -+ -+2. Caller of VFS Helper for Branches -+aufs_unlink() passes the unlink request to the branch filesystem as if -+it were called from VFS. So the called unlink operation of the branch -+filesystem acts as usual. As a caller of VFS helper, aufs should handle -+every necessary pre/post operation for the branch filesystem. -+- acquire the lock for the parent dir on a branch -+- lookup in a branch -+- revalidate dentry on a branch -+- mnt_want_write() for a branch -+- vfs_unlink() for a branch -+- mnt_drop_write() for a branch -+- release the lock on a branch -+ -+3. Persistent Inode Number -+One of the most important issue for a filesystem is to maintain inode -+numbers. This is particularly important to support exporting a -+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a -+backend block device for its own. But some storage is necessary to -+maintain inode number. It may be a large space and may not suit to keep -+in memory. Aufs rents some space from its first writable branch -+filesystem (by default) and creates file(s) on it. These files are -+created by aufs internally and removed soon (currently) keeping opened. -+Note: Because these files are removed, they are totally gone after -+ unmounting aufs. It means the inode numbers are not persistent -+ across unmount or reboot. I have a plan to make them really -+ persistent which will be important for aufs on NFS server. -+ -+4. Read/Write Files Internally (copy-on-write) -+Because a branch can be readonly, when you write a file on it, aufs will -+"copy-up" it to the upper writable branch internally. And then write the -+originally requested thing to the file. Generally kernel doesn't -+open/read/write file actively. In aufs, even a single write may cause a -+internal "file copy". 
This behaviour is very similar to cp(1) command. -+ -+Some people may think it is better to pass such work to user space -+helper, instead of doing in kernel space. Actually I am still thinking -+about it. But currently I have implemented it in kernel space. -diff --git Documentation/filesystems/aufs/design/02struct.txt Documentation/filesystems/aufs/design/02struct.txt -new file mode 100644 -index 0000000..d04ca2e ---- /dev/null -+++ Documentation/filesystems/aufs/design/02struct.txt -@@ -0,0 +1,229 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Basic Aufs Internal Structure -+ -+Superblock/Inode/Dentry/File Objects -+---------------------------------------------------------------------- -+As like an ordinary filesystem, aufs has its own -+superblock/inode/dentry/file objects. All these objects have a -+dynamically allocated array and store the same kind of pointers to the -+lower filesystem, branch. -+For example, when you build a union with one readwrite branch and one -+readonly, mounted /au, /rw and /ro respectively. -+- /au = /rw + /ro -+- /ro/fileA exists but /rw/fileA -+ -+Aufs lookup operation finds /ro/fileA and gets dentry for that. These -+pointers are stored in a aufs dentry. The array in aufs dentry will be, -+- [0] = NULL -+- [1] = /ro/fileA -+ -+This style of an array is essentially same to the aufs -+superblock/inode/dentry/file objects. -+ -+Because aufs supports manipulating branches, ie. add/delete/change -+dynamically, these objects has its own generation. When branches are -+changed, the generation in aufs superblock is incremented. And a -+generation in other object are compared when it is accessed. -+When a generation in other objects are obsoleted, aufs refreshes the -+internal array. -+ -+ -+Superblock -+---------------------------------------------------------------------- -+Additionally aufs superblock has some data for policies to select one -+among multiple writable branches, XIB files, pseudo-links and kobject. 
-+See below in detail. -+About the policies which supports copy-down a directory, see policy.txt -+too. -+ -+ -+Branch and XINO(External Inode Number Translation Table) -+---------------------------------------------------------------------- -+Every branch has its own xino (external inode number translation table) -+file. The xino file is created and unlinked by aufs internally. When two -+members of a union exist on the same filesystem, they share the single -+xino file. -+The struct of a xino file is simple, just a sequence of aufs inode -+numbers which is indexed by the lower inode number. -+In the above sample, assume the inode number of /ro/fileA is i111 and -+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as -+4(8) bytes at 111 * 4(8) bytes offset in the xino file. -+ -+When the inode numbers are not contiguous, the xino file will be sparse -+which has a hole in it and doesn't consume as much disk space as it -+might appear. If your branch filesystem consumes disk space for such -+holes, then you should specify 'xino=' option at mounting aufs. -+ -+Also a writable branch has three kinds of "whiteout bases". All these -+are existed when the branch is joined to aufs and the names are -+whiteout-ed doubly, so that users will never see their names in aufs -+hierarchy. -+1. a regular file which will be linked to all whiteouts. -+2. a directory to store a pseudo-link. -+3. a directory to store an "orphan-ed" file temporary. -+ -+1. Whiteout Base -+ When you remove a file on a readonly branch, aufs handles it as a -+ logical deletion and creates a whiteout on the upper writable branch -+ as a hardlink of this file in order not to consume inode on the -+ writable branch. -+2. Pseudo-link Dir -+ See below, Pseudo-link. -+3. Step-Parent Dir -+ When "fileC" exists on the lower readonly branch only and it is -+ opened and removed with its parent dir, and then user writes -+ something into it, then aufs copies-up fileC to this -+ directory. 
Because there is no other dir to store fileC. After -+ creating a file under this dir, the file is unlinked. -+ -+Because aufs supports manipulating branches, ie. add/delete/change -+dynamically, a branch has its own id. When the branch order changes, aufs -+finds the new index by searching the branch id. -+ -+ -+Pseudo-link -+---------------------------------------------------------------------- -+Assume "fileA" exists on the lower readonly branch only and it is -+hardlinked to "fileB" on the branch. When you write something to fileA, -+aufs copies-up it to the upper writable branch. Additionally aufs -+creates a hardlink under the Pseudo-link Directory of the writable -+branch. The inode of a pseudo-link is kept in aufs super_block as a -+simple list. If fileB is read after unlinking fileA, aufs returns -+filedata from the pseudo-link instead of the lower readonly -+branch. Because the pseudo-link is based upon the inode, to keep the -+inode number by xino (see above) is important. -+ -+All the hardlinks under the Pseudo-link Directory of the writable branch -+should be restored in a proper location later. Aufs provides a utility -+to do this. The userspace helpers executed at remounting and unmounting -+aufs by default. -+During this utility is running, it puts aufs into the pseudo-link -+maintenance mode. In this mode, only the process which began the -+maintenance mode (and its child processes) is allowed to operate in -+aufs. Some other processes which are not related to the pseudo-link will -+be allowed to run too, but the rest have to return an error or wait -+until the maintenance mode ends. If a process already acquires an inode -+mutex (in VFS), it has to return an error. -+ -+ -+XIB(external inode number bitmap) -+---------------------------------------------------------------------- -+Addition to the xino file per a branch, aufs has an external inode number -+bitmap in a superblock object. It is also a file such like a xino file. 
-+It is a simple bitmap to mark whether the aufs inode number is in-use or -+not. -+To reduce the file I/O, aufs prepares a single memory page to cache xib. -+ -+Aufs implements a feature to truncate/refresh both of xino and xib to -+reduce the number of consumed disk blocks for these files. -+ -+ -+Virtual or Vertical Dir, and Readdir in Userspace -+---------------------------------------------------------------------- -+In order to support multiple layers (branches), aufs readdir operation -+constructs a virtual dir block on memory. For readdir, aufs calls -+vfs_readdir() internally for each dir on branches, merges their entries -+with eliminating the whiteout-ed ones, and sets it to file (dir) -+object. So the file object has its entry list until it is closed. The -+entry list will be updated when the file position is zero and becomes -+old. This decision is made in aufs automatically. -+ -+The dynamically allocated memory block for the name of entries has a -+unit of 512 bytes (by default) and stores the names contiguously (no -+padding). Another block for each entry is handled by kmem_cache too. -+During building dir blocks, aufs creates hash list and judging whether -+the entry is whiteouted by its upper branch or already listed. -+The merged result is cached in the corresponding inode object and -+maintained by a customizable life-time option. -+ -+Some people may call it can be a security hole or invite DoS attack -+since the opened and once readdir-ed dir (file object) holds its entry -+list and becomes a pressure for system memory. But I'd say it is similar -+to files under /proc or /sys. The virtual files in them also holds a -+memory page (generally) while they are opened. When an idea to reduce -+memory for them is introduced, it will be applied to aufs too. -+For those who really hate this situation, I've developed readdir(3) -+library which operates this merging in userspace. 
You just need to set -+LD_PRELOAD environment variable, and aufs will not consume any memory in -+kernel space for readdir(3). -+ -+ -+Workqueue -+---------------------------------------------------------------------- -+Aufs sometimes requires privilege access to a branch. For instance, -+in copy-up/down operation. When a user process is going to make changes -+to a file which exists in the lower readonly branch only, and the mode -+of one of ancestor directories may not be writable by a user -+process. Here aufs copy-up the file with its ancestors and they may -+require privilege to set its owner/group/mode/etc. -+This is a typical case of an application character of aufs (see -+Introduction). -+ -+Aufs uses workqueue synchronously for this case. It creates its own -+workqueue. The workqueue is a kernel thread and has privilege. Aufs -+passes the request to call mkdir or write (for example), and wait for -+its completion. This approach solves a problem of a signal handler -+simply. -+If aufs didn't adopt the workqueue and changed the privilege of the -+process, and if the mkdir/write call arises SIGXFSZ or other signal, -+then the user process might gain a privilege or the generated core file -+was owned by a superuser. -+ -+Also aufs uses the system global workqueue ("events" kernel thread) too -+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a -+whiteout base and etc. This is unrelated to a privilege. -+Most of aufs operation tries acquiring a rw_semaphore for aufs -+superblock at the beginning, at the same time waits for the completion -+of all queued asynchronous tasks. -+ -+ -+Whiteout -+---------------------------------------------------------------------- -+The whiteout in aufs is very similar to Unionfs's. That is represented -+by its filename. UnionMount takes an approach of a file mode, but I am -+afraid several utilities (find(1) or something) will have to support it.
-+ -+Basically the whiteout represents "logical deletion" which stops aufs to -+lookup further, but also it represents "dir is opaque" which also stop -+lookup. -+ -+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively. -+In order to make several functions in a single systemcall to be -+revertible, aufs adopts an approach to rename a directory to a temporary -+unique whiteouted name. -+For example, in rename(2) dir where the target dir already existed, aufs -+renames the target dir to a temporary unique whiteouted name before the -+actual rename on a branch and then handles other actions (make it opaque, -+update the attributes, etc). If an error happens in these actions, aufs -+simply renames the whiteouted name back and returns an error. If all are -+succeeded, aufs registers a function to remove the whiteouted unique -+temporary name completely and asynchronously to the system global -+workqueue. -+ -+ -+Copy-up -+---------------------------------------------------------------------- -+It is a well-known feature or concept. -+When user modifies a file on a readonly branch, aufs operate "copy-up" -+internally and makes change to the new file on the upper writable branch. -+When the trigger systemcall does not update the timestamps of the parent -+dir, aufs reverts it after copy-up. -+ -+ -+Move-down (aufs3.9 and later) -+---------------------------------------------------------------------- -+"Copy-up" is one of the essential feature in aufs. It copies a file from -+the lower readonly branch to the upper writable branch when a user -+changes something about the file. -+"Move-down" is an opposite action of copy-up. Basically this action is -+ran manually instead of automatically and internally. -+ -+Sometimes users want to move-down a file from the upper writable branch -+to the lower readonly or writable branch. For instance, -+- the free space of the upper writable branch is going to run out. 
-+- create a new intermediate branch between the upper and lower branch. -+- etc. -+ -+For this purpose, use "aumvdown" command in aufs-util.git. -diff --git Documentation/filesystems/aufs/design/03lookup.txt Documentation/filesystems/aufs/design/03lookup.txt -new file mode 100644 -index 0000000..b2b0a9c ---- /dev/null -+++ Documentation/filesystems/aufs/design/03lookup.txt -@@ -0,0 +1,120 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Lookup in a Branch -+---------------------------------------------------------------------- -+Since aufs has a character of sub-VFS (see Introduction), it operates -+lookup for branches as VFS does. It may be a heavy work. Generally -+speaking struct nameidata is a bigger structure and includes many -+information. But almost all lookup operation in aufs is the simplest -+case, ie. lookup only an entry directly connected to its parent. Digging -+down the directory hierarchy is unnecessary. -+ -+VFS has a function lookup_one_len() for that use, but it is not usable -+for a branch filesystem which requires struct nameidata. So aufs -+implements a simple lookup wrapper function. When a branch filesystem -+allows NULL as nameidata, it calls lookup_one_len(). Otherwise it builds -+a simplest nameidata and calls lookup_hash(). -+Here aufs applies "a principle in NFSD", ie. if the filesystem supports -+NFS-export, then it has to support NULL as a nameidata parameter for -+->create(), ->lookup() and ->d_revalidate(). So the lookup wrapper in -+aufs tests if ->s_export_op in the branch is NULL or not. -+ -+When a branch is a remote filesystem, aufs basically trusts its -+->d_revalidate(), also aufs forces the hardest revalidate tests for -+them. -+For d_revalidate, aufs implements three levels of revalidate tests. See -+"Revalidate Dentry and UDBA" in detail. 
-+ -+ -+Test Only the Highest One for the Directory Permission (dirperm1 option) -+---------------------------------------------------------------------- -+Let's try case study. -+- aufs has two branches, upper readwrite and lower readonly. -+ /au = /rw + /ro -+- "dirA" exists under /ro, but /rw. and its mode is 0700. -+- user invoked "chmod a+rx /au/dirA" -+- the internal copy-up is activated and "/rw/dirA" is created and its -+ permission bits are set to world readble. -+- then "/au/dirA" becomes world readable? -+ -+In this case, /ro/dirA is still 0700 since it exists in readonly branch, -+or it may be a natively readonly filesystem. If aufs respects the lower -+branch, it should not respond readdir request from other users. But user -+allowed it by chmod. Should really aufs rejects showing the entries -+under /ro/dirA? -+ -+To be honest, I don't have a best solution for this case. So aufs -+implements 'dirperm1' and 'nodirperm1' and leave it to users. -+When dirperm1 is specified, aufs checks only the highest one for the -+directory permission, and shows the entries. Otherwise, as usual, checks -+every dir existing on all branches and rejects the request. -+ -+As a side effect, dirperm1 option improves the performance of aufs -+because the number of permission check is reduced when the number of -+branch is many. -+ -+ -+Loopback Mount -+---------------------------------------------------------------------- -+Basically aufs supports any type of filesystem and block device for a -+branch (actually there are some exceptions). But it is prohibited to add -+a loopback mounted one whose backend file exists in a filesystem which is -+already added to aufs. The reason is to protect aufs from a recursive -+lookup. If it was allowed, the aufs lookup operation might re-enter a -+lookup for the loopback mounted branch in the same context, and will -+cause a deadlock. 
-+ -+ -+Revalidate Dentry and UDBA (User's Direct Branch Access) -+---------------------------------------------------------------------- -+Generally VFS helpers re-validate a dentry as a part of lookup. -+0. digging down the directory hierarchy. -+1. lock the parent dir by its i_mutex. -+2. lookup the final (child) entry. -+3. revalidate it. -+4. call the actual operation (create, unlink, etc.) -+5. unlock the parent dir -+ -+If the filesystem implements its ->d_revalidate() (step 3), then it is -+called. Actually aufs implements it and checks the dentry on a branch is -+still valid. -+But it is not enough. Because aufs has to release the lock for the -+parent dir on a branch at the end of ->lookup() (step 2) and -+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still -+held by VFS. -+If the file on a branch is changed directly, eg. bypassing aufs, after -+aufs released the lock, then the subsequent operation may cause -+something unpleasant result. -+ -+This situation is a result of VFS architecture, ->lookup() and -+->d_revalidate() is separated. But I never say it is wrong. It is a good -+design from VFS's point of view. It is just not suitable for sub-VFS -+character in aufs. -+ -+Aufs supports such case by three level of revalidation which is -+selectable by user. -+1. Simple Revalidate -+ Addition to the native flow in VFS's, confirm the child-parent -+ relationship on the branch just after locking the parent dir on the -+ branch in the "actual operation" (step 4). When this validation -+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still -+ checks the validation of the dentry on branches. -+2. Monitor Changes Internally by Inotify/Fsnotify -+ Addition to above, in the "actual operation" (step 4) aufs re-lookup -+ the dentry on the branch, and returns EBUSY if it finds different -+ dentry. -+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches -+ during it is in cache. 
When the event is notified, aufs registers a -+ function to kernel 'events' thread by schedule_work(). And the -+ function sets some special status to the cached aufs dentry and inode -+ private data. If they are not cached, then aufs has nothing to -+ do. When the same file is accessed through aufs (step 0-3) later, -+ aufs will detect the status and refresh all necessary data. -+ In this mode, aufs has to ignore the event which is fired by aufs -+ itself. -+3. No Extra Validation -+ This is the simplest test and doesn't add any additional revalidation -+ test, and skip therevalidatin in step 4. It is useful and improves -+ aufs performance when system surely hide the aufs branches from user, -+ by over-mounting something (or another method). -diff --git Documentation/filesystems/aufs/design/04branch.txt Documentation/filesystems/aufs/design/04branch.txt -new file mode 100644 -index 0000000..9f54b5d ---- /dev/null -+++ Documentation/filesystems/aufs/design/04branch.txt -@@ -0,0 +1,62 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Branch Manipulation -+ -+Since aufs supports dynamic branch manipulation, ie. add/remove a branch -+and changing its permission/attribute, there are a lot of works to do. -+ -+ -+Add a Branch -+---------------------------------------------------------------------- -+o Confirm the adding dir exists outside of aufs, including loopback -+ mount. -+- and other various attributes... -+o Initialize the xino file and whiteout bases if necessary. -+ See struct.txt. -+ -+o Check the owner/group/mode of the directory -+ When the owner/group/mode of the adding directory differs from the -+ existing branch, aufs issues a warning because it may impose a -+ security risk. -+ For example, when a upper writable branch has a world writable empty -+ top directory, a malicious user can create any files on the writable -+ branch directly, like copy-up and modify manually. 
If something like -+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper -+ writable branch, and the writable branch is world-writable, then a -+ malicious guy may create /etc/passwd on the writable branch directly -+ and the infected file will be valid in aufs. -+ I am afraid it can be a security issue, but nothing to do except -+ producing a warning. -+ -+ -+Delete a Branch -+---------------------------------------------------------------------- -+o Confirm the deleting branch is not busy -+ To be general, there is one merit to adopt "remount" interface to -+ manipulate branches. It is to discard caches. At deleting a branch, -+ aufs checks the still cached (and connected) dentries and inodes. If -+ there are any, then they are all in-use. An inode without its -+ corresponding dentry can be alive alone (for example, inotify/fsnotify case). -+ -+ For the cached one, aufs checks whether the same named entry exists on -+ other branches. -+ If the cached one is a directory, because aufs provides a merged view -+ to users, as long as one dir is left on any branch aufs can show the -+ dir to users. In this case, the branch can be removed from aufs. -+ Otherwise aufs rejects deleting the branch. -+ -+ If any file on the deleting branch is opened by aufs, then aufs -+ rejects deleting. -+ -+ -+Modify the Permission of a Branch -+---------------------------------------------------------------------- -+o Re-initialize or remove the xino file and whiteout bases if necessary. -+ See struct.txt. -+ -+o rw --> ro: Confirm the modifying branch is not busy -+ Aufs rejects the request if any of these conditions are true. -+ - a file on the branch is mmap-ed. -+ - a regular file on the branch is opened for write and there is no -+ same named entry on the upper branch. 
-diff --git Documentation/filesystems/aufs/design/05wbr_policy.txt Documentation/filesystems/aufs/design/05wbr_policy.txt -new file mode 100644 -index 0000000..c9b9326 ---- /dev/null -+++ Documentation/filesystems/aufs/design/05wbr_policy.txt -@@ -0,0 +1,51 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Policies to Select One among Multiple Writable Branches -+---------------------------------------------------------------------- -+When the number of writable branch is more than one, aufs has to decide -+the target branch for file creation or copy-up. By default, the highest -+writable branch which has the parent (or ancestor) dir of the target -+file is chosen (top-down-parent policy). -+By user's request, aufs implements some other policies to select the -+writable branch, for file creation two policies, round-robin and -+most-free-space policies. For copy-up three policies, top-down-parent, -+bottom-up-parent and bottom-up policies. -+ -+As expected, the round-robin policy selects the branch in circular. When -+you have two writable branches and creates 10 new files, 5 files will be -+created for each branch. mkdir(2) systemcall is an exception. When you -+create 10 new directories, all will be created on the same branch. -+And the most-free-space policy selects the one which has most free -+space among the writable branches. The amount of free space will be -+checked by aufs internally, and users can specify its time interval. -+ -+The policies for copy-up is more simple, -+top-down-parent is equivalent to the same named on in create policy, -+bottom-up-parent selects the writable branch where the parent dir -+exists and the nearest upper one from the copyup-source, -+bottom-up selects the nearest upper writable branch from the -+copyup-source, regardless the existence of the parent dir. -+ -+There are some rules or exceptions to apply these policies. 
-+- If there is a readonly branch above the policy-selected branch and -+ the parent dir is marked as opaque (a variation of whiteout), or the -+ target (creating) file is whiteout-ed on the upper readonly branch, -+ then the result of the policy is ignored and the target file will be -+ created on the nearest upper writable branch than the readonly branch. -+- If there is a writable branch above the policy-selected branch and -+ the parent dir is marked as opaque or the target file is whiteouted -+ on the branch, then the result of the policy is ignored and the target -+ file will be created on the highest one among the upper writable -+ branches who has diropq or whiteout. In case of whiteout, aufs removes -+ it as usual. -+- link(2) and rename(2) systemcalls are exceptions in every policy. -+ They try selecting the branch where the source exists as possible -+ since copyup a large file will take long time. If it can't be, -+ ie. the branch where the source exists is readonly, then they will -+ follow the copyup policy. -+- There is an exception for rename(2) when the target exists. -+ If the rename target exists, aufs compares the index of the branches -+ where the source and the target exists and selects the higher -+ one. If the selected branch is readonly, then aufs follows the -+ copyup policy. -diff --git Documentation/filesystems/aufs/design/06mmap.txt Documentation/filesystems/aufs/design/06mmap.txt -new file mode 100644 -index 0000000..9b2fe0f ---- /dev/null -+++ Documentation/filesystems/aufs/design/06mmap.txt -@@ -0,0 +1,33 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+mmap(2) -- File Memory Mapping -+---------------------------------------------------------------------- -+In aufs, the file-mapped pages are handled by a branch fs directly, no -+interaction with aufs. It means aufs_mmap() calls the branch fs's -+->mmap(). -+This approach is simple and good, but there is one problem. 
-+Under /proc, several entries show the mmap-ped files by its path (with -+device and inode number), and the printed path will be the path on the -+branch fs's instead of virtual aufs's. -+This is not a problem in most cases, but some utilities lsof(1) (and its -+user) may expect the path on aufs. -+ -+To address this issue, aufs adds a new member called vm_prfile in struct -+vm_area_struct (and struct vm_region). The original vm_file points to -+the file on the branch fs in order to handle everything correctly as -+usual. The new vm_prfile points to a virtual file in aufs, and the -+show-functions in procfs refers to vm_prfile if it is set. -+Also we need to maintain several other places where touching vm_file -+such like -+- fork()/clone() copies vma and the reference count of vm_file is -+ incremented. -+- merging vma maintains the ref count too. -+ -+This is not a good approach. It just faking the printed path. But it -+leaves all behaviour around f_mapping unchanged. This is surely an -+advantage. -+Actually aufs had adopted another complicated approach which calls -+generic_file_mmap() and handles struct vm_operations_struct. In this -+approach, aufs met a hard problem and I could not solve it without -+switching the approach. -diff --git Documentation/filesystems/aufs/design/07export.txt Documentation/filesystems/aufs/design/07export.txt -new file mode 100644 -index 0000000..d0b1990 ---- /dev/null -+++ Documentation/filesystems/aufs/design/07export.txt -@@ -0,0 +1,45 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Export Aufs via NFS -+---------------------------------------------------------------------- -+Here is an approach. -+- like xino/xib, add a new file 'xigen' which stores aufs inode -+ generation. -+- iget_locked(): initialize aufs inode generation for a new inode, and -+ store it in xigen file. -+- destroy_inode(): increment aufs inode generation and store it in xigen -+ file. 
it is necessary even if it is not unlinked, because any data of -+ inode may be changed by UDBA. -+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise -+ build file handle by -+ + branch id (4 bytes) -+ + superblock generation (4 bytes) -+ + inode number (4 or 8 bytes) -+ + parent dir inode number (4 or 8 bytes) -+ + inode generation (4 bytes)) -+ + return value of exportfs_encode_fh() for the parent on a branch (4 -+ bytes) -+ + file handle for a branch (by exportfs_encode_fh()) -+- fh_to_dentry(): -+ + find the index of a branch from its id in handle, and check it is -+ still exist in aufs. -+ + 1st level: get the inode number from handle and search it in cache. -+ + 2nd level: if not found, get the parent inode number from handle and -+ search it in cache. and then open the parent dir, find the matching -+ inode number by vfs_readdir() and get its name, and call -+ lookup_one_len() for the target dentry. -+ + 3rd level: if the parent dir is not cached, call -+ exportfs_decode_fh() for a branch and get the parent on a branch, -+ build a pathname of it, convert it a pathname in aufs, call -+ path_lookup(). now aufs gets a parent dir dentry, then handle it as -+ the 2nd level. -+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount -+ for every branch, but not itself. to get this, (currently) aufs -+ searches in current->nsproxy->mnt_ns list. it may not be a good -+ idea, but I didn't get other approach. -+ + test the generation of the gotten inode. -+- every inode operation: they may get EBUSY due to UDBA. in this case, -+ convert it into ESTALE for NFSD. -+- readdir(): call lockdep_on/off() because filldir in NFSD calls -+ lookup_one_len(), vfs_getattr(), encode_fh() and others. 
-diff --git Documentation/filesystems/aufs/design/08shwh.txt Documentation/filesystems/aufs/design/08shwh.txt -new file mode 100644 -index 0000000..56e7c7d ---- /dev/null -+++ Documentation/filesystems/aufs/design/08shwh.txt -@@ -0,0 +1,39 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Show Whiteout Mode (shwh) -+---------------------------------------------------------------------- -+Generally aufs hides the name of whiteouts. But in some cases, to show -+them is very useful for users. For instance, creating a new middle layer -+(branch) by merging existing layers. -+ -+(borrowing aufs1 HOW-TO from a user, Michael Towers) -+When you have three branches, -+- Bottom: 'system', squashfs (underlying base system), read-only -+- Middle: 'mods', squashfs, read-only -+- Top: 'overlay', ram (tmpfs), read-write -+ -+The top layer is loaded at boot time and saved at shutdown, to preserve -+the changes made to the system during the session. -+When larger changes have been made, or smaller changes have accumulated, -+the size of the saved top layer data grows. At this point, it would be -+nice to be able to merge the two overlay branches ('mods' and 'overlay') -+and rewrite the 'mods' squashfs, clearing the top layer and thus -+restoring save and load speed. -+ -+This merging is simplified by the use of another aufs mount, of just the -+two overlay branches using the 'shwh' option. -+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \ -+ aufs /livesys/merge_union -+ -+A merged view of these two branches is then available at -+/livesys/merge_union, and the new feature is that the whiteouts are -+visible! -+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable -+writing to all branches. Also the default mode for all branches is 'ro'. 
-+It is now possible to save the combined contents of the two overlay -+branches to a new squashfs, e.g.: -+# mksquashfs /livesys/merge_union /path/to/newmods.squash -+ -+This new squashfs archive can be stored on the boot device and the -+initramfs will use it to replace the old one at the next boot. -diff --git Documentation/filesystems/aufs/design/10dynop.txt Documentation/filesystems/aufs/design/10dynop.txt -new file mode 100644 -index 0000000..e7f8f78 ---- /dev/null -+++ Documentation/filesystems/aufs/design/10dynop.txt -@@ -0,0 +1,33 @@ -+ -+# Copyright (C) 2010-2014 Junjiro R. Okajima -+ -+Dynamically customizable FS operations -+---------------------------------------------------------------------- -+Generally FS operations (struct inode_operations, struct -+address_space_operations, struct file_operations, etc.) are defined as -+"static const", but it never means that FS have only one set of -+operation. Some FS have multiple sets of them. For instance, ext2 has -+three sets, one for XIP, for NOBH, and for normal. -+Since aufs overrides and redirects these operations, sometimes aufs has -+to change its behaviour according to the branch FS type. More imporantly -+VFS acts differently if a function (member in the struct) is set or -+not. It means aufs should have several sets of operations and select one -+among them according to the branch FS definition. -+ -+In order to solve this problem and not to affect the behavour of VFS, -+aufs defines these operations dynamically. For instance, aufs defines -+aio_read function for struct file_operations, but it may not be set to -+the file_operations. When the branch FS doesn't have it, aufs doesn't -+set it to its file_operations while the function definition itself is -+still alive. So the behaviour of io_submit(2) will not change, and it -+will return an error when aio_read is not defined. -+ -+The lifetime of these dynamically generated operation object is -+maintained by aufs branch object. 
When the branch is removed from aufs, -+the reference counter of the object is decremented. When it reaches -+zero, the dynamically generated operation object will be freed. -+ -+This approach is designed to support AIO (io_submit), Direcit I/O and -+XIP mainly. -+Currently this approach is applied to file_operations and -+vm_operations_struct for regular files only. -diff --git Documentation/filesystems/aufs/design/99plan.txt Documentation/filesystems/aufs/design/99plan.txt -new file mode 100644 -index 0000000..3f0a8c1 ---- /dev/null -+++ Documentation/filesystems/aufs/design/99plan.txt -@@ -0,0 +1,45 @@ -+ -+# Copyright (C) 2005-2014 Junjiro R. Okajima -+ -+Plan -+ -+Restoring some features which was implemented in aufs1. -+They were dropped in aufs2 in order to make source files simpler and -+easier to be reviewed. -+ -+ -+Being Another Aufs's Readonly Branch (robr) -+---------------------------------------------------------------------- -+Aufs1 allows aufs to be another aufs's readonly branch. -+This feature was developed by a user's request. But it may not be used -+currecnly. -+ -+ -+Refresh the Opened File (refrof) -+---------------------------------------------------------------------- -+This option is implemented in aufs1 but incomplete. -+ -+When user reads from a file, he expects to get its latest filedata -+generally. If the file is removed and a new same named file is created, -+the content he gets is unchanged, ie. the unlinked filedata. -+ -+Let's try case study again. -+- aufs has two branches. -+ /au = /rw + /ro -+- "fileA" exists under /ro, but /rw. -+- user opened "/au/fileA". -+- he or someone else inserts a branch (/new) between /rw and /ro. -+ /au = /rw + /new + /ro -+- the new branch has "fileA". -+- user reads from the opened "fileA" -+- which filedata should aufs return, from /ro or /new? -+ -+Some people says it has to be "from /ro" and it is a semantics of Unix. 
-+The others say it should be "from /new" because the file is not removed -+and it is equivalent to the case of someone else modifies the file. -+ -+Here again I don't have a best and final answer. I got an idea to -+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the -+Opened File) is specified (by default), aufs returns the filedata from -+/new. -+Otherwise from /new. -diff --git drivers/block/loop.c drivers/block/loop.c -index f70a230..138104b 100644 ---- drivers/block/loop.c -+++ drivers/block/loop.c -@@ -692,6 +692,24 @@ static inline int is_loop_device(struct file *file) - return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; - } - -+/* -+ * for AUFS -+ * no get/put for file. -+ */ -+struct file *loop_backing_file(struct super_block *sb) -+{ -+ struct file *ret; -+ struct loop_device *l; -+ -+ ret = NULL; -+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) { -+ l = sb->s_bdev->bd_disk->private_data; -+ ret = l->lo_backing_file; -+ } -+ return ret; -+} -+EXPORT_SYMBOL(loop_backing_file); -+ - /* loop sysfs attributes */ - - static ssize_t loop_attr_show(struct device *dev, char *page, -diff --git fs/Kconfig fs/Kconfig -index 312393f..78632ed1 100644 ---- fs/Kconfig -+++ fs/Kconfig -@@ -209,6 +209,7 @@ source "fs/ufs/Kconfig" - source "fs/exofs/Kconfig" - source "fs/f2fs/Kconfig" - source "fs/efivarfs/Kconfig" -+source "fs/aufs/Kconfig" - - endif # MISC_FILESYSTEMS - -diff --git fs/Makefile fs/Makefile -index f9cb987..a0c580f 100644 ---- fs/Makefile -+++ fs/Makefile -@@ -126,3 +126,4 @@ obj-y += exofs/ # Multiple modules - obj-$(CONFIG_CEPH_FS) += ceph/ - obj-$(CONFIG_PSTORE) += pstore/ - obj-$(CONFIG_EFIVAR_FS) += efivarfs/ -+obj-$(CONFIG_AUFS_FS) += aufs/ -diff --git fs/aufs/Kconfig fs/aufs/Kconfig -new file mode 100644 -index 0000000..32fa57a ---- /dev/null -+++ fs/aufs/Kconfig -@@ -0,0 +1,168 @@ -+config AUFS_FS -+ bool "Aufs (Advanced multi layered unification filesystem) support" -+ help -+ Aufs is a stackable unification filesystem such 
as Unionfs, -+ which unifies several directories and provides a merged single -+ directory. -+ In the early days, aufs was entirely re-designed and -+ re-implemented Unionfs Version 1.x series. Introducing many -+ original ideas, approaches and improvements, it becomes totally -+ different from Unionfs while keeping the basic features. -+ -+if AUFS_FS -+choice -+ prompt "Maximum number of branches" -+ default AUFS_BRANCH_MAX_127 -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_127 -+ bool "127" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_511 -+ bool "511" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_1023 -+ bool "1023" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+config AUFS_BRANCH_MAX_32767 -+ bool "32767" -+ help -+ Specifies the maximum number of branches (or member directories) -+ in a single aufs. The larger value consumes more system -+ resources and has a minor impact to performance. -+endchoice -+ -+config AUFS_SBILIST -+ bool -+ depends on AUFS_MAGIC_SYSRQ || PROC_FS -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq or /proc, enabled automatically. -+ -+config AUFS_HNOTIFY -+ bool "Detect direct branch access (bypassing aufs)" -+ help -+ If you want to modify files on branches directly, eg. 
bypassing aufs, -+ and want aufs to detect the changes of them fully, then enable this -+ option and use 'udba=notify' mount option. -+ Currently there is only one available configuration, "fsnotify". -+ It will have a negative impact to the performance. -+ See detail in aufs.5. -+ -+choice -+ prompt "method" if AUFS_HNOTIFY -+ default AUFS_HFSNOTIFY -+config AUFS_HFSNOTIFY -+ bool "fsnotify" -+ select FSNOTIFY -+endchoice -+ -+config AUFS_EXPORT -+ bool "NFS-exportable aufs" -+ depends on EXPORTFS = y -+ help -+ If you want to export your mounted aufs via NFS, then enable this -+ option. There are several requirements for this configuration. -+ See detail in aufs.5. -+ -+config AUFS_INO_T_64 -+ bool -+ depends on AUFS_EXPORT -+ depends on 64BIT && !(ALPHA || S390) -+ default y -+ help -+ Automatic configuration for internal use. -+ /* typedef unsigned long/int __kernel_ino_t */ -+ /* alpha and s390x are int */ -+ -+config AUFS_RDU -+ bool "Readdir in userspace" -+ help -+ Aufs has two methods to provide a merged view for a directory, -+ by a user-space library and by kernel-space natively. The latter -+ is always enabled but sometimes large and slow. -+ If you enable this option, install the library in aufs2-util -+ package, and set some environment variables for your readdir(3), -+ then the work will be handled in user-space which generally -+ shows better performance in most cases. -+ See detail in aufs.5. -+ -+config AUFS_SHWH -+ bool "Show whiteouts" -+ help -+ If you want to make the whiteouts in aufs visible, then enable -+ this option and specify 'shwh' mount option. Although it may -+ sounds like philosophy or something, but in technically it -+ simply shows the name of whiteout with keeping its behaviour. -+ -+config AUFS_BR_RAMFS -+ bool "Ramfs (initramfs/rootfs) as an aufs branch" -+ help -+ If you want to use ramfs as an aufs branch fs, then enable this -+ option. Generally tmpfs is recommended. 
-+ Aufs prohibited them to be a branch fs by default, because -+ initramfs becomes unusable after switch_root or something -+ generally. If you sets initramfs as an aufs branch and boot your -+ system by switch_root, you will meet a problem easily since the -+ files in initramfs may be inaccessible. -+ Unless you are going to use ramfs as an aufs branch fs without -+ switch_root or something, leave it N. -+ -+config AUFS_BR_FUSE -+ bool "Fuse fs as an aufs branch" -+ depends on FUSE_FS -+ select AUFS_POLL -+ help -+ If you want to use fuse-based userspace filesystem as an aufs -+ branch fs, then enable this option. -+ It implements the internal poll(2) operation which is -+ implemented by fuse only (curretnly). -+ -+config AUFS_POLL -+ bool -+ help -+ Automatic configuration for internal use. -+ -+config AUFS_BR_HFSPLUS -+ bool "Hfsplus as an aufs branch" -+ depends on HFSPLUS_FS -+ default y -+ help -+ If you want to use hfsplus fs as an aufs branch fs, then enable -+ this option. This option introduces a small overhead at -+ copying-up a file on hfsplus. -+ -+config AUFS_BDEV_LOOP -+ bool -+ depends on BLK_DEV_LOOP -+ default y -+ help -+ Automatic configuration for internal use. -+ Convert =[ym] into =y. -+ -+config AUFS_DEBUG -+ bool "Debug aufs" -+ help -+ Enable this to compile aufs internal debug code. -+ It will have a negative impact to the performance. -+ -+config AUFS_MAGIC_SYSRQ -+ bool -+ depends on AUFS_DEBUG && MAGIC_SYSRQ -+ default y -+ help -+ Automatic configuration for internal use. -+ When aufs supports Magic SysRq, enabled automatically. -+endif -diff --git fs/aufs/Makefile fs/aufs/Makefile -new file mode 100644 -index 0000000..1bbac31 ---- /dev/null -+++ fs/aufs/Makefile -@@ -0,0 +1,33 @@ -+ -+include ${srctree}/${src}/magic.mk -+ -+# cf. 
include/linux/kernel.h -+# enable pr_debug -+ccflags-y += -DDEBUG -+# sparse requires the full pathname -+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h -+ -+obj-$(CONFIG_AUFS_FS) += aufs.o -+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \ -+ wkq.o vfsub.o dcsub.o \ -+ cpup.o whout.o wbr_policy.o \ -+ dinfo.o dentry.o \ -+ dynop.o \ -+ finfo.o file.o f_op.o \ -+ dir.o vdir.o \ -+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \ -+ mvdown.o ioctl.o -+ -+# all are boolean -+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o -+aufs-$(CONFIG_SYSFS) += sysfs.o -+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o -+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o -+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o -+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o -+aufs-$(CONFIG_AUFS_EXPORT) += export.o -+aufs-$(CONFIG_AUFS_POLL) += poll.o -+aufs-$(CONFIG_AUFS_RDU) += rdu.o -+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o -+aufs-$(CONFIG_AUFS_DEBUG) += debug.o -+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o -diff --git fs/aufs/aufs.h fs/aufs/aufs.h -new file mode 100644 -index 0000000..e013b24 ---- /dev/null -+++ fs/aufs/aufs.h -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * all header files -+ */ -+ -+#ifndef __AUFS_H__ -+#define __AUFS_H__ -+ -+#ifdef __KERNEL__ -+ -+#define AuStub(type, name, body, ...) \ -+ static inline type name(__VA_ARGS__) { body; } -+ -+#define AuStubVoid(name, ...) \ -+ AuStub(void, name, , __VA_ARGS__) -+#define AuStubInt0(name, ...) 
\ -+ AuStub(int, name, return 0, __VA_ARGS__) -+ -+#include "debug.h" -+ -+#include "branch.h" -+#include "cpup.h" -+#include "dcsub.h" -+#include "dbgaufs.h" -+#include "dentry.h" -+#include "dir.h" -+#include "dynop.h" -+#include "file.h" -+#include "fstype.h" -+#include "inode.h" -+#include "loop.h" -+#include "module.h" -+#include "opts.h" -+#include "rwsem.h" -+#include "spl.h" -+#include "super.h" -+#include "sysaufs.h" -+#include "vfsub.h" -+#include "whout.h" -+#include "wkq.h" -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_H__ */ -diff --git fs/aufs/branch.c fs/aufs/branch.c -new file mode 100644 -index 0000000..91cb484 ---- /dev/null -+++ fs/aufs/branch.c -@@ -0,0 +1,1207 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * branch management -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+/* -+ * free a single branch -+ */ -+ -+/* prohibit rmdir to the root of the branch */ -+/* todo: another new flag? */ -+static void au_br_dflags_force(struct au_branch *br) -+{ -+ struct dentry *h_dentry; -+ -+ h_dentry = au_br_dentry(br); -+ spin_lock(&h_dentry->d_lock); -+ br->br_dflags = h_dentry->d_flags & DCACHE_MOUNTED; -+ h_dentry->d_flags |= DCACHE_MOUNTED; -+ spin_unlock(&h_dentry->d_lock); -+} -+ -+/* restore its d_flags */ -+static void au_br_dflags_restore(struct au_branch *br) -+{ -+ struct dentry *h_dentry; -+ -+ if (br->br_dflags) -+ return; -+ -+ h_dentry = au_br_dentry(br); -+ spin_lock(&h_dentry->d_lock); -+ h_dentry->d_flags &= ~DCACHE_MOUNTED; -+ spin_unlock(&h_dentry->d_lock); -+} -+ -+static void au_br_do_free(struct au_branch *br) -+{ -+ int i; -+ struct au_wbr *wbr; -+ struct au_dykey **key; -+ -+ au_hnotify_fin_br(br); -+ -+ if (br->br_xino.xi_file) -+ fput(br->br_xino.xi_file); -+ mutex_destroy(&br->br_xino.xi_nondir_mtx); -+ -+ AuDebugOn(atomic_read(&br->br_count)); -+ -+ wbr = br->br_wbr; -+ if (wbr) { -+ for (i = 0; i < AuBrWh_Last; i++) -+ dput(wbr->wbr_wh[i]); -+ AuDebugOn(atomic_read(&wbr->wbr_wh_running)); 
-+ AuRwDestroy(&wbr->wbr_wh_rwsem); -+ } -+ -+ key = br->br_dykey; -+ for (i = 0; i < AuBrDynOp; i++, key++) -+ if (*key) -+ au_dy_put(*key); -+ else -+ break; -+ -+ au_br_dflags_restore(br); -+ -+ /* recursive lock, s_umount of branch's */ -+ lockdep_off(); -+ path_put(&br->br_path); -+ lockdep_on(); -+ kfree(wbr); -+ kfree(br); -+} -+ -+/* -+ * frees all branches -+ */ -+void au_br_free(struct au_sbinfo *sbinfo) -+{ -+ aufs_bindex_t bmax; -+ struct au_branch **br; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ bmax = sbinfo->si_bend + 1; -+ br = sbinfo->si_branch; -+ while (bmax--) -+ au_br_do_free(*br++); -+} -+ -+/* -+ * find the index of a branch which is specified by @br_id. -+ */ -+int au_br_index(struct super_block *sb, aufs_bindex_t br_id) -+{ -+ aufs_bindex_t bindex, bend; -+ -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) -+ if (au_sbr_id(sb, bindex) == br_id) -+ return bindex; -+ return -1; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * add a branch -+ */ -+ -+static int test_overlap(struct super_block *sb, struct dentry *h_adding, -+ struct dentry *h_root) -+{ -+ if (unlikely(h_adding == h_root -+ || au_test_loopback_overlap(sb, h_adding))) -+ return 1; -+ if (h_adding->d_sb != h_root->d_sb) -+ return 0; -+ return au_test_subdir(h_adding, h_root) -+ || au_test_subdir(h_root, h_adding); -+} -+ -+/* -+ * returns a newly allocated branch. @new_nbranch is a number of branches -+ * after adding a branch. 
-+ */ -+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch, -+ int perm) -+{ -+ struct au_branch *add_branch; -+ struct dentry *root; -+ int err; -+ -+ err = -ENOMEM; -+ root = sb->s_root; -+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS); -+ if (unlikely(!add_branch)) -+ goto out; -+ -+ err = au_hnotify_init_br(add_branch, perm); -+ if (unlikely(err)) -+ goto out_br; -+ -+ add_branch->br_wbr = NULL; -+ if (au_br_writable(perm)) { -+ /* may be freed separately at changing the branch permission */ -+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr), -+ GFP_NOFS); -+ if (unlikely(!add_branch->br_wbr)) -+ goto out_hnotify; -+ } -+ -+ err = au_sbr_realloc(au_sbi(sb), new_nbranch); -+ if (!err) -+ err = au_di_realloc(au_di(root), new_nbranch); -+ if (!err) -+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch); -+ if (!err) -+ return add_branch; /* success */ -+ -+ kfree(add_branch->br_wbr); -+ -+out_hnotify: -+ au_hnotify_fin_br(add_branch); -+out_br: -+ kfree(add_branch); -+out: -+ return ERR_PTR(err); -+} -+ -+/* -+ * test if the branch permission is legal or not. 
-+ */ -+static int test_br(struct inode *inode, int brperm, char *path) -+{ -+ int err; -+ -+ err = (au_br_writable(brperm) && IS_RDONLY(inode)); -+ if (!err) -+ goto out; -+ -+ err = -EINVAL; -+ pr_err("write permission for readonly mount or inode, %s\n", path); -+ -+out: -+ return err; -+} -+ -+/* -+ * returns: -+ * 0: success, the caller will add it -+ * plus: success, it is already unified, the caller should ignore it -+ * minus: error -+ */ -+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount) -+{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct dentry *root; -+ struct inode *inode, *h_inode; -+ -+ root = sb->s_root; -+ bend = au_sbend(sb); -+ if (unlikely(bend >= 0 -+ && au_find_dbindex(root, add->path.dentry) >= 0)) { -+ err = 1; -+ if (!remount) { -+ err = -EINVAL; -+ pr_err("%s duplicated\n", add->pathname); -+ } -+ goto out; -+ } -+ -+ err = -ENOSPC; /* -E2BIG; */ -+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex -+ || AUFS_BRANCH_MAX - 1 <= bend)) { -+ pr_err("number of branches exceeded %s\n", add->pathname); -+ goto out; -+ } -+ -+ err = -EDOM; -+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { -+ pr_err("bad index %d\n", add->bindex); -+ goto out; -+ } -+ -+ inode = add->path.dentry->d_inode; -+ err = -ENOENT; -+ if (unlikely(!inode->i_nlink)) { -+ pr_err("no existence %s\n", add->pathname); -+ goto out; -+ } -+ -+ err = -EINVAL; -+ if (unlikely(inode->i_sb == sb)) { -+ pr_err("%s must be outside\n", add->pathname); -+ goto out; -+ } -+ -+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) { -+ pr_err("unsupported filesystem, %s (%s)\n", -+ add->pathname, au_sbtype(inode->i_sb)); -+ goto out; -+ } -+ -+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname); -+ if (unlikely(err)) -+ goto out; -+ -+ if (bend < 0) -+ return 0; /* success */ -+ -+ err = -EINVAL; -+ for (bindex = 0; bindex <= bend; bindex++) -+ if (unlikely(test_overlap(sb, add->path.dentry, -+ au_h_dptr(root, bindex)))) { -+ pr_err("%s 
is overlapped\n", add->pathname); -+ goto out; -+ } -+ -+ err = 0; -+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) { -+ h_inode = au_h_dptr(root, 0)->d_inode; -+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO) -+ || !uid_eq(h_inode->i_uid, inode->i_uid) -+ || !gid_eq(h_inode->i_gid, inode->i_gid)) -+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n", -+ add->pathname, -+ i_uid_read(inode), i_gid_read(inode), -+ (inode->i_mode & S_IALLUGO), -+ i_uid_read(h_inode), i_gid_read(h_inode), -+ (h_inode->i_mode & S_IALLUGO)); -+ } -+ -+out: -+ return err; -+} -+ -+/* -+ * initialize or clean the whiteouts for an adding branch -+ */ -+static int au_br_init_wh(struct super_block *sb, struct au_branch *br, -+ int new_perm) -+{ -+ int err, old_perm; -+ aufs_bindex_t bindex; -+ struct mutex *h_mtx; -+ struct au_wbr *wbr; -+ struct au_hinode *hdir; -+ -+ err = vfsub_mnt_want_write(au_br_mnt(br)); -+ if (unlikely(err)) -+ goto out; -+ -+ wbr = br->br_wbr; -+ old_perm = br->br_perm; -+ br->br_perm = new_perm; -+ hdir = NULL; -+ h_mtx = NULL; -+ bindex = au_br_index(sb, br->br_id); -+ if (0 <= bindex) { -+ hdir = au_hi(sb->s_root->d_inode, bindex); -+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); -+ } else { -+ h_mtx = &au_br_dentry(br)->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT); -+ } -+ if (!wbr) -+ err = au_wh_init(br, sb); -+ else { -+ wbr_wh_write_lock(wbr); -+ err = au_wh_init(br, sb); -+ wbr_wh_write_unlock(wbr); -+ } -+ if (hdir) -+ au_hn_imtx_unlock(hdir); -+ else -+ mutex_unlock(h_mtx); -+ vfsub_mnt_drop_write(au_br_mnt(br)); -+ br->br_perm = old_perm; -+ -+ if (!err && wbr && !au_br_writable(new_perm)) { -+ kfree(wbr); -+ br->br_wbr = NULL; -+ } -+ -+out: -+ return err; -+} -+ -+static int au_wbr_init(struct au_branch *br, struct super_block *sb, -+ int perm) -+{ -+ int err; -+ struct kstatfs kst; -+ struct au_wbr *wbr; -+ -+ wbr = br->br_wbr; -+ au_rw_init(&wbr->wbr_wh_rwsem); -+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh)); -+ 
atomic_set(&wbr->wbr_wh_running, 0); -+ wbr->wbr_bytes = 0; -+ -+ /* -+ * a limit for rmdir/rename a dir -+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h -+ */ -+ err = vfs_statfs(&br->br_path, &kst); -+ if (unlikely(err)) -+ goto out; -+ err = -EINVAL; -+ if (kst.f_namelen >= NAME_MAX) -+ err = au_br_init_wh(sb, br, perm); -+ else -+ pr_err("%pd(%s), unsupported namelen %ld\n", -+ au_br_dentry(br), -+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen); -+ -+out: -+ return err; -+} -+ -+/* intialize a new branch */ -+static int au_br_init(struct au_branch *br, struct super_block *sb, -+ struct au_opt_add *add) -+{ -+ int err; -+ -+ err = 0; -+ memset(&br->br_xino, 0, sizeof(br->br_xino)); -+ mutex_init(&br->br_xino.xi_nondir_mtx); -+ br->br_perm = add->perm; -+ BUILD_BUG_ON(sizeof(br->br_dflags) -+ != sizeof(br->br_path.dentry->d_flags)); -+ br->br_dflags = DCACHE_MOUNTED; -+ br->br_path = add->path; /* set first, path_get() later */ -+ spin_lock_init(&br->br_dykey_lock); -+ memset(br->br_dykey, 0, sizeof(br->br_dykey)); -+ atomic_set(&br->br_count, 0); -+ atomic_set(&br->br_xino_running, 0); -+ br->br_id = au_new_br_id(sb); -+ AuDebugOn(br->br_id < 0); -+ -+ if (au_br_writable(add->perm)) { -+ err = au_wbr_init(br, sb, add->perm); -+ if (unlikely(err)) -+ goto out_err; -+ } -+ -+ if (au_opt_test(au_mntflags(sb), XINO)) { -+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino, -+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1); -+ if (unlikely(err)) { -+ AuDebugOn(br->br_xino.xi_file); -+ goto out_err; -+ } -+ } -+ -+ sysaufs_br_init(br); -+ path_get(&br->br_path); -+ goto out; /* success */ -+ -+out_err: -+ memset(&br->br_path, 0, sizeof(br->br_path)); -+out: -+ return err; -+} -+ -+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex, -+ struct au_branch *br, aufs_bindex_t bend, -+ aufs_bindex_t amount) -+{ -+ struct au_branch **brp; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ brp = sbinfo->si_branch + bindex; -+ 
memmove(brp + 1, brp, sizeof(*brp) * amount); -+ *brp = br; -+ sbinfo->si_bend++; -+ if (unlikely(bend < 0)) -+ sbinfo->si_bend = 0; -+} -+ -+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex, -+ aufs_bindex_t bend, aufs_bindex_t amount) -+{ -+ struct au_hdentry *hdp; -+ -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ hdp = dinfo->di_hdentry + bindex; -+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount); -+ au_h_dentry_init(hdp); -+ dinfo->di_bend++; -+ if (unlikely(bend < 0)) -+ dinfo->di_bstart = 0; -+} -+ -+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex, -+ aufs_bindex_t bend, aufs_bindex_t amount) -+{ -+ struct au_hinode *hip; -+ -+ AuRwMustWriteLock(&iinfo->ii_rwsem); -+ -+ hip = iinfo->ii_hinode + bindex; -+ memmove(hip + 1, hip, sizeof(*hip) * amount); -+ hip->hi_inode = NULL; -+ au_hn_init(hip); -+ iinfo->ii_bend++; -+ if (unlikely(bend < 0)) -+ iinfo->ii_bstart = 0; -+} -+ -+static void au_br_do_add(struct super_block *sb, struct au_branch *br, -+ aufs_bindex_t bindex) -+{ -+ struct dentry *root, *h_dentry; -+ struct inode *root_inode; -+ aufs_bindex_t bend, amount; -+ -+ au_br_dflags_force(br); -+ -+ root = sb->s_root; -+ root_inode = root->d_inode; -+ bend = au_sbend(sb); -+ amount = bend + 1 - bindex; -+ h_dentry = au_br_dentry(br); -+ au_sbilist_lock(); -+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount); -+ au_br_do_add_hdp(au_di(root), bindex, bend, amount); -+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount); -+ au_set_h_dptr(root, bindex, dget(h_dentry)); -+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode), -+ /*flags*/0); -+ au_sbilist_unlock(); -+} -+ -+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount) -+{ -+ int err; -+ aufs_bindex_t bend, add_bindex; -+ struct dentry *root, *h_dentry; -+ struct inode *root_inode; -+ struct au_branch *add_branch; -+ -+ root = sb->s_root; -+ root_inode = root->d_inode; -+ IMustLock(root_inode); -+ err = test_add(sb, 
add, remount); -+ if (unlikely(err < 0)) -+ goto out; -+ if (err) { -+ err = 0; -+ goto out; /* success */ -+ } -+ -+ bend = au_sbend(sb); -+ add_branch = au_br_alloc(sb, bend + 2, add->perm); -+ err = PTR_ERR(add_branch); -+ if (IS_ERR(add_branch)) -+ goto out; -+ -+ err = au_br_init(add_branch, sb, add); -+ if (unlikely(err)) { -+ au_br_do_free(add_branch); -+ goto out; -+ } -+ -+ add_bindex = add->bindex; -+ if (!remount) -+ au_br_do_add(sb, add_branch, add_bindex); -+ else { -+ sysaufs_brs_del(sb, add_bindex); -+ au_br_do_add(sb, add_branch, add_bindex); -+ sysaufs_brs_add(sb, add_bindex); -+ } -+ -+ h_dentry = add->path.dentry; -+ if (!add_bindex) { -+ au_cpup_attr_all(root_inode, /*force*/1); -+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes; -+ } else -+ au_add_nlink(root_inode, h_dentry->d_inode); -+ -+ /* -+ * this test/set prevents aufs from handling unnecesary notify events -+ * of xino files, in case of re-adding a writable branch which was -+ * once detached from aufs. -+ */ -+ if (au_xino_brid(sb) < 0 -+ && au_br_writable(add_branch->br_perm) -+ && !au_test_fs_bad_xino(h_dentry->d_sb) -+ && add_branch->br_xino.xi_file -+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry) -+ au_xino_brid_set(sb, add_branch->br_id); -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * delete a branch -+ */ -+ -+/* to show the line number, do not make it inlined function */ -+#define AuVerbose(do_info, fmt, ...) do { \ -+ if (do_info) \ -+ pr_info(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart, -+ aufs_bindex_t bend) -+{ -+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend; -+} -+ -+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart, -+ aufs_bindex_t bend) -+{ -+ return au_test_ibusy(dentry->d_inode, bstart, bend); -+} -+ -+/* -+ * test if the branch is deletable or not. 
-+ */ -+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex, -+ unsigned int sigen, const unsigned int verbose) -+{ -+ int err, i, j, ndentry; -+ aufs_bindex_t bstart, bend; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry *d; -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, root, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; -+ -+ for (i = 0; !err && i < dpages.ndpage; i++) { -+ dpage = dpages.dpages + i; -+ ndentry = dpage->ndentry; -+ for (j = 0; !err && j < ndentry; j++) { -+ d = dpage->dentries[j]; -+ AuDebugOn(!d_count(d)); -+ if (!au_digen_test(d, sigen)) { -+ di_read_lock_child(d, AuLock_IR); -+ if (unlikely(au_dbrange_test(d))) { -+ di_read_unlock(d, AuLock_IR); -+ continue; -+ } -+ } else { -+ di_write_lock_child(d); -+ if (unlikely(au_dbrange_test(d))) { -+ di_write_unlock(d); -+ continue; -+ } -+ err = au_reval_dpath(d, sigen); -+ if (!err) -+ di_downgrade_lock(d, AuLock_IR); -+ else { -+ di_write_unlock(d); -+ break; -+ } -+ } -+ -+ /* AuDbgDentry(d); */ -+ bstart = au_dbstart(d); -+ bend = au_dbend(d); -+ if (bstart <= bindex -+ && bindex <= bend -+ && au_h_dptr(d, bindex) -+ && au_test_dbusy(d, bstart, bend)) { -+ err = -EBUSY; -+ AuVerbose(verbose, "busy %pd\n", d); -+ AuDbgDentry(d); -+ } -+ di_read_unlock(d, AuLock_IR); -+ } -+ } -+ -+out_dpages: -+ au_dpages_free(&dpages); -+out: -+ return err; -+} -+ -+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex, -+ unsigned int sigen, const unsigned int verbose) -+{ -+ int err; -+ unsigned long long max, ull; -+ struct inode *i, **array; -+ aufs_bindex_t bstart, bend; -+ -+ array = au_iarray_alloc(sb, &max); -+ err = PTR_ERR(array); -+ if (IS_ERR(array)) -+ goto out; -+ -+ err = 0; -+ AuDbg("b%d\n", bindex); -+ for (ull = 0; !err && ull < max; ull++) { -+ i = array[ull]; -+ if (i->i_ino == AUFS_ROOT_INO) -+ continue; -+ -+ /* AuDbgInode(i); */ -+ if (au_iigen(i, 
NULL) == sigen) -+ ii_read_lock_child(i); -+ else { -+ ii_write_lock_child(i); -+ err = au_refresh_hinode_self(i); -+ au_iigen_dec(i); -+ if (!err) -+ ii_downgrade_lock(i); -+ else { -+ ii_write_unlock(i); -+ break; -+ } -+ } -+ -+ bstart = au_ibstart(i); -+ bend = au_ibend(i); -+ if (bstart <= bindex -+ && bindex <= bend -+ && au_h_iptr(i, bindex) -+ && au_test_ibusy(i, bstart, bend)) { -+ err = -EBUSY; -+ AuVerbose(verbose, "busy i%lu\n", i->i_ino); -+ AuDbgInode(i); -+ } -+ ii_read_unlock(i); -+ } -+ au_iarray_free(array, max); -+ -+out: -+ return err; -+} -+ -+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex, -+ const unsigned int verbose) -+{ -+ int err; -+ unsigned int sigen; -+ -+ sigen = au_sigen(root->d_sb); -+ DiMustNoWaiters(root); -+ IiMustNoWaiters(root->d_inode); -+ di_write_unlock(root); -+ err = test_dentry_busy(root, bindex, sigen, verbose); -+ if (!err) -+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose); -+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ -+ -+ return err; -+} -+ -+static void au_br_do_del_brp(struct au_sbinfo *sbinfo, -+ const aufs_bindex_t bindex, -+ const aufs_bindex_t bend) -+{ -+ struct au_branch **brp, **p; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ brp = sbinfo->si_branch + bindex; -+ if (bindex < bend) -+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex)); -+ sbinfo->si_branch[0 + bend] = NULL; -+ sbinfo->si_bend--; -+ -+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST); -+ if (p) -+ sbinfo->si_branch = p; -+ /* harmless error */ -+} -+ -+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex, -+ const aufs_bindex_t bend) -+{ -+ struct au_hdentry *hdp, *p; -+ -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ hdp = dinfo->di_hdentry; -+ if (bindex < bend) -+ memmove(hdp + bindex, hdp + bindex + 1, -+ sizeof(*hdp) * (bend - bindex)); -+ hdp[0 + bend].hd_dentry = NULL; -+ dinfo->di_bend--; -+ -+ p = krealloc(hdp, sizeof(*p) * 
bend, AuGFP_SBILIST); -+ if (p) -+ dinfo->di_hdentry = p; -+ /* harmless error */ -+} -+ -+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex, -+ const aufs_bindex_t bend) -+{ -+ struct au_hinode *hip, *p; -+ -+ AuRwMustWriteLock(&iinfo->ii_rwsem); -+ -+ hip = iinfo->ii_hinode + bindex; -+ if (bindex < bend) -+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex)); -+ iinfo->ii_hinode[0 + bend].hi_inode = NULL; -+ au_hn_init(iinfo->ii_hinode + bend); -+ iinfo->ii_bend--; -+ -+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST); -+ if (p) -+ iinfo->ii_hinode = p; -+ /* harmless error */ -+} -+ -+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex, -+ struct au_branch *br) -+{ -+ aufs_bindex_t bend; -+ struct au_sbinfo *sbinfo; -+ struct dentry *root, *h_root; -+ struct inode *inode, *h_inode; -+ struct au_hinode *hinode; -+ -+ SiMustWriteLock(sb); -+ -+ root = sb->s_root; -+ inode = root->d_inode; -+ sbinfo = au_sbi(sb); -+ bend = sbinfo->si_bend; -+ -+ h_root = au_h_dptr(root, bindex); -+ hinode = au_hi(inode, bindex); -+ h_inode = au_igrab(hinode->hi_inode); -+ au_hiput(hinode); -+ -+ au_sbilist_lock(); -+ au_br_do_del_brp(sbinfo, bindex, bend); -+ au_br_do_del_hdp(au_di(root), bindex, bend); -+ au_br_do_del_hip(au_ii(inode), bindex, bend); -+ au_sbilist_unlock(); -+ -+ dput(h_root); -+ iput(h_inode); -+ au_br_do_free(br); -+} -+ -+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount) -+{ -+ int err, rerr, i; -+ unsigned int mnt_flags; -+ aufs_bindex_t bindex, bend, br_id; -+ unsigned char do_wh, verbose; -+ struct au_branch *br; -+ struct au_wbr *wbr; -+ -+ err = 0; -+ bindex = au_find_dbindex(sb->s_root, del->h_path.dentry); -+ if (bindex < 0) { -+ if (remount) -+ goto out; /* success */ -+ err = -ENOENT; -+ pr_err("%s no such branch\n", del->pathname); -+ goto out; -+ } -+ AuDbg("bindex b%d\n", bindex); -+ -+ err = -EBUSY; -+ mnt_flags = au_mntflags(sb); -+ verbose = 
!!au_opt_test(mnt_flags, VERBOSE); -+ bend = au_sbend(sb); -+ if (unlikely(!bend)) { -+ AuVerbose(verbose, "no more branches left\n"); -+ goto out; -+ } -+ br = au_sbr(sb, bindex); -+ AuDebugOn(!path_equal(&br->br_path, &del->h_path)); -+ i = atomic_read(&br->br_count); -+ if (unlikely(i)) { -+ AuVerbose(verbose, "%d file(s) opened\n", i); -+ goto out; -+ } -+ -+ wbr = br->br_wbr; -+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph); -+ if (do_wh) { -+ /* instead of WbrWhMustWriteLock(wbr) */ -+ SiMustWriteLock(sb); -+ for (i = 0; i < AuBrWh_Last; i++) { -+ dput(wbr->wbr_wh[i]); -+ wbr->wbr_wh[i] = NULL; -+ } -+ } -+ -+ err = test_children_busy(sb->s_root, bindex, verbose); -+ if (unlikely(err)) { -+ if (do_wh) -+ goto out_wh; -+ goto out; -+ } -+ -+ err = 0; -+ br_id = br->br_id; -+ if (!remount) -+ au_br_do_del(sb, bindex, br); -+ else { -+ sysaufs_brs_del(sb, bindex); -+ au_br_do_del(sb, bindex, br); -+ sysaufs_brs_add(sb, bindex); -+ } -+ -+ if (!bindex) { -+ au_cpup_attr_all(sb->s_root->d_inode, /*force*/1); -+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes; -+ } else -+ au_sub_nlink(sb->s_root->d_inode, del->h_path.dentry->d_inode); -+ if (au_opt_test(mnt_flags, PLINK)) -+ au_plink_half_refresh(sb, br_id); -+ -+ if (au_xino_brid(sb) == br_id) -+ au_xino_brid_set(sb, -1); -+ goto out; /* success */ -+ -+out_wh: -+ /* revert */ -+ rerr = au_br_init_wh(sb, br, br->br_perm); -+ if (rerr) -+ pr_warn("failed re-creating base whiteout, %s. 
(%d)\n", -+ del->pathname, rerr); -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg) -+{ -+ int err; -+ aufs_bindex_t bstart, bend; -+ struct aufs_ibusy ibusy; -+ struct inode *inode, *h_inode; -+ -+ err = -EPERM; -+ if (unlikely(!capable(CAP_SYS_ADMIN))) -+ goto out; -+ -+ err = copy_from_user(&ibusy, arg, sizeof(ibusy)); -+ if (!err) -+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ goto out; -+ } -+ -+ err = -EINVAL; -+ si_read_lock(sb, AuLock_FLUSH); -+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb))) -+ goto out_unlock; -+ -+ err = 0; -+ ibusy.h_ino = 0; /* invalid */ -+ inode = ilookup(sb, ibusy.ino); -+ if (!inode -+ || inode->i_ino == AUFS_ROOT_INO -+ || is_bad_inode(inode)) -+ goto out_unlock; -+ -+ ii_read_lock_child(inode); -+ bstart = au_ibstart(inode); -+ bend = au_ibend(inode); -+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) { -+ h_inode = au_h_iptr(inode, ibusy.bindex); -+ if (h_inode && au_test_ibusy(inode, bstart, bend)) -+ ibusy.h_ino = h_inode->i_ino; -+ } -+ ii_read_unlock(inode); -+ iput(inode); -+ -+out_unlock: -+ si_read_unlock(sb); -+ if (!err) { -+ err = __put_user(ibusy.h_ino, &arg->h_ino); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ } -+ } -+out: -+ return err; -+} -+ -+long au_ibusy_ioctl(struct file *file, unsigned long arg) -+{ -+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg); -+} -+ -+#ifdef CONFIG_COMPAT -+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg) -+{ -+ return au_ibusy(file->f_dentry->d_sb, compat_ptr(arg)); -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * change a branch permission -+ */ -+ -+static void au_warn_ima(void) -+{ -+#ifdef CONFIG_IMA -+ /* since it doesn't support 
mark_files_ro() */ -+ AuWarn1("RW -> RO makes IMA to produce wrong message\n"); -+#endif -+} -+ -+static int do_need_sigen_inc(int a, int b) -+{ -+ return au_br_whable(a) && !au_br_whable(b); -+} -+ -+static int need_sigen_inc(int old, int new) -+{ -+ return do_need_sigen_inc(old, new) -+ || do_need_sigen_inc(new, old); -+} -+ -+static unsigned long long au_farray_cb(void *a, -+ unsigned long long max __maybe_unused, -+ void *arg) -+{ -+ unsigned long long n; -+ struct file **p, *f; -+ struct au_sphlhead *files; -+ struct au_finfo *finfo; -+ struct super_block *sb = arg; -+ -+ n = 0; -+ p = a; -+ files = &au_sbi(sb)->si_files; -+ spin_lock(&files->spin); -+ hlist_for_each_entry(finfo, &files->head, fi_hlist) { -+ f = finfo->fi_file; -+ if (file_count(f) -+ && !special_file(file_inode(f)->i_mode)) { -+ get_file(f); -+ *p++ = f; -+ n++; -+ AuDebugOn(n > max); -+ } -+ } -+ spin_unlock(&files->spin); -+ -+ return n; -+} -+ -+static struct file **au_farray_alloc(struct super_block *sb, -+ unsigned long long *max) -+{ -+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles); -+ return au_array_alloc(max, au_farray_cb, sb); -+} -+ -+static void au_farray_free(struct file **a, unsigned long long max) -+{ -+ unsigned long long ull; -+ -+ for (ull = 0; ull < max; ull++) -+ if (a[ull]) -+ fput(a[ull]); -+ au_array_free(a); -+} -+ -+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ int err, do_warn; -+ unsigned int mnt_flags; -+ unsigned long long ull, max; -+ aufs_bindex_t br_id; -+ unsigned char verbose, writer; -+ struct file *file, *hf, **array; -+ struct inode *inode; -+ struct au_hfile *hfile; -+ -+ mnt_flags = au_mntflags(sb); -+ verbose = !!au_opt_test(mnt_flags, VERBOSE); -+ -+ array = au_farray_alloc(sb, &max); -+ err = PTR_ERR(array); -+ if (IS_ERR(array)) -+ goto out; -+ -+ do_warn = 0; -+ br_id = au_sbr_id(sb, bindex); -+ for (ull = 0; ull < max; ull++) { -+ file = array[ull]; -+ -+ /* AuDbg("%pD\n", file); */ -+ fi_read_lock(file); -+ 
if (unlikely(au_test_mmapped(file))) { -+ err = -EBUSY; -+ AuVerbose(verbose, "mmapped %pD\n", file); -+ AuDbgFile(file); -+ FiMustNoWaiters(file); -+ fi_read_unlock(file); -+ goto out_array; -+ } -+ -+ inode = file_inode(file); -+ hfile = &au_fi(file)->fi_htop; -+ hf = hfile->hf_file; -+ if (!S_ISREG(inode->i_mode) -+ || !(file->f_mode & FMODE_WRITE) -+ || hfile->hf_br->br_id != br_id -+ || !(hf->f_mode & FMODE_WRITE)) -+ array[ull] = NULL; -+ else { -+ do_warn = 1; -+ get_file(file); -+ } -+ -+ FiMustNoWaiters(file); -+ fi_read_unlock(file); -+ fput(file); -+ } -+ -+ err = 0; -+ if (do_warn) -+ au_warn_ima(); -+ -+ for (ull = 0; ull < max; ull++) { -+ file = array[ull]; -+ if (!file) -+ continue; -+ -+ /* todo: already flushed? */ -+ /* -+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its -+ * approach which resets f_mode and calls mnt_drop_write() and -+ * file_release_write() for each file, because the branch -+ * attribute in aufs world is totally different from the native -+ * fs rw/ro mode. 
-+ */ -+ /* fi_read_lock(file); */ -+ hfile = &au_fi(file)->fi_htop; -+ hf = hfile->hf_file; -+ /* fi_read_unlock(file); */ -+ spin_lock(&hf->f_lock); -+ writer = !!(hf->f_mode & FMODE_WRITER); -+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER); -+ spin_unlock(&hf->f_lock); -+ if (writer) { -+ put_write_access(file_inode(hf)); -+ __mnt_drop_write(hf->f_path.mnt); -+ } -+ } -+ -+out_array: -+ au_farray_free(array, max); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, -+ int *do_refresh) -+{ -+ int err, rerr; -+ aufs_bindex_t bindex; -+ struct dentry *root; -+ struct au_branch *br; -+ -+ root = sb->s_root; -+ bindex = au_find_dbindex(root, mod->h_root); -+ if (bindex < 0) { -+ if (remount) -+ return 0; /* success */ -+ err = -ENOENT; -+ pr_err("%s no such branch\n", mod->path); -+ goto out; -+ } -+ AuDbg("bindex b%d\n", bindex); -+ -+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path); -+ if (unlikely(err)) -+ goto out; -+ -+ br = au_sbr(sb, bindex); -+ AuDebugOn(mod->h_root != au_br_dentry(br)); -+ if (br->br_perm == mod->perm) -+ return 0; /* success */ -+ -+ if (au_br_writable(br->br_perm)) { -+ /* remove whiteout base */ -+ err = au_br_init_wh(sb, br, mod->perm); -+ if (unlikely(err)) -+ goto out; -+ -+ if (!au_br_writable(mod->perm)) { -+ /* rw --> ro, file might be mmapped */ -+ DiMustNoWaiters(root); -+ IiMustNoWaiters(root->d_inode); -+ di_write_unlock(root); -+ err = au_br_mod_files_ro(sb, bindex); -+ /* aufs_write_lock() calls ..._child() */ -+ di_write_lock_child(root); -+ -+ if (unlikely(err)) { -+ rerr = -ENOMEM; -+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), -+ GFP_NOFS); -+ if (br->br_wbr) -+ rerr = au_wbr_init(br, sb, br->br_perm); -+ if (unlikely(rerr)) { -+ AuIOErr("nested error %d (%d)\n", -+ rerr, err); -+ br->br_perm = mod->perm; -+ } -+ } -+ } -+ } else if (au_br_writable(mod->perm)) { -+ /* ro --> rw */ -+ err = -ENOMEM; -+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), 
GFP_NOFS); -+ if (br->br_wbr) { -+ err = au_wbr_init(br, sb, mod->perm); -+ if (unlikely(err)) { -+ kfree(br->br_wbr); -+ br->br_wbr = NULL; -+ } -+ } -+ } -+ -+ if (!err) { -+ if ((br->br_perm & AuBrAttr_UNPIN) -+ && !(mod->perm & AuBrAttr_UNPIN)) -+ au_br_dflags_force(br); -+ else if (!(br->br_perm & AuBrAttr_UNPIN) -+ && (mod->perm & AuBrAttr_UNPIN)) -+ au_br_dflags_restore(br); -+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm); -+ br->br_perm = mod->perm; -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -diff --git fs/aufs/branch.h fs/aufs/branch.h -new file mode 100644 -index 0000000..e0ffde9 ---- /dev/null -+++ fs/aufs/branch.h -@@ -0,0 +1,275 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * branch filesystems and xino for them -+ */ -+ -+#ifndef __AUFS_BRANCH_H__ -+#define __AUFS_BRANCH_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "dynop.h" -+#include "rwsem.h" -+#include "super.h" -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* a xino file */ -+struct au_xino_file { -+ struct file *xi_file; -+ struct mutex xi_nondir_mtx; -+ -+ /* todo: make xino files an array to support huge inode number */ -+ -+#ifdef CONFIG_DEBUG_FS -+ struct dentry *xi_dbgaufs; -+#endif -+}; -+ -+/* members for writable branch only */ -+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last}; -+struct au_wbr { -+ struct au_rwsem wbr_wh_rwsem; -+ struct dentry *wbr_wh[AuBrWh_Last]; -+ atomic_t wbr_wh_running; -+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */ -+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */ -+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */ -+ -+ /* mfs mode */ -+ unsigned long long wbr_bytes; -+}; -+ -+/* ext2 has 3 types of operations at least, ext3 has 4 */ -+#define AuBrDynOp (AuDyLast * 4) -+ -+#ifdef CONFIG_AUFS_HFSNOTIFY -+/* support for asynchronous destruction */ -+struct au_br_hfsnotify { -+ struct fsnotify_group *hfsn_group; -+}; -+#endif 
-+ -+/* sysfs entries */ -+struct au_brsysfs { -+ char name[16]; -+ struct attribute attr; -+}; -+ -+enum { -+ AuBrSysfs_BR, -+ AuBrSysfs_BRID, -+ AuBrSysfs_Last -+}; -+ -+/* protected by superblock rwsem */ -+struct au_branch { -+ struct au_xino_file br_xino; -+ -+ aufs_bindex_t br_id; -+ -+ int br_perm; -+ unsigned int br_dflags; -+ struct path br_path; -+ spinlock_t br_dykey_lock; -+ struct au_dykey *br_dykey[AuBrDynOp]; -+ atomic_t br_count; -+ -+ struct au_wbr *br_wbr; -+ -+ /* xino truncation */ -+ atomic_t br_xino_running; -+ -+#ifdef CONFIG_AUFS_HFSNOTIFY -+ struct au_br_hfsnotify *br_hfsn; -+#endif -+ -+#ifdef CONFIG_SYSFS -+ /* entries under sysfs per mount-point */ -+ struct au_brsysfs br_sysfs[AuBrSysfs_Last]; -+#endif -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct vfsmount *au_br_mnt(struct au_branch *br) -+{ -+ return br->br_path.mnt; -+} -+ -+static inline struct dentry *au_br_dentry(struct au_branch *br) -+{ -+ return br->br_path.dentry; -+} -+ -+static inline struct super_block *au_br_sb(struct au_branch *br) -+{ -+ return au_br_mnt(br)->mnt_sb; -+} -+ -+/* branch permissions and attributes */ -+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */ -+#define AuBrPerm_RO (1 << 1) /* readonly */ -+#define AuBrPerm_RR (1 << 2) /* natively readonly */ -+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR) -+ -+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */ -+#define AuBrAttr_COO_ALL (1 << 4) -+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL) -+ -+#define AuBrAttr_UNPIN (1 << 5) /* rename-able top dir of -+ branch */ -+ -+#define AuBrRAttr_WH (1 << 6) /* whiteout-able */ -+#define AuBrRAttr_Mask AuBrRAttr_WH -+ -+#define AuBrWAttr_NoLinkWH (1 << 7) /* un-hardlinkable whiteouts */ -+#define AuBrWAttr_MOO (1 << 8) /* move-up on open */ -+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO) -+ -+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | 
AuBrWAttr_MOO) -+ -+/* the longest combination */ -+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \ -+ "+" AUFS_BRATTR_COO_REG \ -+ "+" AUFS_BRATTR_UNPIN \ -+ "+" AUFS_BRWATTR_NLWH) -+ -+typedef struct { -+ char a[AuBrPermStrSz]; -+} au_br_perm_str_t; -+ -+static inline int au_br_writable(int brperm) -+{ -+ return brperm & AuBrPerm_RW; -+} -+ -+static inline int au_br_whable(int brperm) -+{ -+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH); -+} -+ -+static inline int au_br_wh_linkable(int brperm) -+{ -+ return !(brperm & AuBrWAttr_NoLinkWH); -+} -+ -+static inline int au_br_rdonly(struct au_branch *br) -+{ -+ return ((au_br_sb(br)->s_flags & MS_RDONLY) -+ || !au_br_writable(br->br_perm)) -+ ? -EROFS : 0; -+} -+ -+static inline int au_br_cmoo(int brperm) -+{ -+ return brperm & AuBrAttr_CMOO_Mask; -+} -+ -+static inline int au_br_hnotifyable(int brperm __maybe_unused) -+{ -+#ifdef CONFIG_AUFS_HNOTIFY -+ return !(brperm & AuBrPerm_RR); -+#else -+ return 0; -+#endif -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* branch.c */ -+struct au_sbinfo; -+void au_br_free(struct au_sbinfo *sinfo); -+int au_br_index(struct super_block *sb, aufs_bindex_t br_id); -+struct au_opt_add; -+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount); -+struct au_opt_del; -+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount); -+long au_ibusy_ioctl(struct file *file, unsigned long arg); -+#ifdef CONFIG_COMPAT -+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg); -+#endif -+struct au_opt_mod; -+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount, -+ int *do_refresh); -+ -+/* xino.c */ -+static const loff_t au_loff_max = LLONG_MAX; -+ -+int au_xib_trunc(struct super_block *sb); -+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size, -+ loff_t *pos); -+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, -+ loff_t *pos); 
-+struct file *au_xino_create2(struct file *base_file, struct file *copy_src); -+struct file *au_xino_create(struct super_block *sb, char *fname, int silent); -+ino_t au_xino_new_ino(struct super_block *sb); -+void au_xino_delete_inode(struct inode *inode, const int unlinked); -+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ ino_t ino); -+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ ino_t *ino); -+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino, -+ struct file *base_file, int do_test); -+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex); -+ -+struct au_opt_xino; -+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount); -+void au_xino_clr(struct super_block *sb); -+struct file *au_xino_def(struct super_block *sb); -+int au_xino_path(struct seq_file *seq, struct file *file); -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* Superblock to branch */ -+static inline -+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_sbr(sb, bindex)->br_id; -+} -+ -+static inline -+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_br_mnt(au_sbr(sb, bindex)); -+} -+ -+static inline -+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_br_sb(au_sbr(sb, bindex)); -+} -+ -+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ atomic_dec(&au_sbr(sb, bindex)->br_count); -+} -+ -+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_sbr(sb, bindex)->br_perm; -+} -+ -+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ return au_br_whable(au_sbr_perm(sb, bindex)); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * wbr_wh_read_lock, wbr_wh_write_lock -+ * 
wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock -+ */ -+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem); -+ -+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem) -+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem) -+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem) -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_BRANCH_H__ */ -diff --git fs/aufs/cpup.c fs/aufs/cpup.c -new file mode 100644 -index 0000000..4d93497 ---- /dev/null -+++ fs/aufs/cpup.c -@@ -0,0 +1,1276 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * copy-up functions, see wbr_policy.c for copy-down -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags) -+{ -+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE -+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT; -+ -+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags)); -+ -+ dst->i_flags |= iflags & ~mask; -+ if (au_test_fs_notime(dst->i_sb)) -+ dst->i_flags |= S_NOATIME | S_NOCMTIME; -+} -+ -+void au_cpup_attr_timesizes(struct inode *inode) -+{ -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ fsstack_copy_attr_times(inode, h_inode); -+ fsstack_copy_inode_size(inode, h_inode); -+} -+ -+void au_cpup_attr_nlink(struct inode *inode, int force) -+{ -+ struct inode *h_inode; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend; -+ -+ sb = inode->i_sb; -+ bindex = au_ibstart(inode); -+ h_inode = au_h_iptr(inode, bindex); -+ if (!force -+ && !S_ISDIR(h_inode->i_mode) -+ && au_opt_test(au_mntflags(sb), PLINK) -+ && au_plink_test(inode)) -+ return; -+ -+ /* -+ * 0 can happen in revalidating. -+ * h_inode->i_mutex may not be held here, but it is harmless since once -+ * i_nlink reaches 0, it will never become positive except O_TMPFILE -+ * case. -+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause -+ * the incorrect link count. 
-+ */ -+ set_nlink(inode, h_inode->i_nlink); -+ -+ /* -+ * fewer nlink makes find(1) noisy, but larger nlink doesn't. -+ * it may includes whplink directory. -+ */ -+ if (S_ISDIR(h_inode->i_mode)) { -+ bend = au_ibend(inode); -+ for (bindex++; bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (h_inode) -+ au_add_nlink(inode, h_inode); -+ } -+ } -+} -+ -+void au_cpup_attr_changeable(struct inode *inode) -+{ -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ inode->i_mode = h_inode->i_mode; -+ inode->i_uid = h_inode->i_uid; -+ inode->i_gid = h_inode->i_gid; -+ au_cpup_attr_timesizes(inode); -+ au_cpup_attr_flags(inode, h_inode->i_flags); -+} -+ -+void au_cpup_igen(struct inode *inode, struct inode *h_inode) -+{ -+ struct au_iinfo *iinfo = au_ii(inode); -+ -+ IiMustWriteLock(inode); -+ -+ iinfo->ii_higen = h_inode->i_generation; -+ iinfo->ii_hsb1 = h_inode->i_sb; -+} -+ -+void au_cpup_attr_all(struct inode *inode, int force) -+{ -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ au_cpup_attr_changeable(inode); -+ if (inode->i_nlink > 0) -+ au_cpup_attr_nlink(inode, force); -+ inode->i_rdev = h_inode->i_rdev; -+ inode->i_blkbits = h_inode->i_blkbits; -+ au_cpup_igen(inode, h_inode); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */ -+ -+/* keep the timestamps of the parent dir when cpup */ -+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, -+ struct path *h_path) -+{ -+ struct inode *h_inode; -+ -+ dt->dt_dentry = dentry; -+ dt->dt_h_path = *h_path; -+ h_inode = h_path->dentry->d_inode; -+ dt->dt_atime = h_inode->i_atime; -+ dt->dt_mtime = h_inode->i_mtime; -+ /* smp_mb(); */ -+} -+ -+void au_dtime_revert(struct au_dtime *dt) -+{ -+ struct iattr attr; -+ int err; -+ -+ attr.ia_atime = dt->dt_atime; -+ attr.ia_mtime = dt->dt_mtime; -+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME 
| ATTR_MTIME_SET -+ | ATTR_ATIME | ATTR_ATIME_SET; -+ -+ /* no delegation since this is a directory */ -+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL); -+ if (unlikely(err)) -+ pr_warn("restoring timestamps failed(%d). ignored\n", err); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* internal use only */ -+struct au_cpup_reg_attr { -+ int valid; -+ struct kstat st; -+ unsigned int iflags; /* inode->i_flags */ -+}; -+ -+static noinline_for_stack -+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src, -+ struct au_cpup_reg_attr *h_src_attr) -+{ -+ int err, sbits; -+ struct iattr ia; -+ struct path h_path; -+ struct inode *h_isrc, *h_idst; -+ struct kstat *h_st; -+ -+ h_path.dentry = au_h_dptr(dst, bindex); -+ h_idst = h_path.dentry->d_inode; -+ h_path.mnt = au_sbr_mnt(dst->d_sb, bindex); -+ h_isrc = h_src->d_inode; -+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID -+ | ATTR_ATIME | ATTR_MTIME -+ | ATTR_ATIME_SET | ATTR_MTIME_SET; -+ if (h_src_attr && h_src_attr->valid) { -+ h_st = &h_src_attr->st; -+ ia.ia_uid = h_st->uid; -+ ia.ia_gid = h_st->gid; -+ ia.ia_atime = h_st->atime; -+ ia.ia_mtime = h_st->mtime; -+ if (h_idst->i_mode != h_st->mode -+ && !S_ISLNK(h_idst->i_mode)) { -+ ia.ia_valid |= ATTR_MODE; -+ ia.ia_mode = h_st->mode; -+ } -+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID)); -+ au_cpup_attr_flags(h_idst, h_src_attr->iflags); -+ } else { -+ ia.ia_uid = h_isrc->i_uid; -+ ia.ia_gid = h_isrc->i_gid; -+ ia.ia_atime = h_isrc->i_atime; -+ ia.ia_mtime = h_isrc->i_mtime; -+ if (h_idst->i_mode != h_isrc->i_mode -+ && !S_ISLNK(h_idst->i_mode)) { -+ ia.ia_valid |= ATTR_MODE; -+ ia.ia_mode = h_isrc->i_mode; -+ } -+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID)); -+ au_cpup_attr_flags(h_idst, h_isrc->i_flags); -+ } -+ /* no delegation since it is just created */ -+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL); -+ -+ /* is this nfs only? 
*/ -+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) { -+ ia.ia_valid = ATTR_FORCE | ATTR_MODE; -+ ia.ia_mode = h_isrc->i_mode; -+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL); -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len, -+ char *buf, unsigned long blksize) -+{ -+ int err; -+ size_t sz, rbytes, wbytes; -+ unsigned char all_zero; -+ char *p, *zp; -+ struct mutex *h_mtx; -+ /* reduce stack usage */ -+ struct iattr *ia; -+ -+ zp = page_address(ZERO_PAGE(0)); -+ if (unlikely(!zp)) -+ return -ENOMEM; /* possible? */ -+ -+ err = 0; -+ all_zero = 0; -+ while (len) { -+ AuDbg("len %lld\n", len); -+ sz = blksize; -+ if (len < blksize) -+ sz = len; -+ -+ rbytes = 0; -+ /* todo: signal_pending? */ -+ while (!rbytes || err == -EAGAIN || err == -EINTR) { -+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos); -+ err = rbytes; -+ } -+ if (unlikely(err < 0)) -+ break; -+ -+ all_zero = 0; -+ if (len >= rbytes && rbytes == blksize) -+ all_zero = !memcmp(buf, zp, rbytes); -+ if (!all_zero) { -+ wbytes = rbytes; -+ p = buf; -+ while (wbytes) { -+ size_t b; -+ -+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos); -+ err = b; -+ /* todo: signal_pending? */ -+ if (unlikely(err == -EAGAIN || err == -EINTR)) -+ continue; -+ if (unlikely(err < 0)) -+ break; -+ wbytes -= b; -+ p += b; -+ } -+ if (unlikely(err < 0)) -+ break; -+ } else { -+ loff_t res; -+ -+ AuLabel(hole); -+ res = vfsub_llseek(dst, rbytes, SEEK_CUR); -+ err = res; -+ if (unlikely(res < 0)) -+ break; -+ } -+ len -= rbytes; -+ err = 0; -+ } -+ -+ /* the last block may be a hole */ -+ if (!err && all_zero) { -+ AuLabel(last hole); -+ -+ err = 1; -+ if (au_test_nfs(dst->f_dentry->d_sb)) { -+ /* nfs requires this step to make last hole */ -+ /* is this only nfs? */ -+ do { -+ /* todo: signal_pending? 
*/ -+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos); -+ } while (err == -EAGAIN || err == -EINTR); -+ if (err == 1) -+ dst->f_pos--; -+ } -+ -+ if (err == 1) { -+ ia = (void *)buf; -+ ia->ia_size = dst->f_pos; -+ ia->ia_valid = ATTR_SIZE | ATTR_FILE; -+ ia->ia_file = dst; -+ h_mtx = &file_inode(dst)->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); -+ /* no delegation since it is just created */ -+ err = vfsub_notify_change(&dst->f_path, ia, -+ /*delegated*/NULL); -+ mutex_unlock(h_mtx); -+ } -+ } -+ -+ return err; -+} -+ -+int au_copy_file(struct file *dst, struct file *src, loff_t len) -+{ -+ int err; -+ unsigned long blksize; -+ unsigned char do_kfree; -+ char *buf; -+ -+ err = -ENOMEM; -+ blksize = dst->f_dentry->d_sb->s_blocksize; -+ if (!blksize || PAGE_SIZE < blksize) -+ blksize = PAGE_SIZE; -+ AuDbg("blksize %lu\n", blksize); -+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *)); -+ if (do_kfree) -+ buf = kmalloc(blksize, GFP_NOFS); -+ else -+ buf = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!buf)) -+ goto out; -+ -+ if (len > (1 << 22)) -+ AuDbg("copying a large file %lld\n", (long long)len); -+ -+ src->f_pos = 0; -+ dst->f_pos = 0; -+ err = au_do_copy_file(dst, src, len, buf, blksize); -+ if (do_kfree) -+ kfree(buf); -+ else -+ free_page((unsigned long)buf); -+ -+out: -+ return err; -+} -+ -+/* -+ * to support a sparse file which is opened with O_APPEND, -+ * we need to close the file. 
-+ */ -+static int au_cp_regular(struct au_cp_generic *cpg) -+{ -+ int err, i; -+ enum { SRC, DST }; -+ struct { -+ aufs_bindex_t bindex; -+ unsigned int flags; -+ struct dentry *dentry; -+ int force_wr; -+ struct file *file; -+ void *label; -+ } *f, file[] = { -+ { -+ .bindex = cpg->bsrc, -+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE, -+ .label = &&out -+ }, -+ { -+ .bindex = cpg->bdst, -+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE, -+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST), -+ .label = &&out_src -+ } -+ }; -+ struct super_block *sb; -+ -+ /* bsrc branch can be ro/rw. */ -+ sb = cpg->dentry->d_sb; -+ f = file; -+ for (i = 0; i < 2; i++, f++) { -+ f->dentry = au_h_dptr(cpg->dentry, f->bindex); -+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags, -+ /*file*/NULL, f->force_wr); -+ err = PTR_ERR(f->file); -+ if (IS_ERR(f->file)) -+ goto *f->label; -+ } -+ -+ /* try stopping to update while we copyup */ -+ IMustLock(file[SRC].dentry->d_inode); -+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len); -+ -+ fput(file[DST].file); -+ au_sbr_put(sb, file[DST].bindex); -+ -+out_src: -+ fput(file[SRC].file); -+ au_sbr_put(sb, file[SRC].bindex); -+out: -+ return err; -+} -+ -+static int au_do_cpup_regular(struct au_cp_generic *cpg, -+ struct au_cpup_reg_attr *h_src_attr) -+{ -+ int err, rerr; -+ loff_t l; -+ struct path h_path; -+ struct inode *h_src_inode, *h_dst_inode; -+ -+ err = 0; -+ h_src_inode = au_h_iptr(cpg->dentry->d_inode, cpg->bsrc); -+ l = i_size_read(h_src_inode); -+ if (cpg->len == -1 || l < cpg->len) -+ cpg->len = l; -+ if (cpg->len) { -+ /* try stopping to update while we are referencing */ -+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD); -+ au_pin_hdir_unlock(cpg->pin); -+ -+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc); -+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc); -+ h_src_attr->iflags = h_src_inode->i_flags; -+ err = vfs_getattr(&h_path, &h_src_attr->st); -+ if (unlikely(err)) { -+ 
mutex_unlock(&h_src_inode->i_mutex); -+ goto out; -+ } -+ h_src_attr->valid = 1; -+ err = au_cp_regular(cpg); -+ mutex_unlock(&h_src_inode->i_mutex); -+ rerr = au_pin_hdir_relock(cpg->pin); -+ if (!err && rerr) -+ err = rerr; -+ } -+ if (!err && (h_src_inode->i_state & I_LINKABLE)) { -+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst); -+ h_dst_inode = h_path.dentry->d_inode; -+ spin_lock(&h_dst_inode->i_lock); -+ h_dst_inode->i_state |= I_LINKABLE; -+ spin_unlock(&h_dst_inode->i_lock); -+ } -+ -+out: -+ return err; -+} -+ -+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src, -+ struct inode *h_dir) -+{ -+ int err, symlen; -+ mm_segment_t old_fs; -+ union { -+ char *k; -+ char __user *u; -+ } sym; -+ -+ err = -ENOSYS; -+ if (unlikely(!h_src->d_inode->i_op->readlink)) -+ goto out; -+ -+ err = -ENOMEM; -+ sym.k = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!sym.k)) -+ goto out; -+ -+ /* unnecessary to support mmap_sem since symlink is not mmap-able */ -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX); -+ err = symlen; -+ set_fs(old_fs); -+ -+ if (symlen > 0) { -+ sym.k[symlen] = 0; -+ err = vfsub_symlink(h_dir, h_path, sym.k); -+ } -+ free_page((unsigned long)sym.k); -+ -+out: -+ return err; -+} -+ -+static noinline_for_stack -+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent, -+ struct au_cpup_reg_attr *h_src_attr) -+{ -+ int err; -+ umode_t mode; -+ unsigned int mnt_flags; -+ unsigned char isdir; -+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME); -+ struct au_dtime dt; -+ struct path h_path; -+ struct dentry *h_src, *h_dst, *h_parent; -+ struct inode *h_inode, *h_dir; -+ struct super_block *sb; -+ -+ /* bsrc branch can be ro/rw. 
*/ -+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc); -+ h_inode = h_src->d_inode; -+ AuDebugOn(h_inode != au_h_iptr(cpg->dentry->d_inode, cpg->bsrc)); -+ -+ /* try stopping to be referenced while we are creating */ -+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst); -+ if (au_ftest_cpup(cpg->flags, RENAME)) -+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX, -+ AUFS_WH_PFX_LEN)); -+ h_parent = h_dst->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); -+ AuDebugOn(h_parent != h_dst->d_parent); -+ -+ sb = cpg->dentry->d_sb; -+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst); -+ if (do_dt) { -+ h_path.dentry = h_parent; -+ au_dtime_store(&dt, dst_parent, &h_path); -+ } -+ h_path.dentry = h_dst; -+ -+ isdir = 0; -+ mode = h_inode->i_mode; -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR, -+ /*want_excl*/true); -+ if (!err) -+ err = au_do_cpup_regular(cpg, h_src_attr); -+ break; -+ case S_IFDIR: -+ isdir = 1; -+ err = vfsub_mkdir(h_dir, &h_path, mode); -+ if (!err) { -+ /* -+ * strange behaviour from the users view, -+ * particularry setattr case -+ */ -+ if (au_ibstart(dst_parent->d_inode) == cpg->bdst) -+ au_cpup_attr_nlink(dst_parent->d_inode, -+ /*force*/1); -+ au_cpup_attr_nlink(cpg->dentry->d_inode, /*force*/1); -+ } -+ break; -+ case S_IFLNK: -+ err = au_do_cpup_symlink(&h_path, h_src, h_dir); -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ AuDebugOn(!capable(CAP_MKNOD)); -+ /*FALLTHROUGH*/ -+ case S_IFIFO: -+ case S_IFSOCK: -+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev); -+ break; -+ default: -+ AuIOErr("Unknown inode type 0%o\n", mode); -+ err = -EIO; -+ } -+ -+ mnt_flags = au_mntflags(sb); -+ if (!au_opt_test(mnt_flags, UDBA_NONE) -+ && !isdir -+ && au_opt_test(mnt_flags, XINO) -+ && (h_inode->i_nlink == 1 -+ || (h_inode->i_state & I_LINKABLE)) -+ /* todo: unnecessary? 
*/ -+ /* && cpg->dentry->d_inode->i_nlink == 1 */ -+ && cpg->bdst < cpg->bsrc -+ && !au_ftest_cpup(cpg->flags, KEEPLINO)) -+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0); -+ /* ignore this error */ -+ -+ if (do_dt) -+ au_dtime_revert(&dt); -+ return err; -+} -+ -+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path) -+{ -+ int err; -+ struct dentry *dentry, *h_dentry, *h_parent, *parent; -+ struct inode *h_dir; -+ aufs_bindex_t bdst; -+ -+ dentry = cpg->dentry; -+ bdst = cpg->bdst; -+ h_dentry = au_h_dptr(dentry, bdst); -+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) { -+ dget(h_dentry); -+ au_set_h_dptr(dentry, bdst, NULL); -+ err = au_lkup_neg(dentry, bdst, /*wh*/0); -+ if (!err) -+ h_path->dentry = dget(au_h_dptr(dentry, bdst)); -+ au_set_h_dptr(dentry, bdst, h_dentry); -+ } else { -+ err = 0; -+ parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bdst); -+ dput(parent); -+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent); -+ if (IS_ERR(h_path->dentry)) -+ err = PTR_ERR(h_path->dentry); -+ } -+ if (unlikely(err)) -+ goto out; -+ -+ h_parent = h_dentry->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); -+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry); -+ /* no delegation since it is just created */ -+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL); -+ dput(h_path->dentry); -+ -+out: -+ return err; -+} -+ -+/* -+ * copyup the @dentry from @bsrc to @bdst. -+ * the caller must set the both of lower dentries. -+ * @len is for truncating when it is -1 copyup the entire file. -+ * in link/rename cases, @dst_parent may be different from the real one. -+ * basic->bsrc can be larger than basic->bdst. 
-+ */ -+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent) -+{ -+ int err, rerr; -+ aufs_bindex_t old_ibstart; -+ unsigned char isdir, plink; -+ struct dentry *h_src, *h_dst, *h_parent; -+ struct inode *dst_inode, *h_dir, *inode, *delegated; -+ struct super_block *sb; -+ struct au_branch *br; -+ /* to reuduce stack size */ -+ struct { -+ struct au_dtime dt; -+ struct path h_path; -+ struct au_cpup_reg_attr h_src_attr; -+ } *a; -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ a->h_src_attr.valid = 0; -+ -+ sb = cpg->dentry->d_sb; -+ br = au_sbr(sb, cpg->bdst); -+ a->h_path.mnt = au_br_mnt(br); -+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst); -+ h_parent = h_dst->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); -+ -+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc); -+ inode = cpg->dentry->d_inode; -+ -+ if (!dst_parent) -+ dst_parent = dget_parent(cpg->dentry); -+ else -+ dget(dst_parent); -+ -+ plink = !!au_opt_test(au_mntflags(sb), PLINK); -+ dst_inode = au_h_iptr(inode, cpg->bdst); -+ if (dst_inode) { -+ if (unlikely(!plink)) { -+ err = -EIO; -+ AuIOErr("hi%lu(i%lu) exists on b%d " -+ "but plink is disabled\n", -+ dst_inode->i_ino, inode->i_ino, cpg->bdst); -+ goto out_parent; -+ } -+ -+ if (dst_inode->i_nlink) { -+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME); -+ -+ h_src = au_plink_lkup(inode, cpg->bdst); -+ err = PTR_ERR(h_src); -+ if (IS_ERR(h_src)) -+ goto out_parent; -+ if (unlikely(!h_src->d_inode)) { -+ err = -EIO; -+ AuIOErr("i%lu exists on a upper branch " -+ "but not pseudo-linked\n", -+ inode->i_ino); -+ dput(h_src); -+ goto out_parent; -+ } -+ -+ if (do_dt) { -+ a->h_path.dentry = h_parent; -+ au_dtime_store(&a->dt, dst_parent, &a->h_path); -+ } -+ -+ a->h_path.dentry = h_dst; -+ delegated = NULL; -+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated); -+ if (!err && au_ftest_cpup(cpg->flags, RENAME)) -+ err = au_do_ren_after_cpup(cpg, 
&a->h_path); -+ if (do_dt) -+ au_dtime_revert(&a->dt); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ dput(h_src); -+ goto out_parent; -+ } else -+ /* todo: cpup_wh_file? */ -+ /* udba work */ -+ au_update_ibrange(inode, /*do_put_zero*/1); -+ } -+ -+ isdir = S_ISDIR(inode->i_mode); -+ old_ibstart = au_ibstart(inode); -+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr); -+ if (unlikely(err)) -+ goto out_rev; -+ dst_inode = h_dst->d_inode; -+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2); -+ /* todo: necessary? */ -+ /* au_pin_hdir_unlock(cpg->pin); */ -+ -+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr); -+ if (unlikely(err)) { -+ /* todo: necessary? */ -+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */ -+ mutex_unlock(&dst_inode->i_mutex); -+ goto out_rev; -+ } -+ -+ if (cpg->bdst < old_ibstart) { -+ if (S_ISREG(inode->i_mode)) { -+ err = au_dy_iaop(inode, cpg->bdst, dst_inode); -+ if (unlikely(err)) { -+ /* ignore an error */ -+ /* au_pin_hdir_relock(cpg->pin); */ -+ mutex_unlock(&dst_inode->i_mutex); -+ goto out_rev; -+ } -+ } -+ au_set_ibstart(inode, cpg->bdst); -+ } else -+ au_set_ibend(inode, cpg->bdst); -+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode), -+ au_hi_flags(inode, isdir)); -+ -+ /* todo: necessary? 
*/ -+ /* err = au_pin_hdir_relock(cpg->pin); */ -+ mutex_unlock(&dst_inode->i_mutex); -+ if (unlikely(err)) -+ goto out_rev; -+ -+ if (!isdir -+ && (h_src->d_inode->i_nlink > 1 -+ || h_src->d_inode->i_state & I_LINKABLE) -+ && plink) -+ au_plink_append(inode, cpg->bdst, h_dst); -+ -+ if (au_ftest_cpup(cpg->flags, RENAME)) { -+ a->h_path.dentry = h_dst; -+ err = au_do_ren_after_cpup(cpg, &a->h_path); -+ } -+ if (!err) -+ goto out_parent; /* success */ -+ -+ /* revert */ -+out_rev: -+ a->h_path.dentry = h_parent; -+ au_dtime_store(&a->dt, dst_parent, &a->h_path); -+ a->h_path.dentry = h_dst; -+ rerr = 0; -+ if (h_dst->d_inode) { -+ if (!isdir) { -+ /* no delegation since it is just created */ -+ rerr = vfsub_unlink(h_dir, &a->h_path, -+ /*delegated*/NULL, /*force*/0); -+ } else -+ rerr = vfsub_rmdir(h_dir, &a->h_path); -+ } -+ au_dtime_revert(&a->dt); -+ if (rerr) { -+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr); -+ err = -EIO; -+ } -+out_parent: -+ dput(dst_parent); -+ kfree(a); -+out: -+ return err; -+} -+ -+#if 0 /* unused */ -+struct au_cpup_single_args { -+ int *errp; -+ struct au_cp_generic *cpg; -+ struct dentry *dst_parent; -+}; -+ -+static void au_call_cpup_single(void *args) -+{ -+ struct au_cpup_single_args *a = args; -+ -+ au_pin_hdir_acquire_nest(a->cpg->pin); -+ *a->errp = au_cpup_single(a->cpg, a->dst_parent); -+ au_pin_hdir_release(a->cpg->pin); -+} -+#endif -+ -+/* -+ * prevent SIGXFSZ in copy-up. -+ * testing CAP_MKNOD is for generic fs, -+ * but CAP_FSETID is for xfs only, currently. 
-+ */ -+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode) -+{ -+ int do_sio; -+ struct super_block *sb; -+ struct inode *h_dir; -+ -+ do_sio = 0; -+ sb = au_pinned_parent(pin)->d_sb; -+ if (!au_wkq_test() -+ && (!au_sbi(sb)->si_plink_maint_pid -+ || au_plink_maint(sb, AuLock_NOPLM))) { -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ /* no condition about RLIMIT_FSIZE and the file size */ -+ do_sio = 1; -+ break; -+ case S_IFCHR: -+ case S_IFBLK: -+ do_sio = !capable(CAP_MKNOD); -+ break; -+ } -+ if (!do_sio) -+ do_sio = ((mode & (S_ISUID | S_ISGID)) -+ && !capable(CAP_FSETID)); -+ /* this workaround may be removed in the future */ -+ if (!do_sio) { -+ h_dir = au_pinned_h_dir(pin); -+ do_sio = h_dir->i_mode & S_ISVTX; -+ } -+ } -+ -+ return do_sio; -+} -+ -+#if 0 /* unused */ -+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent) -+{ -+ int err, wkq_err; -+ struct dentry *h_dentry; -+ -+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc); -+ if (!au_cpup_sio_test(pin, h_dentry->d_inode->i_mode)) -+ err = au_cpup_single(cpg, dst_parent); -+ else { -+ struct au_cpup_single_args args = { -+ .errp = &err, -+ .cpg = cpg, -+ .dst_parent = dst_parent -+ }; -+ wkq_err = au_wkq_wait(au_call_cpup_single, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ return err; -+} -+#endif -+ -+/* -+ * copyup the @dentry from the first active lower branch to @bdst, -+ * using au_cpup_single(). 
-+ */ -+static int au_cpup_simple(struct au_cp_generic *cpg) -+{ -+ int err; -+ unsigned int flags_orig; -+ struct dentry *dentry; -+ -+ AuDebugOn(cpg->bsrc < 0); -+ -+ dentry = cpg->dentry; -+ DiMustWriteLock(dentry); -+ -+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1); -+ if (!err) { -+ flags_orig = cpg->flags; -+ au_fset_cpup(cpg->flags, RENAME); -+ err = au_cpup_single(cpg, NULL); -+ cpg->flags = flags_orig; -+ if (!err) -+ return 0; /* success */ -+ -+ /* revert */ -+ au_set_h_dptr(dentry, cpg->bdst, NULL); -+ au_set_dbstart(dentry, cpg->bsrc); -+ } -+ -+ return err; -+} -+ -+struct au_cpup_simple_args { -+ int *errp; -+ struct au_cp_generic *cpg; -+}; -+ -+static void au_call_cpup_simple(void *args) -+{ -+ struct au_cpup_simple_args *a = args; -+ -+ au_pin_hdir_acquire_nest(a->cpg->pin); -+ *a->errp = au_cpup_simple(a->cpg); -+ au_pin_hdir_release(a->cpg->pin); -+} -+ -+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg) -+{ -+ int err, wkq_err; -+ struct dentry *dentry, *parent; -+ struct file *h_file; -+ struct inode *h_dir; -+ -+ dentry = cpg->dentry; -+ h_file = NULL; -+ if (au_ftest_cpup(cpg->flags, HOPEN)) { -+ AuDebugOn(cpg->bsrc < 0); -+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ } -+ -+ parent = dget_parent(dentry); -+ h_dir = au_h_iptr(parent->d_inode, cpg->bdst); -+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE) -+ && !au_cpup_sio_test(cpg->pin, dentry->d_inode->i_mode)) -+ err = au_cpup_simple(cpg); -+ else { -+ struct au_cpup_simple_args args = { -+ .errp = &err, -+ .cpg = cpg -+ }; -+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ dput(parent); -+ if (h_file) -+ au_h_open_post(dentry, cpg->bsrc, h_file); -+ -+out: -+ return err; -+} -+ -+int au_sio_cpup_simple(struct au_cp_generic *cpg) -+{ -+ aufs_bindex_t bsrc, bend; -+ struct dentry *dentry, *h_dentry; -+ -+ if (cpg->bsrc < 0) { -+ dentry = 
cpg->dentry; -+ bend = au_dbend(dentry); -+ for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) { -+ h_dentry = au_h_dptr(dentry, bsrc); -+ if (h_dentry) { -+ AuDebugOn(!h_dentry->d_inode); -+ break; -+ } -+ } -+ AuDebugOn(bsrc > bend); -+ cpg->bsrc = bsrc; -+ } -+ AuDebugOn(cpg->bsrc <= cpg->bdst); -+ return au_do_sio_cpup_simple(cpg); -+} -+ -+int au_sio_cpdown_simple(struct au_cp_generic *cpg) -+{ -+ AuDebugOn(cpg->bdst <= cpg->bsrc); -+ return au_do_sio_cpup_simple(cpg); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * copyup the deleted file for writing. -+ */ -+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry, -+ struct file *file) -+{ -+ int err; -+ unsigned int flags_orig; -+ aufs_bindex_t bsrc_orig; -+ struct dentry *h_d_dst, *h_d_start; -+ struct au_dinfo *dinfo; -+ struct au_hdentry *hdp; -+ -+ dinfo = au_di(cpg->dentry); -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ bsrc_orig = cpg->bsrc; -+ cpg->bsrc = dinfo->di_bstart; -+ hdp = dinfo->di_hdentry; -+ h_d_dst = hdp[0 + cpg->bdst].hd_dentry; -+ dinfo->di_bstart = cpg->bdst; -+ hdp[0 + cpg->bdst].hd_dentry = wh_dentry; -+ h_d_start = NULL; -+ if (file) { -+ h_d_start = hdp[0 + cpg->bsrc].hd_dentry; -+ hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_dentry; -+ } -+ flags_orig = cpg->flags; -+ cpg->flags = !AuCpup_DTIME; -+ err = au_cpup_single(cpg, /*h_parent*/NULL); -+ cpg->flags = flags_orig; -+ if (file) { -+ if (!err) -+ err = au_reopen_nondir(file); -+ hdp[0 + cpg->bsrc].hd_dentry = h_d_start; -+ } -+ hdp[0 + cpg->bdst].hd_dentry = h_d_dst; -+ dinfo->di_bstart = cpg->bsrc; -+ cpg->bsrc = bsrc_orig; -+ -+ return err; -+} -+ -+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file) -+{ -+ int err; -+ aufs_bindex_t bdst; -+ struct au_dtime dt; -+ struct dentry *dentry, *parent, *h_parent, *wh_dentry; -+ struct au_branch *br; -+ struct path h_path; -+ -+ dentry = cpg->dentry; -+ bdst = cpg->bdst; -+ br = 
au_sbr(dentry->d_sb, bdst); -+ parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bdst); -+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out; -+ -+ h_path.dentry = h_parent; -+ h_path.mnt = au_br_mnt(br); -+ au_dtime_store(&dt, parent, &h_path); -+ err = au_do_cpup_wh(cpg, wh_dentry, file); -+ if (unlikely(err)) -+ goto out_wh; -+ -+ dget(wh_dentry); -+ h_path.dentry = wh_dentry; -+ if (!S_ISDIR(wh_dentry->d_inode->i_mode)) { -+ /* no delegation since it is just created */ -+ err = vfsub_unlink(h_parent->d_inode, &h_path, -+ /*delegated*/NULL, /*force*/0); -+ } else -+ err = vfsub_rmdir(h_parent->d_inode, &h_path); -+ if (unlikely(err)) { -+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n", -+ wh_dentry, err); -+ err = -EIO; -+ } -+ au_dtime_revert(&dt); -+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry); -+ -+out_wh: -+ dput(wh_dentry); -+out: -+ dput(parent); -+ return err; -+} -+ -+struct au_cpup_wh_args { -+ int *errp; -+ struct au_cp_generic *cpg; -+ struct file *file; -+}; -+ -+static void au_call_cpup_wh(void *args) -+{ -+ struct au_cpup_wh_args *a = args; -+ -+ au_pin_hdir_acquire_nest(a->cpg->pin); -+ *a->errp = au_cpup_wh(a->cpg, a->file); -+ au_pin_hdir_release(a->cpg->pin); -+} -+ -+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file) -+{ -+ int err, wkq_err; -+ aufs_bindex_t bdst; -+ struct dentry *dentry, *parent, *h_orph, *h_parent, *h_dentry; -+ struct inode *dir, *h_dir, *h_tmpdir; -+ struct au_wbr *wbr; -+ struct au_pin wh_pin, *pin_orig; -+ -+ dentry = cpg->dentry; -+ bdst = cpg->bdst; -+ parent = dget_parent(dentry); -+ dir = parent->d_inode; -+ h_orph = NULL; -+ h_parent = NULL; -+ h_dir = au_igrab(au_h_iptr(dir, bdst)); -+ h_tmpdir = h_dir; -+ pin_orig = NULL; -+ if (!h_dir->i_nlink) { -+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr; -+ h_orph = wbr->wbr_orph; -+ -+ h_parent = dget(au_h_dptr(parent, bdst)); -+ au_set_h_dptr(parent, bdst, 
dget(h_orph)); -+ h_tmpdir = h_orph->d_inode; -+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0); -+ -+ if (file) -+ h_dentry = au_hf_top(file)->f_dentry; -+ else -+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry)); -+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3); -+ /* todo: au_h_open_pre()? */ -+ -+ pin_orig = cpg->pin; -+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT, -+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED); -+ cpg->pin = &wh_pin; -+ } -+ -+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE) -+ && !au_cpup_sio_test(cpg->pin, dentry->d_inode->i_mode)) -+ err = au_cpup_wh(cpg, file); -+ else { -+ struct au_cpup_wh_args args = { -+ .errp = &err, -+ .cpg = cpg, -+ .file = file -+ }; -+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ if (h_orph) { -+ mutex_unlock(&h_tmpdir->i_mutex); -+ /* todo: au_h_open_post()? */ -+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0); -+ au_set_h_dptr(parent, bdst, h_parent); -+ AuDebugOn(!pin_orig); -+ cpg->pin = pin_orig; -+ } -+ iput(h_dir); -+ dput(parent); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * generic routine for both of copy-up and copy-down. -+ */ -+/* cf. 
revalidate function in file.c */ -+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, -+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, -+ struct au_pin *pin, -+ struct dentry *h_parent, void *arg), -+ void *arg) -+{ -+ int err; -+ struct au_pin pin; -+ struct dentry *d, *parent, *h_parent, *real_parent; -+ -+ err = 0; -+ parent = dget_parent(dentry); -+ if (IS_ROOT(parent)) -+ goto out; -+ -+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2, -+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE); -+ -+ /* do not use au_dpage */ -+ real_parent = parent; -+ while (1) { -+ dput(parent); -+ parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bdst); -+ if (h_parent) -+ goto out; /* success */ -+ -+ /* find top dir which is necessary to cpup */ -+ do { -+ d = parent; -+ dput(parent); -+ parent = dget_parent(d); -+ di_read_lock_parent3(parent, !AuLock_IR); -+ h_parent = au_h_dptr(parent, bdst); -+ di_read_unlock(parent, !AuLock_IR); -+ } while (!h_parent); -+ -+ if (d != real_parent) -+ di_write_lock_child3(d); -+ -+ /* somebody else might create while we were sleeping */ -+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) { -+ if (au_h_dptr(d, bdst)) -+ au_update_dbstart(d); -+ -+ au_pin_set_dentry(&pin, d); -+ err = au_do_pin(&pin); -+ if (!err) { -+ err = cp(d, bdst, &pin, h_parent, arg); -+ au_unpin(&pin); -+ } -+ } -+ -+ if (d != real_parent) -+ di_write_unlock(d); -+ if (unlikely(err)) -+ break; -+ } -+ -+out: -+ dput(parent); -+ return err; -+} -+ -+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst, -+ struct au_pin *pin, -+ struct dentry *h_parent __maybe_unused , -+ void *arg __maybe_unused) -+{ -+ struct au_cp_generic cpg = { -+ .dentry = dentry, -+ .bdst = bdst, -+ .bsrc = -1, -+ .len = 0, -+ .pin = pin, -+ .flags = AuCpup_DTIME -+ }; -+ return au_sio_cpup_simple(&cpg); -+} -+ -+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) -+{ -+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL); -+} -+ 
-+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst) -+{ -+ int err; -+ struct dentry *parent; -+ struct inode *dir; -+ -+ parent = dget_parent(dentry); -+ dir = parent->d_inode; -+ err = 0; -+ if (au_h_iptr(dir, bdst)) -+ goto out; -+ -+ di_read_unlock(parent, AuLock_IR); -+ di_write_lock_parent(parent); -+ /* someone else might change our inode while we were sleeping */ -+ if (!au_h_iptr(dir, bdst)) -+ err = au_cpup_dirs(dentry, bdst); -+ di_downgrade_lock(parent, AuLock_IR); -+ -+out: -+ dput(parent); -+ return err; -+} -diff --git fs/aufs/cpup.h fs/aufs/cpup.h -new file mode 100644 -index 0000000..e3a773d ---- /dev/null -+++ fs/aufs/cpup.h -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * copy-up/down functions -+ */ -+ -+#ifndef __AUFS_CPUP_H__ -+#define __AUFS_CPUP_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+ -+struct inode; -+struct file; -+struct au_pin; -+ -+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags); -+void au_cpup_attr_timesizes(struct inode *inode); -+void au_cpup_attr_nlink(struct inode *inode, int force); -+void au_cpup_attr_changeable(struct inode *inode); -+void au_cpup_igen(struct inode *inode, struct inode *h_inode); -+void au_cpup_attr_all(struct inode *inode, int force); -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_cp_generic { -+ struct dentry *dentry; -+ aufs_bindex_t bdst, bsrc; -+ loff_t len; -+ struct au_pin *pin; -+ unsigned int flags; -+}; -+ -+/* cpup flags */ -+#define AuCpup_DTIME 1 /* do dtime_store/revert */ -+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino, -+ for link(2) */ -+#define AuCpup_RENAME (1 << 2) /* rename after cpup */ -+#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in -+ cpup */ -+#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the -+ existing entry */ -+#define AuCpup_RWDST (1 << 5) /* force write target even if -+ the branch is marked as RO */ -+ -+#define 
au_ftest_cpup(flags, name) ((flags) & AuCpup_##name) -+#define au_fset_cpup(flags, name) \ -+ do { (flags) |= AuCpup_##name; } while (0) -+#define au_fclr_cpup(flags, name) \ -+ do { (flags) &= ~AuCpup_##name; } while (0) -+ -+int au_copy_file(struct file *dst, struct file *src, loff_t len); -+int au_sio_cpup_simple(struct au_cp_generic *cpg); -+int au_sio_cpdown_simple(struct au_cp_generic *cpg); -+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file); -+ -+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst, -+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst, -+ struct au_pin *pin, -+ struct dentry *h_parent, void *arg), -+ void *arg); -+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); -+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst); -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* keep timestamps when copyup */ -+struct au_dtime { -+ struct dentry *dt_dentry; -+ struct path dt_h_path; -+ struct timespec dt_atime, dt_mtime; -+}; -+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry, -+ struct path *h_path); -+void au_dtime_revert(struct au_dtime *dt); -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_CPUP_H__ */ -diff --git fs/aufs/dbgaufs.c fs/aufs/dbgaufs.c -new file mode 100644 -index 0000000..2c2477d ---- /dev/null -+++ fs/aufs/dbgaufs.c -@@ -0,0 +1,419 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * debugfs interface -+ */ -+ -+#include -+#include "aufs.h" -+ -+#ifndef CONFIG_SYSFS -+#error DEBUG_FS depends upon SYSFS -+#endif -+ -+static struct dentry *dbgaufs; -+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH; -+ -+/* 20 is max digits length of ulong 64 */ -+struct dbgaufs_arg { -+ int n; -+ char a[20 * 4]; -+}; -+ -+/* -+ * common function for all XINO files -+ */ -+static int dbgaufs_xi_release(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ kfree(file->private_data); -+ return 0; -+} -+ -+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt) -+{ -+ int err; -+ struct kstat st; -+ struct dbgaufs_arg *p; -+ -+ err = -ENOMEM; -+ p = kmalloc(sizeof(*p), GFP_NOFS); -+ if (unlikely(!p)) -+ goto out; -+ -+ err = 0; -+ p->n = 0; -+ file->private_data = p; -+ if (!xf) -+ goto out; -+ -+ err = vfs_getattr(&xf->f_path, &st); -+ if (!err) { -+ if (do_fcnt) -+ p->n = snprintf -+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n", -+ (long)file_count(xf), st.blocks, st.blksize, -+ (long long)st.size); -+ else -+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n", -+ st.blocks, st.blksize, -+ (long long)st.size); -+ AuDebugOn(p->n >= sizeof(p->a)); -+ } else { -+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err); -+ err = 0; -+ } -+ -+out: -+ return err; -+ -+} -+ -+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct dbgaufs_arg *p; -+ -+ p = file->private_data; -+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct dbgaufs_plink_arg { -+ int n; -+ char a[]; -+}; -+ -+static int dbgaufs_plink_release(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ free_page((unsigned long)file->private_data); -+ return 0; -+} -+ -+static int dbgaufs_plink_open(struct inode *inode, struct file *file) -+{ -+ int err, i, limit; 
-+ unsigned long n, sum; -+ struct dbgaufs_plink_arg *p; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; -+ struct au_sphlhead *sphl; -+ -+ err = -ENOMEM; -+ p = (void *)get_zeroed_page(GFP_NOFS); -+ if (unlikely(!p)) -+ goto out; -+ -+ err = -EFBIG; -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ si_noflush_read_lock(sb); -+ if (au_opt_test(au_mntflags(sb), PLINK)) { -+ limit = PAGE_SIZE - sizeof(p->n); -+ -+ /* the number of buckets */ -+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH); -+ p->n += n; -+ limit -= n; -+ -+ sum = 0; -+ for (i = 0, sphl = sbinfo->si_plink; -+ i < AuPlink_NHASH; -+ i++, sphl++) { -+ n = au_sphl_count(sphl); -+ sum += n; -+ -+ n = snprintf(p->a + p->n, limit, "%lu ", n); -+ p->n += n; -+ limit -= n; -+ if (unlikely(limit <= 0)) -+ goto out_free; -+ } -+ p->a[p->n - 1] = '\n'; -+ -+ /* the sum of plinks */ -+ n = snprintf(p->a + p->n, limit, "%lu\n", sum); -+ p->n += n; -+ limit -= n; -+ if (unlikely(limit <= 0)) -+ goto out_free; -+ } else { -+#define str "1\n0\n0\n" -+ p->n = sizeof(str) - 1; -+ strcpy(p->a, str); -+#undef str -+ } -+ si_read_unlock(sb); -+ -+ err = 0; -+ file->private_data = p; -+ goto out; /* success */ -+ -+out_free: -+ free_page((unsigned long)p); -+out: -+ return err; -+} -+ -+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf, -+ size_t count, loff_t *ppos) -+{ -+ struct dbgaufs_plink_arg *p; -+ -+ p = file->private_data; -+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n); -+} -+ -+static const struct file_operations dbgaufs_plink_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_plink_open, -+ .release = dbgaufs_plink_release, -+ .read = dbgaufs_plink_read -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int dbgaufs_xib_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; -+ -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ 
si_noflush_read_lock(sb); -+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0); -+ si_read_unlock(sb); -+ return err; -+} -+ -+static const struct file_operations dbgaufs_xib_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_xib_open, -+ .release = dbgaufs_xi_release, -+ .read = dbgaufs_xi_read -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define DbgaufsXi_PREFIX "xi" -+ -+static int dbgaufs_xino_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ long l; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; -+ struct file *xf; -+ struct qstr *name; -+ -+ err = -ENOENT; -+ xf = NULL; -+ name = &file->f_dentry->d_name; -+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX) -+ || memcmp(name->name, DbgaufsXi_PREFIX, -+ sizeof(DbgaufsXi_PREFIX) - 1))) -+ goto out; -+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l); -+ if (unlikely(err)) -+ goto out; -+ -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ si_noflush_read_lock(sb); -+ if (l <= au_sbend(sb)) { -+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file; -+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1); -+ } else -+ err = -ENOENT; -+ si_read_unlock(sb); -+ -+out: -+ return err; -+} -+ -+static const struct file_operations dbgaufs_xino_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_xino_open, -+ .release = dbgaufs_xi_release, -+ .read = dbgaufs_xi_read -+}; -+ -+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ aufs_bindex_t bend; -+ struct au_branch *br; -+ struct au_xino_file *xi; -+ -+ if (!au_sbi(sb)->si_dbgaufs) -+ return; -+ -+ bend = au_sbend(sb); -+ for (; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ xi = &br->br_xino; -+ debugfs_remove(xi->xi_dbgaufs); -+ xi->xi_dbgaufs = NULL; -+ } -+} -+ -+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ struct au_sbinfo *sbinfo; -+ struct dentry *parent; -+ struct au_branch *br; -+ struct au_xino_file *xi; -+ 
aufs_bindex_t bend; -+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */ -+ -+ sbinfo = au_sbi(sb); -+ parent = sbinfo->si_dbgaufs; -+ if (!parent) -+ return; -+ -+ bend = au_sbend(sb); -+ for (; bindex <= bend; bindex++) { -+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex); -+ br = au_sbr(sb, bindex); -+ xi = &br->br_xino; -+ AuDebugOn(xi->xi_dbgaufs); -+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent, -+ sbinfo, &dbgaufs_xino_fop); -+ /* ignore an error */ -+ if (unlikely(!xi->xi_dbgaufs)) -+ AuWarn1("failed %s under debugfs\n", name); -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_EXPORT -+static int dbgaufs_xigen_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; -+ -+ sbinfo = inode->i_private; -+ sb = sbinfo->si_sb; -+ si_noflush_read_lock(sb); -+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0); -+ si_read_unlock(sb); -+ return err; -+} -+ -+static const struct file_operations dbgaufs_xigen_fop = { -+ .owner = THIS_MODULE, -+ .open = dbgaufs_xigen_open, -+ .release = dbgaufs_xi_release, -+ .read = dbgaufs_xi_read -+}; -+ -+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) -+{ -+ int err; -+ -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. 
-+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ -+ -+ err = -EIO; -+ sbinfo->si_dbgaufs_xigen = debugfs_create_file -+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, -+ &dbgaufs_xigen_fop); -+ if (sbinfo->si_dbgaufs_xigen) -+ err = 0; -+ -+ return err; -+} -+#else -+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo) -+{ -+ return 0; -+} -+#endif /* CONFIG_AUFS_EXPORT */ -+ -+/* ---------------------------------------------------------------------- */ -+ -+void dbgaufs_si_fin(struct au_sbinfo *sbinfo) -+{ -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. -+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ -+ -+ debugfs_remove_recursive(sbinfo->si_dbgaufs); -+ sbinfo->si_dbgaufs = NULL; -+ kobject_put(&sbinfo->si_kobj); -+} -+ -+int dbgaufs_si_init(struct au_sbinfo *sbinfo) -+{ -+ int err; -+ char name[SysaufsSiNameLen]; -+ -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. 
-+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ -+ -+ err = -ENOENT; -+ if (!dbgaufs) { -+ AuErr1("/debug/aufs is uninitialized\n"); -+ goto out; -+ } -+ -+ err = -EIO; -+ sysaufs_name(sbinfo, name); -+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs); -+ if (unlikely(!sbinfo->si_dbgaufs)) -+ goto out; -+ kobject_get(&sbinfo->si_kobj); -+ -+ sbinfo->si_dbgaufs_xib = debugfs_create_file -+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, -+ &dbgaufs_xib_fop); -+ if (unlikely(!sbinfo->si_dbgaufs_xib)) -+ goto out_dir; -+ -+ sbinfo->si_dbgaufs_plink = debugfs_create_file -+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo, -+ &dbgaufs_plink_fop); -+ if (unlikely(!sbinfo->si_dbgaufs_plink)) -+ goto out_dir; -+ -+ err = dbgaufs_xigen_init(sbinfo); -+ if (!err) -+ goto out; /* success */ -+ -+out_dir: -+ dbgaufs_si_fin(sbinfo); -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void dbgaufs_fin(void) -+{ -+ debugfs_remove(dbgaufs); -+} -+ -+int __init dbgaufs_init(void) -+{ -+ int err; -+ -+ err = -EIO; -+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL); -+ if (dbgaufs) -+ err = 0; -+ return err; -+} -diff --git fs/aufs/dbgaufs.h fs/aufs/dbgaufs.h -new file mode 100644 -index 0000000..31947fb ---- /dev/null -+++ fs/aufs/dbgaufs.h -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * debugfs interface -+ */ -+ -+#ifndef __DBGAUFS_H__ -+#define __DBGAUFS_H__ -+ -+#ifdef __KERNEL__ -+ -+struct super_block; -+struct au_sbinfo; -+ -+#ifdef CONFIG_DEBUG_FS -+/* dbgaufs.c */ -+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); -+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); -+void dbgaufs_si_fin(struct au_sbinfo *sbinfo); -+int dbgaufs_si_init(struct au_sbinfo *sbinfo); -+void dbgaufs_fin(void); -+int __init dbgaufs_init(void); -+#else -+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) -+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) -+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo) -+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo) -+AuStubVoid(dbgaufs_fin, void) -+AuStubInt0(__init dbgaufs_init, void) -+#endif /* CONFIG_DEBUG_FS */ -+ -+#endif /* __KERNEL__ */ -+#endif /* __DBGAUFS_H__ */ -diff --git fs/aufs/dcsub.c fs/aufs/dcsub.c -new file mode 100644 -index 0000000..6c48003 ---- /dev/null -+++ fs/aufs/dcsub.c -@@ -0,0 +1,230 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * sub-routines for dentry cache -+ */ -+ -+#include "aufs.h" -+ -+static void au_dpage_free(struct au_dpage *dpage) -+{ -+ int i; -+ struct dentry **p; -+ -+ p = dpage->dentries; -+ for (i = 0; i < dpage->ndentry; i++) -+ dput(*p++); -+ free_page((unsigned long)dpage->dentries); -+} -+ -+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) -+{ -+ int err; -+ void *p; -+ -+ err = -ENOMEM; -+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); -+ if (unlikely(!dpages->dpages)) -+ goto out; -+ -+ p = (void *)__get_free_page(gfp); -+ if (unlikely(!p)) -+ goto out_dpages; -+ -+ dpages->dpages[0].ndentry = 0; -+ dpages->dpages[0].dentries = p; -+ dpages->ndpage = 1; -+ return 0; /* success */ -+ -+out_dpages: -+ kfree(dpages->dpages); -+out: -+ return err; -+} -+ -+void au_dpages_free(struct au_dcsub_pages *dpages) -+{ -+ int i; -+ struct au_dpage *p; -+ -+ p = dpages->dpages; -+ for (i = 0; i < dpages->ndpage; i++) -+ au_dpage_free(p++); -+ kfree(dpages->dpages); -+} -+ -+static int au_dpages_append(struct au_dcsub_pages *dpages, -+ struct dentry *dentry, gfp_t gfp) -+{ -+ int err, sz; -+ struct au_dpage *dpage; -+ void *p; -+ -+ dpage = dpages->dpages + dpages->ndpage - 1; -+ sz = PAGE_SIZE / sizeof(dentry); -+ if (unlikely(dpage->ndentry >= sz)) { -+ AuLabel(new dpage); -+ err = -ENOMEM; -+ sz = dpages->ndpage * sizeof(*dpages->dpages); -+ p = au_kzrealloc(dpages->dpages, sz, -+ sz + sizeof(*dpages->dpages), gfp); -+ if (unlikely(!p)) -+ goto out; -+ -+ dpages->dpages = p; -+ dpage = dpages->dpages + dpages->ndpage; -+ p = (void *)__get_free_page(gfp); -+ if (unlikely(!p)) -+ goto out; -+ -+ dpage->ndentry = 0; -+ dpage->dentries = p; -+ dpages->ndpage++; -+ } -+ -+ AuDebugOn(!d_count(dentry)); -+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); -+ return 0; /* success */ -+ -+out: -+ return err; -+} -+ -+/* try d_walk() in linux/fs/dcache.c */ -+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, -+ 
au_dpages_test test, void *arg) -+{ -+ int err; -+ struct dentry *this_parent; -+ struct list_head *next; -+ struct super_block *sb = root->d_sb; -+ -+ err = 0; -+ write_seqlock(&rename_lock); -+ this_parent = root; -+ spin_lock(&this_parent->d_lock); -+repeat: -+ next = this_parent->d_subdirs.next; -+resume: -+ if (this_parent->d_sb == sb -+ && !IS_ROOT(this_parent) -+ && au_di(this_parent) -+ && d_count(this_parent) -+ && (!test || test(this_parent, arg))) { -+ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ while (next != &this_parent->d_subdirs) { -+ struct list_head *tmp = next; -+ struct dentry *dentry = list_entry(tmp, struct dentry, -+ d_u.d_child); -+ -+ next = tmp->next; -+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); -+ if (d_count(dentry)) { -+ if (!list_empty(&dentry->d_subdirs)) { -+ spin_unlock(&this_parent->d_lock); -+ spin_release(&dentry->d_lock.dep_map, 1, -+ _RET_IP_); -+ this_parent = dentry; -+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, -+ _RET_IP_); -+ goto repeat; -+ } -+ if (dentry->d_sb == sb -+ && au_di(dentry) -+ && (!test || test(dentry, arg))) -+ err = au_dpages_append(dpages, dentry, -+ GFP_ATOMIC); -+ } -+ spin_unlock(&dentry->d_lock); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ if (this_parent != root) { -+ struct dentry *tmp; -+ struct dentry *child; -+ -+ tmp = this_parent->d_parent; -+ rcu_read_lock(); -+ spin_unlock(&this_parent->d_lock); -+ child = this_parent; -+ this_parent = tmp; -+ spin_lock(&this_parent->d_lock); -+ rcu_read_unlock(); -+ next = child->d_u.d_child.next; -+ goto resume; -+ } -+ -+out: -+ spin_unlock(&this_parent->d_lock); -+ write_sequnlock(&rename_lock); -+ return err; -+} -+ -+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, -+ int do_include, au_dpages_test test, void *arg) -+{ -+ int err; -+ -+ err = 0; -+ write_seqlock(&rename_lock); -+ spin_lock(&dentry->d_lock); -+ if (do_include -+ && 
d_count(dentry) -+ && (!test || test(dentry, arg))) -+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); -+ spin_unlock(&dentry->d_lock); -+ if (unlikely(err)) -+ goto out; -+ -+ /* -+ * RCU for vfsmount is unnecessary since this is a traverse in a single -+ * mount -+ */ -+ while (!IS_ROOT(dentry)) { -+ dentry = dentry->d_parent; /* rename_lock is locked */ -+ spin_lock(&dentry->d_lock); -+ if (d_count(dentry) -+ && (!test || test(dentry, arg))) -+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); -+ spin_unlock(&dentry->d_lock); -+ if (unlikely(err)) -+ break; -+ } -+ -+out: -+ write_sequnlock(&rename_lock); -+ return err; -+} -+ -+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg) -+{ -+ return au_di(dentry) && dentry->d_sb == arg; -+} -+ -+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, -+ struct dentry *dentry, int do_include) -+{ -+ return au_dcsub_pages_rev(dpages, dentry, do_include, -+ au_dcsub_dpages_aufs, dentry->d_sb); -+} -+ -+int au_test_subdir(struct dentry *d1, struct dentry *d2) -+{ -+ struct path path[2] = { -+ { -+ .dentry = d1 -+ }, -+ { -+ .dentry = d2 -+ } -+ }; -+ -+ return path_is_under(path + 0, path + 1); -+} -diff --git fs/aufs/dcsub.h fs/aufs/dcsub.h -new file mode 100644 -index 0000000..75d3e39 ---- /dev/null -+++ fs/aufs/dcsub.h -@@ -0,0 +1,107 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * sub-routines for dentry cache -+ */ -+ -+#ifndef __AUFS_DCSUB_H__ -+#define __AUFS_DCSUB_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+ -+struct dentry; -+ -+struct au_dpage { -+ int ndentry; -+ struct dentry **dentries; -+}; -+ -+struct au_dcsub_pages { -+ int ndpage; -+ struct au_dpage *dpages; -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* dcsub.c */ -+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); -+void au_dpages_free(struct au_dcsub_pages *dpages); -+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); -+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, -+ au_dpages_test test, void *arg); -+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry, -+ int do_include, au_dpages_test test, void *arg); -+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages, -+ struct dentry *dentry, int do_include); -+int au_test_subdir(struct dentry *d1, struct dentry *d2); -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * todo: in linux-3.13, several similar (but faster) helpers are added to -+ * include/linux/dcache.h. Try them (in the future). 
-+ */ -+ -+static inline int au_d_hashed_positive(struct dentry *d) -+{ -+ int err; -+ struct inode *inode = d->d_inode; -+ -+ err = 0; -+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink)) -+ err = -ENOENT; -+ return err; -+} -+ -+static inline int au_d_linkable(struct dentry *d) -+{ -+ int err; -+ struct inode *inode = d->d_inode; -+ -+ err = au_d_hashed_positive(d); -+ if (err -+ && inode -+ && (inode->i_state & I_LINKABLE)) -+ err = 0; -+ return err; -+} -+ -+static inline int au_d_alive(struct dentry *d) -+{ -+ int err; -+ struct inode *inode; -+ -+ err = 0; -+ if (!IS_ROOT(d)) -+ err = au_d_hashed_positive(d); -+ else { -+ inode = d->d_inode; -+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink)) -+ err = -ENOENT; -+ } -+ return err; -+} -+ -+static inline int au_alive_dir(struct dentry *d) -+{ -+ int err; -+ -+ err = au_d_alive(d); -+ if (unlikely(err || IS_DEADDIR(d->d_inode))) -+ err = -ENOENT; -+ return err; -+} -+ -+static inline int au_qstreq(struct qstr *a, struct qstr *b) -+{ -+ return a->len == b->len -+ && !memcmp(a->name, b->name, a->len); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DCSUB_H__ */ -diff --git fs/aufs/debug.c fs/aufs/debug.c -new file mode 100644 -index 0000000..9e0e9ec ---- /dev/null -+++ fs/aufs/debug.c -@@ -0,0 +1,506 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * debug print functions -+ */ -+ -+#include -+#include "aufs.h" -+ -+/* Returns 0, or -errno. arg is in kp->arg. */ -+static int param_atomic_t_set(const char *val, const struct kernel_param *kp) -+{ -+ int err, n; -+ -+ err = kstrtoint(val, 0, &n); -+ if (!err) { -+ if (n > 0) -+ au_debug_on(); -+ else -+ au_debug_off(); -+ } -+ return err; -+} -+ -+/* Returns length written or -errno. Buffer is 4k (ie. be short!) 
*/ -+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp) -+{ -+ atomic_t *a; -+ -+ a = kp->arg; -+ return sprintf(buffer, "%d", atomic_read(a)); -+} -+ -+static struct kernel_param_ops param_ops_atomic_t = { -+ .set = param_atomic_t_set, -+ .get = param_atomic_t_get -+ /* void (*free)(void *arg) */ -+}; -+ -+atomic_t aufs_debug = ATOMIC_INIT(0); -+MODULE_PARM_DESC(debug, "debug print"); -+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP); -+ -+char *au_plevel = KERN_DEBUG; -+#define dpri(fmt, ...) do { \ -+ if ((au_plevel \ -+ && strcmp(au_plevel, KERN_DEBUG)) \ -+ || au_debug_test()) \ -+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \ -+} while (0) -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_dpri_whlist(struct au_nhash *whlist) -+{ -+ unsigned long ul, n; -+ struct hlist_head *head; -+ struct au_vdir_wh *pos; -+ -+ n = whlist->nh_num; -+ head = whlist->nh_head; -+ for (ul = 0; ul < n; ul++) { -+ hlist_for_each_entry(pos, head, wh_hash) -+ dpri("b%d, %.*s, %d\n", -+ pos->wh_bindex, -+ pos->wh_str.len, pos->wh_str.name, -+ pos->wh_str.len); -+ head++; -+ } -+} -+ -+void au_dpri_vdir(struct au_vdir *vdir) -+{ -+ unsigned long ul; -+ union au_vdir_deblk_p p; -+ unsigned char *o; -+ -+ if (!vdir || IS_ERR(vdir)) { -+ dpri("err %ld\n", PTR_ERR(vdir)); -+ return; -+ } -+ -+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n", -+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk, -+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version); -+ for (ul = 0; ul < vdir->vd_nblk; ul++) { -+ p.deblk = vdir->vd_deblk[ul]; -+ o = p.deblk; -+ dpri("[%lu]: %p\n", ul, o); -+ } -+} -+ -+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn, -+ struct dentry *wh) -+{ -+ char *n = NULL; -+ int l = 0; -+ -+ if (!inode || IS_ERR(inode)) { -+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode)); -+ return -1; -+ } -+ -+ /* the type of i_blocks depends upon 
CONFIG_LBDAF */ -+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long) -+ && sizeof(inode->i_blocks) != sizeof(u64)); -+ if (wh) { -+ n = (void *)wh->d_name.name; -+ l = wh->d_name.len; -+ } -+ -+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu," -+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n", -+ bindex, inode, -+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??", -+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode, -+ i_size_read(inode), (unsigned long long)inode->i_blocks, -+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff, -+ inode->i_mapping ? inode->i_mapping->nrpages : 0, -+ inode->i_state, inode->i_flags, inode->i_version, -+ inode->i_generation, -+ l ? ", wh " : "", l, n); -+ return 0; -+} -+ -+void au_dpri_inode(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ aufs_bindex_t bindex; -+ int err, hn; -+ -+ err = do_pri_inode(-1, inode, -1, NULL); -+ if (err || !au_test_aufs(inode->i_sb)) -+ return; -+ -+ iinfo = au_ii(inode); -+ if (!iinfo) -+ return; -+ dpri("i-1: bstart %d, bend %d, gen %d\n", -+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL)); -+ if (iinfo->ii_bstart < 0) -+ return; -+ hn = 0; -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) { -+ hn = !!au_hn(iinfo->ii_hinode + bindex); -+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn, -+ iinfo->ii_hinode[0 + bindex].hi_whdentry); -+ } -+} -+ -+void au_dpri_dalias(struct inode *inode) -+{ -+ struct dentry *d; -+ -+ spin_lock(&inode->i_lock); -+ hlist_for_each_entry(d, &inode->i_dentry, d_alias) -+ au_dpri_dentry(d); -+ spin_unlock(&inode->i_lock); -+} -+ -+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) -+{ -+ struct dentry *wh = NULL; -+ int hn; -+ struct au_iinfo *iinfo; -+ -+ if (!dentry || IS_ERR(dentry)) { -+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry)); -+ return -1; -+ } -+ /* do not call dget_parent() here */ -+ /* note: access d_xxx without 
d_lock */ -+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n", -+ bindex, dentry, dentry, -+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??", -+ d_count(dentry), dentry->d_flags, -+ d_unhashed(dentry) ? "un" : ""); -+ hn = -1; -+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) { -+ iinfo = au_ii(dentry->d_inode); -+ if (iinfo) { -+ hn = !!au_hn(iinfo->ii_hinode + bindex); -+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry; -+ } -+ } -+ do_pri_inode(bindex, dentry->d_inode, hn, wh); -+ return 0; -+} -+ -+void au_dpri_dentry(struct dentry *dentry) -+{ -+ struct au_dinfo *dinfo; -+ aufs_bindex_t bindex; -+ int err; -+ struct au_hdentry *hdp; -+ -+ err = do_pri_dentry(-1, dentry); -+ if (err || !au_test_aufs(dentry->d_sb)) -+ return; -+ -+ dinfo = au_di(dentry); -+ if (!dinfo) -+ return; -+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n", -+ dinfo->di_bstart, dinfo->di_bend, -+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry), -+ dinfo->di_tmpfile); -+ if (dinfo->di_bstart < 0) -+ return; -+ hdp = dinfo->di_hdentry; -+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++) -+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry); -+} -+ -+static int do_pri_file(aufs_bindex_t bindex, struct file *file) -+{ -+ char a[32]; -+ -+ if (!file || IS_ERR(file)) { -+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file)); -+ return -1; -+ } -+ a[0] = 0; -+ if (bindex < 0 -+ && file->f_dentry -+ && au_test_aufs(file->f_dentry->d_sb) -+ && au_fi(file)) -+ snprintf(a, sizeof(a), ", gen %d, mmapped %d", -+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped)); -+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n", -+ bindex, file->f_mode, file->f_flags, (long)file_count(file), -+ file->f_version, file->f_pos, a); -+ if (file->f_dentry) -+ do_pri_dentry(bindex, file->f_dentry); -+ return 0; -+} -+ -+void au_dpri_file(struct file *file) -+{ -+ struct au_finfo *finfo; -+ struct au_fidir *fidir; -+ struct au_hfile *hfile; -+ 
aufs_bindex_t bindex; -+ int err; -+ -+ err = do_pri_file(-1, file); -+ if (err || !file->f_dentry || !au_test_aufs(file->f_dentry->d_sb)) -+ return; -+ -+ finfo = au_fi(file); -+ if (!finfo) -+ return; -+ if (finfo->fi_btop < 0) -+ return; -+ fidir = finfo->fi_hdir; -+ if (!fidir) -+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file); -+ else -+ for (bindex = finfo->fi_btop; -+ bindex >= 0 && bindex <= fidir->fd_bbot; -+ bindex++) { -+ hfile = fidir->fd_hfile + bindex; -+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL); -+ } -+} -+ -+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br) -+{ -+ struct vfsmount *mnt; -+ struct super_block *sb; -+ -+ if (!br || IS_ERR(br)) -+ goto out; -+ mnt = au_br_mnt(br); -+ if (!mnt || IS_ERR(mnt)) -+ goto out; -+ sb = mnt->mnt_sb; -+ if (!sb || IS_ERR(sb)) -+ goto out; -+ -+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, " -+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, " -+ "xino %d\n", -+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count), -+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev), -+ sb->s_flags, sb->s_count, -+ atomic_read(&sb->s_active), !!br->br_xino.xi_file); -+ return 0; -+ -+out: -+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); -+ return -1; -+} -+ -+void au_dpri_sb(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ aufs_bindex_t bindex; -+ int err; -+ /* to reuduce stack size */ -+ struct { -+ struct vfsmount mnt; -+ struct au_branch fake; -+ } *a; -+ -+ /* this function can be called from magic sysrq */ -+ a = kzalloc(sizeof(*a), GFP_ATOMIC); -+ if (unlikely(!a)) { -+ dpri("no memory\n"); -+ return; -+ } -+ -+ a->mnt.mnt_sb = sb; -+ a->fake.br_perm = 0; -+ a->fake.br_path.mnt = &a->mnt; -+ a->fake.br_xino.xi_file = NULL; -+ atomic_set(&a->fake.br_count, 0); -+ smp_mb(); /* atomic_set */ -+ err = do_pri_br(-1, &a->fake); -+ kfree(a); -+ dpri("dev 0x%x\n", sb->s_dev); -+ if (err || !au_test_aufs(sb)) -+ return; -+ -+ sbinfo = au_sbi(sb); -+ if (!sbinfo) -+ 
return; -+ dpri("nw %d, gen %u, kobj %d\n", -+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation, -+ atomic_read(&sbinfo->si_kobj.kref.refcount)); -+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++) -+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_dbg_sleep_jiffy(int jiffy) -+{ -+ while (jiffy) -+ jiffy = schedule_timeout_uninterruptible(jiffy); -+} -+ -+void au_dbg_iattr(struct iattr *ia) -+{ -+#define AuBit(name) \ -+ do { \ -+ if (ia->ia_valid & ATTR_ ## name) \ -+ dpri(#name "\n"); \ -+ } while (0) -+ AuBit(MODE); -+ AuBit(UID); -+ AuBit(GID); -+ AuBit(SIZE); -+ AuBit(ATIME); -+ AuBit(MTIME); -+ AuBit(CTIME); -+ AuBit(ATIME_SET); -+ AuBit(MTIME_SET); -+ AuBit(FORCE); -+ AuBit(ATTR_FLAG); -+ AuBit(KILL_SUID); -+ AuBit(KILL_SGID); -+ AuBit(FILE); -+ AuBit(KILL_PRIV); -+ AuBit(OPEN); -+ AuBit(TIMES_SET); -+#undef AuBit -+ dpri("ia_file %p\n", ia->ia_file); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line) -+{ -+ struct inode *h_inode, *inode = dentry->d_inode; -+ struct dentry *h_dentry; -+ aufs_bindex_t bindex, bend, bi; -+ -+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */) -+ return; -+ -+ bend = au_dbend(dentry); -+ bi = au_ibend(inode); -+ if (bi < bend) -+ bend = bi; -+ bindex = au_dbstart(dentry); -+ bi = au_ibstart(inode); -+ if (bi > bindex) -+ bindex = bi; -+ -+ for (; bindex <= bend; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ h_inode = au_h_iptr(inode, bindex); -+ if (unlikely(h_inode != h_dentry->d_inode)) { -+ au_debug_on(); -+ AuDbg("b%d, %s:%d\n", bindex, func, line); -+ AuDbgDentry(dentry); -+ AuDbgInode(inode); -+ au_debug_off(); -+ BUG(); -+ } -+ } -+} -+ -+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen) -+{ -+ struct dentry *parent; 
-+ -+ parent = dget_parent(dentry); -+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); -+ AuDebugOn(IS_ROOT(dentry)); -+ AuDebugOn(au_digen_test(parent, sigen)); -+ dput(parent); -+} -+ -+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen) -+{ -+ struct dentry *parent; -+ struct inode *inode; -+ -+ parent = dget_parent(dentry); -+ inode = dentry->d_inode; -+ AuDebugOn(inode && S_ISDIR(dentry->d_inode->i_mode)); -+ AuDebugOn(au_digen_test(parent, sigen)); -+ dput(parent); -+} -+ -+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen) -+{ -+ int err, i, j; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry **dentries; -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ AuDebugOn(err); -+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1); -+ AuDebugOn(err); -+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { -+ dpage = dpages.dpages + i; -+ dentries = dpage->dentries; -+ for (j = dpage->ndentry - 1; !err && j >= 0; j--) -+ AuDebugOn(au_digen_test(dentries[j], sigen)); -+ } -+ au_dpages_free(&dpages); -+} -+ -+void au_dbg_verify_kthread(void) -+{ -+ if (au_wkq_test()) { -+ au_dbg_blocked(); -+ /* -+ * It may be recursive, but udba=notify between two aufs mounts, -+ * where a single ro branch is shared, is not a problem. 
-+ */ -+ /* WARN_ON(1); */ -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo __maybe_unused) -+{ -+#ifdef AuForceNoPlink -+ au_opt_clr(sbinfo->si_mntflags, PLINK); -+#endif -+#ifdef AuForceNoXino -+ au_opt_clr(sbinfo->si_mntflags, XINO); -+#endif -+#ifdef AuForceNoRefrof -+ au_opt_clr(sbinfo->si_mntflags, REFROF); -+#endif -+#ifdef AuForceHnotify -+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_HNOTIFY); -+#endif -+#ifdef AuForceRd0 -+ sbinfo->si_rdblk = 0; -+ sbinfo->si_rdhash = 0; -+#endif -+} -+ -+int __init au_debug_init(void) -+{ -+ aufs_bindex_t bindex; -+ struct au_vdir_destr destr; -+ -+ bindex = -1; -+ AuDebugOn(bindex >= 0); -+ -+ destr.len = -1; -+ AuDebugOn(destr.len < NAME_MAX); -+ -+#ifdef CONFIG_4KSTACKS -+ pr_warn("CONFIG_4KSTACKS is defined.\n"); -+#endif -+ -+#ifdef AuForceNoBrs -+ sysaufs_brs = 0; -+#endif -+ -+ return 0; -+} -diff --git fs/aufs/debug.h fs/aufs/debug.h -new file mode 100644 -index 0000000..09727de ---- /dev/null -+++ fs/aufs/debug.h -@@ -0,0 +1,234 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * debug print functions -+ */ -+ -+#ifndef __AUFS_DEBUG_H__ -+#define __AUFS_DEBUG_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_AUFS_DEBUG -+#define AuDebugOn(a) BUG_ON(a) -+ -+/* module parameter */ -+extern atomic_t aufs_debug; -+static inline void au_debug_on(void) -+{ -+ atomic_inc(&aufs_debug); -+} -+static inline void au_debug_off(void) -+{ -+ atomic_dec_if_positive(&aufs_debug); -+} -+ -+static inline int au_debug_test(void) -+{ -+ return atomic_read(&aufs_debug) > 0; -+} -+#else -+#define AuDebugOn(a) do {} while (0) -+AuStubVoid(au_debug_on, void) -+AuStubVoid(au_debug_off, void) -+AuStubInt0(au_debug_test, void) -+#endif /* CONFIG_AUFS_DEBUG */ -+ -+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t) -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* debug print */ -+ -+#define AuDbg(fmt, ...) do { \ -+ if (au_debug_test()) \ -+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \ -+} while (0) -+#define AuLabel(l) AuDbg(#l "\n") -+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__) -+#define AuWarn1(fmt, ...) do { \ -+ static unsigned char _c; \ -+ if (!_c++) \ -+ pr_warn(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define AuErr1(fmt, ...) do { \ -+ static unsigned char _c; \ -+ if (!_c++) \ -+ pr_err(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define AuIOErr1(fmt, ...) do { \ -+ static unsigned char _c; \ -+ if (!_c++) \ -+ AuIOErr(fmt, ##__VA_ARGS__); \ -+} while (0) -+ -+#define AuUnsupportMsg "This operation is not supported." \ -+ " Please report this application to aufs-users ML." -+#define AuUnsupport(fmt, ...) 
do { \ -+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \ -+ dump_stack(); \ -+} while (0) -+ -+#define AuTraceErr(e) do { \ -+ if (unlikely((e) < 0)) \ -+ AuDbg("err %d\n", (int)(e)); \ -+} while (0) -+ -+#define AuTraceErrPtr(p) do { \ -+ if (IS_ERR(p)) \ -+ AuDbg("err %ld\n", PTR_ERR(p)); \ -+} while (0) -+ -+/* dirty macros for debug print, use with "%.*s" and caution */ -+#define AuLNPair(qstr) (qstr)->len, (qstr)->name -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_sbinfo; -+struct au_finfo; -+struct dentry; -+#ifdef CONFIG_AUFS_DEBUG -+extern char *au_plevel; -+struct au_nhash; -+void au_dpri_whlist(struct au_nhash *whlist); -+struct au_vdir; -+void au_dpri_vdir(struct au_vdir *vdir); -+struct inode; -+void au_dpri_inode(struct inode *inode); -+void au_dpri_dalias(struct inode *inode); -+void au_dpri_dentry(struct dentry *dentry); -+struct file; -+void au_dpri_file(struct file *filp); -+struct super_block; -+void au_dpri_sb(struct super_block *sb); -+ -+void au_dbg_sleep_jiffy(int jiffy); -+struct iattr; -+void au_dbg_iattr(struct iattr *ia); -+ -+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__) -+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line); -+void au_dbg_verify_dir_parent(struct dentry *dentry, unsigned int sigen); -+void au_dbg_verify_nondir_parent(struct dentry *dentry, unsigned int sigen); -+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen); -+void au_dbg_verify_kthread(void); -+ -+int __init au_debug_init(void); -+void au_debug_sbinfo_init(struct au_sbinfo *sbinfo); -+#define AuDbgWhlist(w) do { \ -+ AuDbg(#w "\n"); \ -+ au_dpri_whlist(w); \ -+} while (0) -+ -+#define AuDbgVdir(v) do { \ -+ AuDbg(#v "\n"); \ -+ au_dpri_vdir(v); \ -+} while (0) -+ -+#define AuDbgInode(i) do { \ -+ AuDbg(#i "\n"); \ -+ au_dpri_inode(i); \ -+} while (0) -+ -+#define AuDbgDAlias(i) do { \ -+ AuDbg(#i "\n"); \ -+ au_dpri_dalias(i); \ -+} while 
(0) -+ -+#define AuDbgDentry(d) do { \ -+ AuDbg(#d "\n"); \ -+ au_dpri_dentry(d); \ -+} while (0) -+ -+#define AuDbgFile(f) do { \ -+ AuDbg(#f "\n"); \ -+ au_dpri_file(f); \ -+} while (0) -+ -+#define AuDbgSb(sb) do { \ -+ AuDbg(#sb "\n"); \ -+ au_dpri_sb(sb); \ -+} while (0) -+ -+#define AuDbgSleep(sec) do { \ -+ AuDbg("sleep %d sec\n", sec); \ -+ ssleep(sec); \ -+} while (0) -+ -+#define AuDbgSleepJiffy(jiffy) do { \ -+ AuDbg("sleep %d jiffies\n", jiffy); \ -+ au_dbg_sleep_jiffy(jiffy); \ -+} while (0) -+ -+#define AuDbgIAttr(ia) do { \ -+ AuDbg("ia_valid 0x%x\n", (ia)->ia_valid); \ -+ au_dbg_iattr(ia); \ -+} while (0) -+ -+#define AuDbgSym(addr) do { \ -+ char sym[KSYM_SYMBOL_LEN]; \ -+ sprint_symbol(sym, (unsigned long)addr); \ -+ AuDbg("%s\n", sym); \ -+} while (0) -+ -+#define AuInfoSym(addr) do { \ -+ char sym[KSYM_SYMBOL_LEN]; \ -+ sprint_symbol(sym, (unsigned long)addr); \ -+ AuInfo("%s\n", sym); \ -+} while (0) -+#else -+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry) -+AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen) -+AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry, -+ unsigned int sigen) -+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen) -+AuStubVoid(au_dbg_verify_kthread, void) -+AuStubInt0(__init au_debug_init, void) -+AuStubVoid(au_debug_sbinfo_init, struct au_sbinfo *sbinfo) -+ -+#define AuDbgWhlist(w) do {} while (0) -+#define AuDbgVdir(v) do {} while (0) -+#define AuDbgInode(i) do {} while (0) -+#define AuDbgDAlias(i) do {} while (0) -+#define AuDbgDentry(d) do {} while (0) -+#define AuDbgFile(f) do {} while (0) -+#define AuDbgSb(sb) do {} while (0) -+#define AuDbgSleep(sec) do {} while (0) -+#define AuDbgSleepJiffy(jiffy) do {} while (0) -+#define AuDbgIAttr(ia) do {} while (0) -+#define AuDbgSym(addr) do {} while (0) -+#define AuInfoSym(addr) do {} while (0) -+#endif /* CONFIG_AUFS_DEBUG */ -+ -+/* ---------------------------------------------------------------------- 
*/ -+ -+#ifdef CONFIG_AUFS_MAGIC_SYSRQ -+int __init au_sysrq_init(void); -+void au_sysrq_fin(void); -+ -+#ifdef CONFIG_HW_CONSOLE -+#define au_dbg_blocked() do { \ -+ WARN_ON(1); \ -+ handle_sysrq('w'); \ -+} while (0) -+#else -+AuStubVoid(au_dbg_blocked, void) -+#endif -+ -+#else -+AuStubInt0(__init au_sysrq_init, void) -+AuStubVoid(au_sysrq_fin, void) -+AuStubVoid(au_dbg_blocked, void) -+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DEBUG_H__ */ -diff --git fs/aufs/dentry.c fs/aufs/dentry.c -new file mode 100644 -index 0000000..d305f93 ---- /dev/null -+++ fs/aufs/dentry.c -@@ -0,0 +1,1081 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * lookup and dentry operations -+ */ -+ -+#include -+#include "aufs.h" -+ -+#define AuLkup_ALLOW_NEG 1 -+#define AuLkup_IGNORE_PERM (1 << 1) -+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name) -+#define au_fset_lkup(flags, name) \ -+ do { (flags) |= AuLkup_##name; } while (0) -+#define au_fclr_lkup(flags, name) \ -+ do { (flags) &= ~AuLkup_##name; } while (0) -+ -+struct au_do_lookup_args { -+ unsigned int flags; -+ mode_t type; -+}; -+ -+/* -+ * returns positive/negative dentry, NULL or an error. -+ * NULL means whiteout-ed or not-found. 
-+ */ -+static struct dentry* -+au_do_lookup(struct dentry *h_parent, struct dentry *dentry, -+ aufs_bindex_t bindex, struct qstr *wh_name, -+ struct au_do_lookup_args *args) -+{ -+ struct dentry *h_dentry; -+ struct inode *h_inode, *inode; -+ struct au_branch *br; -+ int wh_found, opq; -+ unsigned char wh_able; -+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG); -+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags, -+ IGNORE_PERM); -+ -+ wh_found = 0; -+ br = au_sbr(dentry->d_sb, bindex); -+ wh_able = !!au_br_whable(br->br_perm); -+ if (wh_able) -+ wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0); -+ h_dentry = ERR_PTR(wh_found); -+ if (!wh_found) -+ goto real_lookup; -+ if (unlikely(wh_found < 0)) -+ goto out; -+ -+ /* We found a whiteout */ -+ /* au_set_dbend(dentry, bindex); */ -+ au_set_dbwh(dentry, bindex); -+ if (!allow_neg) -+ return NULL; /* success */ -+ -+real_lookup: -+ if (!ignore_perm) -+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent); -+ else -+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent); -+ if (IS_ERR(h_dentry)) -+ goto out; -+ -+ h_inode = h_dentry->d_inode; -+ if (!h_inode) { -+ if (!allow_neg) -+ goto out_neg; -+ } else if (wh_found -+ || (args->type && args->type != (h_inode->i_mode & S_IFMT))) -+ goto out_neg; -+ -+ if (au_dbend(dentry) <= bindex) -+ au_set_dbend(dentry, bindex); -+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) -+ au_set_dbstart(dentry, bindex); -+ au_set_h_dptr(dentry, bindex, h_dentry); -+ -+ inode = dentry->d_inode; -+ if (!h_inode || !S_ISDIR(h_inode->i_mode) || !wh_able -+ || (inode && !S_ISDIR(inode->i_mode))) -+ goto out; /* success */ -+ -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ opq = au_diropq_test(h_dentry); -+ mutex_unlock(&h_inode->i_mutex); -+ if (opq > 0) -+ au_set_dbdiropq(dentry, bindex); -+ else if (unlikely(opq < 0)) { -+ au_set_h_dptr(dentry, bindex, NULL); -+ h_dentry = ERR_PTR(opq); -+ } -+ goto out; -+ -+out_neg: -+ 
dput(h_dentry); -+ h_dentry = NULL; -+out: -+ return h_dentry; -+} -+ -+static int au_test_shwh(struct super_block *sb, const struct qstr *name) -+{ -+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH) -+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) -+ return -EPERM; -+ return 0; -+} -+ -+/* -+ * returns the number of lower positive dentries, -+ * otherwise an error. -+ * can be called at unlinking with @type is zero. -+ */ -+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type) -+{ -+ int npositive, err; -+ aufs_bindex_t bindex, btail, bdiropq; -+ unsigned char isdir, dirperm1; -+ struct qstr whname; -+ struct au_do_lookup_args args = { -+ .flags = 0, -+ .type = type -+ }; -+ const struct qstr *name = &dentry->d_name; -+ struct dentry *parent; -+ struct inode *inode; -+ struct super_block *sb; -+ -+ sb = dentry->d_sb; -+ err = au_test_shwh(sb, name); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_wh_name_alloc(&whname, name); -+ if (unlikely(err)) -+ goto out; -+ -+ inode = dentry->d_inode; -+ isdir = !!(inode && S_ISDIR(inode->i_mode)); -+ if (!type) -+ au_fset_lkup(args.flags, ALLOW_NEG); -+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1); -+ -+ npositive = 0; -+ parent = dget_parent(dentry); -+ btail = au_dbtaildir(parent); -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ struct dentry *h_parent, *h_dentry; -+ struct inode *h_inode, *h_dir; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry) { -+ if (h_dentry->d_inode) -+ npositive++; -+ if (type != S_IFDIR) -+ break; -+ continue; -+ } -+ h_parent = au_h_dptr(parent, bindex); -+ if (!h_parent) -+ continue; -+ h_dir = h_parent->d_inode; -+ if (!h_dir || !S_ISDIR(h_dir->i_mode)) -+ continue; -+ -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); -+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname, -+ &args); -+ mutex_unlock(&h_dir->i_mutex); -+ err = PTR_ERR(h_dentry); -+ if (IS_ERR(h_dentry)) -+ goto out_parent; -+ au_fclr_lkup(args.flags, 
ALLOW_NEG); -+ if (dirperm1) -+ au_fset_lkup(args.flags, IGNORE_PERM); -+ -+ if (au_dbwh(dentry) >= 0) -+ break; -+ if (!h_dentry) -+ continue; -+ h_inode = h_dentry->d_inode; -+ if (!h_inode) -+ continue; -+ npositive++; -+ if (!args.type) -+ args.type = h_inode->i_mode & S_IFMT; -+ if (args.type != S_IFDIR) -+ break; -+ else if (isdir) { -+ /* the type of lower may be different */ -+ bdiropq = au_dbdiropq(dentry); -+ if (bdiropq >= 0 && bdiropq <= bindex) -+ break; -+ } -+ } -+ -+ if (npositive) { -+ AuLabel(positive); -+ au_update_dbstart(dentry); -+ } -+ err = npositive; -+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE) -+ && au_dbstart(dentry) < 0)) { -+ err = -EIO; -+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n", -+ dentry, err); -+ } -+ -+out_parent: -+ dput(parent); -+ kfree(whname.name); -+out: -+ return err; -+} -+ -+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent) -+{ -+ struct dentry *dentry; -+ int wkq_err; -+ -+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC)) -+ dentry = vfsub_lkup_one(name, parent); -+ else { -+ struct vfsub_lkup_one_args args = { -+ .errp = &dentry, -+ .name = name, -+ .parent = parent -+ }; -+ -+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args); -+ if (unlikely(wkq_err)) -+ dentry = ERR_PTR(wkq_err); -+ } -+ -+ return dentry; -+} -+ -+/* -+ * lookup @dentry on @bindex which should be negative. 
-+ */ -+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh) -+{ -+ int err; -+ struct dentry *parent, *h_parent, *h_dentry; -+ struct au_branch *br; -+ -+ parent = dget_parent(dentry); -+ h_parent = au_h_dptr(parent, bindex); -+ br = au_sbr(dentry->d_sb, bindex); -+ if (wh) -+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name); -+ else -+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent); -+ err = PTR_ERR(h_dentry); -+ if (IS_ERR(h_dentry)) -+ goto out; -+ if (unlikely(h_dentry->d_inode)) { -+ err = -EIO; -+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex); -+ dput(h_dentry); -+ goto out; -+ } -+ -+ err = 0; -+ if (bindex < au_dbstart(dentry)) -+ au_set_dbstart(dentry, bindex); -+ if (au_dbend(dentry) < bindex) -+ au_set_dbend(dentry, bindex); -+ au_set_h_dptr(dentry, bindex, h_dentry); -+ -+out: -+ dput(parent); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* subset of struct inode */ -+struct au_iattr { -+ unsigned long i_ino; -+ /* unsigned int i_nlink; */ -+ kuid_t i_uid; -+ kgid_t i_gid; -+ u64 i_version; -+/* -+ loff_t i_size; -+ blkcnt_t i_blocks; -+*/ -+ umode_t i_mode; -+}; -+ -+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode) -+{ -+ ia->i_ino = h_inode->i_ino; -+ /* ia->i_nlink = h_inode->i_nlink; */ -+ ia->i_uid = h_inode->i_uid; -+ ia->i_gid = h_inode->i_gid; -+ ia->i_version = h_inode->i_version; -+/* -+ ia->i_size = h_inode->i_size; -+ ia->i_blocks = h_inode->i_blocks; -+*/ -+ ia->i_mode = (h_inode->i_mode & S_IFMT); -+} -+ -+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode) -+{ -+ return ia->i_ino != h_inode->i_ino -+ /* || ia->i_nlink != h_inode->i_nlink */ -+ || !uid_eq(ia->i_uid, h_inode->i_uid) -+ || !gid_eq(ia->i_gid, h_inode->i_gid) -+ || ia->i_version != h_inode->i_version -+/* -+ || ia->i_size != h_inode->i_size -+ || ia->i_blocks != h_inode->i_blocks -+*/ -+ || ia->i_mode != (h_inode->i_mode & S_IFMT); 
-+} -+ -+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent, -+ struct au_branch *br) -+{ -+ int err; -+ struct au_iattr ia; -+ struct inode *h_inode; -+ struct dentry *h_d; -+ struct super_block *h_sb; -+ -+ err = 0; -+ memset(&ia, -1, sizeof(ia)); -+ h_sb = h_dentry->d_sb; -+ h_inode = h_dentry->d_inode; -+ if (h_inode) -+ au_iattr_save(&ia, h_inode); -+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb)) -+ /* nfs d_revalidate may return 0 for negative dentry */ -+ /* fuse d_revalidate always return 0 for negative dentry */ -+ goto out; -+ -+ /* main purpose is namei.c:cached_lookup() and d_revalidate */ -+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent); -+ err = PTR_ERR(h_d); -+ if (IS_ERR(h_d)) -+ goto out; -+ -+ err = 0; -+ if (unlikely(h_d != h_dentry -+ || h_d->d_inode != h_inode -+ || (h_inode && au_iattr_test(&ia, h_inode)))) -+ err = au_busy_or_stale(); -+ dput(h_d); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, -+ struct dentry *h_parent, struct au_branch *br) -+{ -+ int err; -+ -+ err = 0; -+ if (udba == AuOpt_UDBA_REVAL -+ && !au_test_fs_remote(h_dentry->d_sb)) { -+ IMustLock(h_dir); -+ err = (h_dentry->d_parent->d_inode != h_dir); -+ } else if (udba != AuOpt_UDBA_NONE) -+ err = au_h_verify_dentry(h_dentry, h_parent, br); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent) -+{ -+ int err; -+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq; -+ struct au_hdentry tmp, *p, *q; -+ struct au_dinfo *dinfo; -+ struct super_block *sb; -+ -+ DiMustWriteLock(dentry); -+ -+ sb = dentry->d_sb; -+ dinfo = au_di(dentry); -+ bend = dinfo->di_bend; -+ bwh = dinfo->di_bwh; -+ bdiropq = dinfo->di_bdiropq; -+ p = dinfo->di_hdentry + dinfo->di_bstart; -+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) { 
-+ if (!p->hd_dentry) -+ continue; -+ -+ new_bindex = au_br_index(sb, p->hd_id); -+ if (new_bindex == bindex) -+ continue; -+ -+ if (dinfo->di_bwh == bindex) -+ bwh = new_bindex; -+ if (dinfo->di_bdiropq == bindex) -+ bdiropq = new_bindex; -+ if (new_bindex < 0) { -+ au_hdput(p); -+ p->hd_dentry = NULL; -+ continue; -+ } -+ -+ /* swap two lower dentries, and loop again */ -+ q = dinfo->di_hdentry + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hd_dentry) { -+ bindex--; -+ p--; -+ } -+ } -+ -+ dinfo->di_bwh = -1; -+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh)) -+ dinfo->di_bwh = bwh; -+ -+ dinfo->di_bdiropq = -1; -+ if (bdiropq >= 0 -+ && bdiropq <= au_sbend(sb) -+ && au_sbr_whable(sb, bdiropq)) -+ dinfo->di_bdiropq = bdiropq; -+ -+ err = -EIO; -+ dinfo->di_bstart = -1; -+ dinfo->di_bend = -1; -+ bend = au_dbend(parent); -+ p = dinfo->di_hdentry; -+ for (bindex = 0; bindex <= bend; bindex++, p++) -+ if (p->hd_dentry) { -+ dinfo->di_bstart = bindex; -+ break; -+ } -+ -+ if (dinfo->di_bstart >= 0) { -+ p = dinfo->di_hdentry + bend; -+ for (bindex = bend; bindex >= 0; bindex--, p--) -+ if (p->hd_dentry) { -+ dinfo->di_bend = bindex; -+ err = 0; -+ break; -+ } -+ } -+ -+ return err; -+} -+ -+static void au_do_hide(struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ inode = dentry->d_inode; -+ if (inode) { -+ if (!S_ISDIR(inode->i_mode)) { -+ if (inode->i_nlink && !d_unhashed(dentry)) -+ drop_nlink(inode); -+ } else { -+ clear_nlink(inode); -+ /* stop next lookup */ -+ inode->i_flags |= S_DEAD; -+ } -+ smp_mb(); /* necessary? 
*/ -+ } -+ d_drop(dentry); -+} -+ -+static int au_hide_children(struct dentry *parent) -+{ -+ int err, i, j, ndentry; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry *dentry; -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, parent, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; -+ -+ /* in reverse order */ -+ for (i = dpages.ndpage - 1; i >= 0; i--) { -+ dpage = dpages.dpages + i; -+ ndentry = dpage->ndentry; -+ for (j = ndentry - 1; j >= 0; j--) { -+ dentry = dpage->dentries[j]; -+ if (dentry != parent) -+ au_do_hide(dentry); -+ } -+ } -+ -+out_dpages: -+ au_dpages_free(&dpages); -+out: -+ return err; -+} -+ -+static void au_hide(struct dentry *dentry) -+{ -+ int err; -+ struct inode *inode; -+ -+ AuDbgDentry(dentry); -+ inode = dentry->d_inode; -+ if (inode && S_ISDIR(inode->i_mode)) { -+ /* shrink_dcache_parent(dentry); */ -+ err = au_hide_children(dentry); -+ if (unlikely(err)) -+ AuIOErr("%pd, failed hiding children, ignored %d\n", -+ dentry, err); -+ } -+ au_do_hide(dentry); -+} -+ -+/* -+ * By adding a dirty branch, a cached dentry may be affected in various ways. 
-+ * -+ * a dirty branch is added -+ * - on the top of layers -+ * - in the middle of layers -+ * - to the bottom of layers -+ * -+ * on the added branch there exists -+ * - a whiteout -+ * - a diropq -+ * - a same named entry -+ * + exist -+ * * negative --> positive -+ * * positive --> positive -+ * - type is unchanged -+ * - type is changed -+ * + doesn't exist -+ * * negative --> negative -+ * * positive --> negative (rejected by au_br_del() for non-dir case) -+ * - none -+ */ -+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo, -+ struct au_dinfo *tmp) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ struct { -+ struct dentry *dentry; -+ struct inode *inode; -+ mode_t mode; -+ } orig_h, tmp_h; -+ struct au_hdentry *hd; -+ struct inode *inode, *h_inode; -+ struct dentry *h_dentry; -+ -+ err = 0; -+ AuDebugOn(dinfo->di_bstart < 0); -+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry; -+ orig_h.inode = orig_h.dentry->d_inode; -+ orig_h.mode = 0; -+ if (orig_h.inode) -+ orig_h.mode = orig_h.inode->i_mode & S_IFMT; -+ memset(&tmp_h, 0, sizeof(tmp_h)); -+ if (tmp->di_bstart >= 0) { -+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry; -+ tmp_h.inode = tmp_h.dentry->d_inode; -+ if (tmp_h.inode) -+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT; -+ } -+ -+ inode = dentry->d_inode; -+ if (!orig_h.inode) { -+ AuDbg("nagative originally\n"); -+ if (inode) { -+ au_hide(dentry); -+ goto out; -+ } -+ AuDebugOn(inode); -+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); -+ AuDebugOn(dinfo->di_bdiropq != -1); -+ -+ if (!tmp_h.inode) { -+ AuDbg("negative --> negative\n"); -+ /* should have only one negative lower */ -+ if (tmp->di_bstart >= 0 -+ && tmp->di_bstart < dinfo->di_bstart) { -+ AuDebugOn(tmp->di_bstart != tmp->di_bend); -+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend); -+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL); -+ au_di_cp(dinfo, tmp); -+ hd = tmp->di_hdentry + tmp->di_bstart; -+ au_set_h_dptr(dentry, tmp->di_bstart, -+ 
dget(hd->hd_dentry)); -+ } -+ au_dbg_verify_dinode(dentry); -+ } else { -+ AuDbg("negative --> positive\n"); -+ /* -+ * similar to the behaviour of creating with bypassing -+ * aufs. -+ * unhash it in order to force an error in the -+ * succeeding create operation. -+ * we should not set S_DEAD here. -+ */ -+ d_drop(dentry); -+ /* au_di_swap(tmp, dinfo); */ -+ au_dbg_verify_dinode(dentry); -+ } -+ } else { -+ AuDbg("positive originally\n"); -+ /* inode may be NULL */ -+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode); -+ if (!tmp_h.inode) { -+ AuDbg("positive --> negative\n"); -+ /* or bypassing aufs */ -+ au_hide(dentry); -+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart) -+ dinfo->di_bwh = tmp->di_bwh; -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ } else if (orig_h.mode == tmp_h.mode) { -+ AuDbg("positive --> positive, same type\n"); -+ if (!S_ISDIR(orig_h.mode) -+ && dinfo->di_bstart > tmp->di_bstart) { -+ /* -+ * similar to the behaviour of removing and -+ * creating. 
-+ */ -+ au_hide(dentry); -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ } else { -+ /* fill empty slots */ -+ if (dinfo->di_bstart > tmp->di_bstart) -+ dinfo->di_bstart = tmp->di_bstart; -+ if (dinfo->di_bend < tmp->di_bend) -+ dinfo->di_bend = tmp->di_bend; -+ dinfo->di_bwh = tmp->di_bwh; -+ dinfo->di_bdiropq = tmp->di_bdiropq; -+ hd = tmp->di_hdentry; -+ bend = dinfo->di_bend; -+ for (bindex = tmp->di_bstart; bindex <= bend; -+ bindex++) { -+ if (au_h_dptr(dentry, bindex)) -+ continue; -+ h_dentry = hd[bindex].hd_dentry; -+ if (!h_dentry) -+ continue; -+ h_inode = h_dentry->d_inode; -+ AuDebugOn(!h_inode); -+ AuDebugOn(orig_h.mode -+ != (h_inode->i_mode -+ & S_IFMT)); -+ au_set_h_dptr(dentry, bindex, -+ dget(h_dentry)); -+ } -+ err = au_refresh_hinode(inode, dentry); -+ au_dbg_verify_dinode(dentry); -+ } -+ } else { -+ AuDbg("positive --> positive, different type\n"); -+ /* similar to the behaviour of removing and creating */ -+ au_hide(dentry); -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ au_dbg_verify_dinode(dentry); -+ } -+ } -+ -+out: -+ return err; -+} -+ -+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent) -+{ -+ int err, ebrange; -+ unsigned int sigen; -+ struct au_dinfo *dinfo, *tmp; -+ struct super_block *sb; -+ struct inode *inode; -+ -+ DiMustWriteLock(dentry); -+ AuDebugOn(IS_ROOT(dentry)); -+ AuDebugOn(!parent->d_inode); -+ -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ sigen = au_sigen(sb); -+ err = au_digen_test(parent, sigen); -+ if (unlikely(err)) -+ goto out; -+ -+ dinfo = au_di(dentry); -+ err = au_di_realloc(dinfo, au_sbend(sb) + 1); -+ if (unlikely(err)) -+ goto out; -+ ebrange = au_dbrange_test(dentry); -+ if (!ebrange) -+ ebrange = au_do_refresh_hdentry(dentry, parent); -+ -+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) { -+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0); -+ if (inode) -+ err = au_refresh_hinode_self(inode); -+ 
au_dbg_verify_dinode(dentry); -+ if (!err) -+ goto out_dgen; /* success */ -+ goto out; -+ } -+ -+ /* temporary dinfo */ -+ AuDbgDentry(dentry); -+ err = -ENOMEM; -+ tmp = au_di_alloc(sb, AuLsc_DI_TMP); -+ if (unlikely(!tmp)) -+ goto out; -+ au_di_swap(tmp, dinfo); -+ /* returns the number of positive dentries */ -+ /* -+ * if current working dir is removed, it returns an error. -+ * but the dentry is legal. -+ */ -+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0); -+ AuDbgDentry(dentry); -+ au_di_swap(tmp, dinfo); -+ if (err == -ENOENT) -+ err = 0; -+ if (err >= 0) { -+ /* compare/refresh by dinfo */ -+ AuDbgDentry(dentry); -+ err = au_refresh_by_dinfo(dentry, dinfo, tmp); -+ au_dbg_verify_dinode(dentry); -+ AuTraceErr(err); -+ } -+ au_rw_write_unlock(&tmp->di_rwsem); -+ au_di_free(tmp); -+ if (unlikely(err)) -+ goto out; -+ -+out_dgen: -+ au_update_digen(dentry); -+out: -+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) { -+ AuIOErr("failed refreshing %pd, %d\n", dentry, err); -+ AuDbgDentry(dentry); -+ } -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags, -+ struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ int err, valid; -+ -+ err = 0; -+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE)) -+ goto out; -+ -+ AuDbg("b%d\n", bindex); -+ /* -+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs, -+ * due to whiteout and branch permission. 
-+ */ -+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE -+ | LOOKUP_FOLLOW | LOOKUP_EXCL); -+ /* it may return tri-state */ -+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags); -+ -+ if (unlikely(valid < 0)) -+ err = valid; -+ else if (!valid) -+ err = -EINVAL; -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* todo: remove this */ -+static int h_d_revalidate(struct dentry *dentry, struct inode *inode, -+ unsigned int flags, int do_udba) -+{ -+ int err; -+ umode_t mode, h_mode; -+ aufs_bindex_t bindex, btail, bstart, ibs, ibe; -+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile; -+ struct inode *h_inode, *h_cached_inode; -+ struct dentry *h_dentry; -+ struct qstr *name, *h_name; -+ -+ err = 0; -+ plus = 0; -+ mode = 0; -+ ibs = -1; -+ ibe = -1; -+ unhashed = !!d_unhashed(dentry); -+ is_root = !!IS_ROOT(dentry); -+ name = &dentry->d_name; -+ tmpfile = au_di(dentry)->di_tmpfile; -+ -+ /* -+ * Theoretically, REVAL test should be unnecessary in case of -+ * {FS,I}NOTIFY. -+ * But {fs,i}notify doesn't fire some necessary events, -+ * IN_ATTRIB for atime/nlink/pageio -+ * Let's do REVAL test too. 
-+ */ -+ if (do_udba && inode) { -+ mode = (inode->i_mode & S_IFMT); -+ plus = (inode->i_nlink > 0); -+ ibs = au_ibstart(inode); -+ ibe = au_ibend(inode); -+ } -+ -+ bstart = au_dbstart(dentry); -+ btail = bstart; -+ if (inode && S_ISDIR(inode->i_mode)) -+ btail = au_dbtaildir(dentry); -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ -+ AuDbg("b%d, %pd\n", bindex, h_dentry); -+ h_nfs = !!au_test_nfs(h_dentry->d_sb); -+ spin_lock(&h_dentry->d_lock); -+ h_name = &h_dentry->d_name; -+ if (unlikely(do_udba -+ && !is_root -+ && ((!h_nfs -+ && (unhashed != !!d_unhashed(h_dentry) -+ || (!tmpfile -+ && !au_qstreq(name, h_name)) -+ )) -+ || (h_nfs -+ && !(flags & LOOKUP_OPEN) -+ && (h_dentry->d_flags -+ & DCACHE_NFSFS_RENAMED))) -+ )) { -+ int h_unhashed; -+ -+ h_unhashed = d_unhashed(h_dentry); -+ spin_unlock(&h_dentry->d_lock); -+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n", -+ unhashed, h_unhashed, dentry, h_dentry); -+ goto err; -+ } -+ spin_unlock(&h_dentry->d_lock); -+ -+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex); -+ if (unlikely(err)) -+ /* do not goto err, to keep the errno */ -+ break; -+ -+ /* todo: plink too? 
*/ -+ if (!do_udba) -+ continue; -+ -+ /* UDBA tests */ -+ h_inode = h_dentry->d_inode; -+ if (unlikely(!!inode != !!h_inode)) -+ goto err; -+ -+ h_plus = plus; -+ h_mode = mode; -+ h_cached_inode = h_inode; -+ if (h_inode) { -+ h_mode = (h_inode->i_mode & S_IFMT); -+ h_plus = (h_inode->i_nlink > 0); -+ } -+ if (inode && ibs <= bindex && bindex <= ibe) -+ h_cached_inode = au_h_iptr(inode, bindex); -+ -+ if (!h_nfs) { -+ if (unlikely(plus != h_plus && !tmpfile)) -+ goto err; -+ } else { -+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED) -+ && !is_root -+ && !IS_ROOT(h_dentry) -+ && unhashed != d_unhashed(h_dentry))) -+ goto err; -+ } -+ if (unlikely(mode != h_mode -+ || h_cached_inode != h_inode)) -+ goto err; -+ continue; -+ -+err: -+ err = -EINVAL; -+ break; -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+/* todo: consolidate with do_refresh() and au_reval_for_attr() */ -+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen) -+{ -+ int err; -+ struct dentry *parent; -+ -+ if (!au_digen_test(dentry, sigen)) -+ return 0; -+ -+ parent = dget_parent(dentry); -+ di_read_lock_parent(parent, AuLock_IR); -+ AuDebugOn(au_digen_test(parent, sigen)); -+ au_dbg_verify_gen(parent, sigen); -+ err = au_refresh_dentry(dentry, parent); -+ di_read_unlock(parent, AuLock_IR); -+ dput(parent); -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_reval_dpath(struct dentry *dentry, unsigned int sigen) -+{ -+ int err; -+ struct dentry *d, *parent; -+ struct inode *inode; -+ -+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR)) -+ return simple_reval_dpath(dentry, sigen); -+ -+ /* slow loop, keep it simple and stupid */ -+ /* cf: au_cpup_dirs() */ -+ err = 0; -+ parent = NULL; -+ while (au_digen_test(dentry, sigen)) { -+ d = dentry; -+ while (1) { -+ dput(parent); -+ parent = dget_parent(d); -+ if (!au_digen_test(parent, sigen)) -+ break; -+ d = parent; -+ } -+ -+ inode = d->d_inode; -+ if (d != dentry) -+ di_write_lock_child2(d); -+ -+ /* someone 
might update our dentry while we were sleeping */ -+ if (au_digen_test(d, sigen)) { -+ /* -+ * todo: consolidate with simple_reval_dpath(), -+ * do_refresh() and au_reval_for_attr(). -+ */ -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_refresh_dentry(d, parent); -+ di_read_unlock(parent, AuLock_IR); -+ } -+ -+ if (d != dentry) -+ di_write_unlock(d); -+ dput(parent); -+ if (unlikely(err)) -+ break; -+ } -+ -+ return err; -+} -+ -+/* -+ * if valid returns 1, otherwise 0. -+ */ -+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags) -+{ -+ int valid, err; -+ unsigned int sigen; -+ unsigned char do_udba; -+ struct super_block *sb; -+ struct inode *inode; -+ -+ /* todo: support rcu-walk? */ -+ if (flags & LOOKUP_RCU) -+ return -ECHILD; -+ -+ valid = 0; -+ if (unlikely(!au_di(dentry))) -+ goto out; -+ -+ inode = dentry->d_inode; -+ if (inode && is_bad_inode(inode)) -+ goto out; -+ -+ valid = 1; -+ sb = dentry->d_sb; -+ /* -+ * todo: very ugly -+ * i_mutex of parent dir may be held, -+ * but we should not return 'invalid' due to busy. 
-+ */ -+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM); -+ if (unlikely(err)) { -+ valid = err; -+ AuTraceErr(err); -+ goto out; -+ } -+ if (unlikely(au_dbrange_test(dentry))) { -+ err = -EINVAL; -+ AuTraceErr(err); -+ goto out_dgrade; -+ } -+ -+ sigen = au_sigen(sb); -+ if (au_digen_test(dentry, sigen)) { -+ AuDebugOn(IS_ROOT(dentry)); -+ err = au_reval_dpath(dentry, sigen); -+ if (unlikely(err)) { -+ AuTraceErr(err); -+ goto out_dgrade; -+ } -+ } -+ di_downgrade_lock(dentry, AuLock_IR); -+ -+ err = -EINVAL; -+ if (!(flags & LOOKUP_OPEN) -+ && inode -+ && !(inode->i_state && I_LINKABLE) -+ && (IS_DEADDIR(inode) || !inode->i_nlink)) -+ goto out_inval; -+ -+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE); -+ if (do_udba && inode) { -+ aufs_bindex_t bstart = au_ibstart(inode); -+ struct inode *h_inode; -+ -+ if (bstart >= 0) { -+ h_inode = au_h_iptr(inode, bstart); -+ if (h_inode && au_test_higen(inode, h_inode)) -+ goto out_inval; -+ } -+ } -+ -+ err = h_d_revalidate(dentry, inode, flags, do_udba); -+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) { -+ err = -EIO; -+ AuDbg("both of real entry and whiteout found, %p, err %d\n", -+ dentry, err); -+ } -+ goto out_inval; -+ -+out_dgrade: -+ di_downgrade_lock(dentry, AuLock_IR); -+out_inval: -+ aufs_read_unlock(dentry, AuLock_IR); -+ AuTraceErr(err); -+ valid = !err; -+out: -+ if (!valid) { -+ AuDbg("%pd invalid, %d\n", dentry, valid); -+ d_drop(dentry); -+ } -+ return valid; -+} -+ -+static void aufs_d_release(struct dentry *dentry) -+{ -+ if (au_di(dentry)) { -+ au_di_fin(dentry); -+ au_hn_di_reinit(dentry); -+ } -+} -+ -+const struct dentry_operations aufs_dop = { -+ .d_revalidate = aufs_d_revalidate, -+ .d_weak_revalidate = aufs_d_revalidate, -+ .d_release = aufs_d_release -+}; -diff --git fs/aufs/dentry.h fs/aufs/dentry.h -new file mode 100644 -index 0000000..2b44b33 ---- /dev/null -+++ fs/aufs/dentry.h -@@ -0,0 +1,220 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * lookup and dentry operations -+ */ -+ -+#ifndef __AUFS_DENTRY_H__ -+#define __AUFS_DENTRY_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "rwsem.h" -+ -+struct au_hdentry { -+ struct dentry *hd_dentry; -+ aufs_bindex_t hd_id; -+}; -+ -+struct au_dinfo { -+ atomic_t di_generation; -+ -+ struct au_rwsem di_rwsem; -+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; -+ unsigned char di_tmpfile; /* to allow the different name */ -+ struct au_hdentry *di_hdentry; -+} ____cacheline_aligned_in_smp; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* dentry.c */ -+extern const struct dentry_operations aufs_dop; -+struct au_branch; -+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent); -+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir, -+ struct dentry *h_parent, struct au_branch *br); -+ -+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type); -+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh); -+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent); -+int au_reval_dpath(struct dentry *dentry, unsigned int sigen); -+ -+/* dinfo.c */ -+void au_di_init_once(void *_di); -+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc); -+void au_di_free(struct au_dinfo *dinfo); -+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b); -+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src); -+int au_di_init(struct dentry *dentry); -+void au_di_fin(struct dentry *dentry); -+int au_di_realloc(struct au_dinfo *dinfo, int nbr); -+ -+void di_read_lock(struct dentry *d, int flags, unsigned int lsc); -+void di_read_unlock(struct dentry *d, int flags); -+void di_downgrade_lock(struct dentry *d, int flags); -+void di_write_lock(struct dentry *d, unsigned int lsc); -+void di_write_unlock(struct dentry *d); -+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir); -+void 
di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir); -+void di_write_unlock2(struct dentry *d1, struct dentry *d2); -+ -+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex); -+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex); -+aufs_bindex_t au_dbtail(struct dentry *dentry); -+aufs_bindex_t au_dbtaildir(struct dentry *dentry); -+ -+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+int au_digen_test(struct dentry *dentry, unsigned int sigen); -+int au_dbrange_test(struct dentry *dentry); -+void au_update_digen(struct dentry *dentry); -+void au_update_dbrange(struct dentry *dentry, int do_put_zero); -+void au_update_dbstart(struct dentry *dentry); -+void au_update_dbend(struct dentry *dentry); -+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct au_dinfo *au_di(struct dentry *dentry) -+{ -+ return dentry->d_fsdata; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* lock subclass for dinfo */ -+enum { -+ AuLsc_DI_CHILD, /* child first */ -+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */ -+ AuLsc_DI_CHILD3, /* copyup dirs */ -+ AuLsc_DI_PARENT, -+ AuLsc_DI_PARENT2, -+ AuLsc_DI_PARENT3, -+ AuLsc_DI_TMP /* temp for replacing dinfo */ -+}; -+ -+/* -+ * di_read_lock_child, di_write_lock_child, -+ * di_read_lock_child2, di_write_lock_child2, -+ * di_read_lock_child3, di_write_lock_child3, -+ * di_read_lock_parent, di_write_lock_parent, -+ * di_read_lock_parent2, di_write_lock_parent2, -+ * di_read_lock_parent3, di_write_lock_parent3, -+ */ -+#define AuReadLockFunc(name, lsc) \ -+static inline void di_read_lock_##name(struct dentry *d, int flags) \ -+{ di_read_lock(d, flags, AuLsc_DI_##lsc); } -+ -+#define AuWriteLockFunc(name, lsc) \ -+static inline void di_write_lock_##name(struct dentry 
*d) \ -+{ di_write_lock(d, AuLsc_DI_##lsc); } -+ -+#define AuRWLockFuncs(name, lsc) \ -+ AuReadLockFunc(name, lsc) \ -+ AuWriteLockFunc(name, lsc) -+ -+AuRWLockFuncs(child, CHILD); -+AuRWLockFuncs(child2, CHILD2); -+AuRWLockFuncs(child3, CHILD3); -+AuRWLockFuncs(parent, PARENT); -+AuRWLockFuncs(parent2, PARENT2); -+AuRWLockFuncs(parent3, PARENT3); -+ -+#undef AuReadLockFunc -+#undef AuWriteLockFunc -+#undef AuRWLockFuncs -+ -+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem) -+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem) -+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem) -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* todo: memory barrier? */ -+static inline unsigned int au_digen(struct dentry *d) -+{ -+ return atomic_read(&au_di(d)->di_generation); -+} -+ -+static inline void au_h_dentry_init(struct au_hdentry *hdentry) -+{ -+ hdentry->hd_dentry = NULL; -+} -+ -+static inline void au_hdput(struct au_hdentry *hd) -+{ -+ if (hd) -+ dput(hd->hd_dentry); -+} -+ -+static inline aufs_bindex_t au_dbstart(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bstart; -+} -+ -+static inline aufs_bindex_t au_dbend(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bend; -+} -+ -+static inline aufs_bindex_t au_dbwh(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bwh; -+} -+ -+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry) -+{ -+ DiMustAnyLock(dentry); -+ return au_di(dentry)->di_bdiropq; -+} -+ -+/* todo: hard/soft set? 
*/ -+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ au_di(dentry)->di_bstart = bindex; -+} -+ -+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ au_di(dentry)->di_bend = bindex; -+} -+ -+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ /* dbwh can be outside of bstart - bend range */ -+ au_di(dentry)->di_bwh = bindex; -+} -+ -+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ DiMustWriteLock(dentry); -+ au_di(dentry)->di_bdiropq = bindex; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_HNOTIFY -+static inline void au_digen_dec(struct dentry *d) -+{ -+ atomic_dec(&au_di(d)->di_generation); -+} -+ -+static inline void au_hn_di_reinit(struct dentry *dentry) -+{ -+ dentry->d_fsdata = NULL; -+} -+#else -+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused) -+#endif /* CONFIG_AUFS_HNOTIFY */ -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DENTRY_H__ */ -diff --git fs/aufs/dinfo.c fs/aufs/dinfo.c -new file mode 100644 -index 0000000..147a72f ---- /dev/null -+++ fs/aufs/dinfo.c -@@ -0,0 +1,531 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * dentry private data -+ */ -+ -+#include "aufs.h" -+ -+void au_di_init_once(void *_dinfo) -+{ -+ struct au_dinfo *dinfo = _dinfo; -+ static struct lock_class_key aufs_di; -+ -+ au_rw_init(&dinfo->di_rwsem); -+ au_rw_class(&dinfo->di_rwsem, &aufs_di); -+} -+ -+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc) -+{ -+ struct au_dinfo *dinfo; -+ int nbr, i; -+ -+ dinfo = au_cache_alloc_dinfo(); -+ if (unlikely(!dinfo)) -+ goto out; -+ -+ nbr = au_sbend(sb) + 1; -+ if (nbr <= 0) -+ nbr = 1; -+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS); -+ if (dinfo->di_hdentry) { -+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc); -+ dinfo->di_bstart = -1; -+ dinfo->di_bend = -1; -+ dinfo->di_bwh = -1; -+ dinfo->di_bdiropq = -1; -+ dinfo->di_tmpfile = 0; -+ for (i = 0; i < nbr; i++) -+ dinfo->di_hdentry[i].hd_id = -1; -+ goto out; -+ } -+ -+ au_cache_free_dinfo(dinfo); -+ dinfo = NULL; -+ -+out: -+ return dinfo; -+} -+ -+void au_di_free(struct au_dinfo *dinfo) -+{ -+ struct au_hdentry *p; -+ aufs_bindex_t bend, bindex; -+ -+ /* dentry may not be revalidated */ -+ bindex = dinfo->di_bstart; -+ if (bindex >= 0) { -+ bend = dinfo->di_bend; -+ p = dinfo->di_hdentry + bindex; -+ while (bindex++ <= bend) -+ au_hdput(p++); -+ } -+ kfree(dinfo->di_hdentry); -+ au_cache_free_dinfo(dinfo); -+} -+ -+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b) -+{ -+ struct au_hdentry *p; -+ aufs_bindex_t bi; -+ -+ AuRwMustWriteLock(&a->di_rwsem); -+ AuRwMustWriteLock(&b->di_rwsem); -+ -+#define DiSwap(v, name) \ -+ do { \ -+ v = a->di_##name; \ -+ a->di_##name = b->di_##name; \ -+ b->di_##name = v; \ -+ } while (0) -+ -+ DiSwap(p, hdentry); -+ DiSwap(bi, bstart); -+ DiSwap(bi, bend); -+ DiSwap(bi, bwh); -+ DiSwap(bi, bdiropq); -+ /* smp_mb(); */ -+ -+#undef DiSwap -+} -+ -+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src) -+{ -+ AuRwMustWriteLock(&dst->di_rwsem); -+ AuRwMustWriteLock(&src->di_rwsem); -+ -+ dst->di_bstart 
= src->di_bstart; -+ dst->di_bend = src->di_bend; -+ dst->di_bwh = src->di_bwh; -+ dst->di_bdiropq = src->di_bdiropq; -+ /* smp_mb(); */ -+} -+ -+int au_di_init(struct dentry *dentry) -+{ -+ int err; -+ struct super_block *sb; -+ struct au_dinfo *dinfo; -+ -+ err = 0; -+ sb = dentry->d_sb; -+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD); -+ if (dinfo) { -+ atomic_set(&dinfo->di_generation, au_sigen(sb)); -+ /* smp_mb(); */ /* atomic_set */ -+ dentry->d_fsdata = dinfo; -+ } else -+ err = -ENOMEM; -+ -+ return err; -+} -+ -+void au_di_fin(struct dentry *dentry) -+{ -+ struct au_dinfo *dinfo; -+ -+ dinfo = au_di(dentry); -+ AuRwDestroy(&dinfo->di_rwsem); -+ au_di_free(dinfo); -+} -+ -+int au_di_realloc(struct au_dinfo *dinfo, int nbr) -+{ -+ int err, sz; -+ struct au_hdentry *hdp; -+ -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ err = -ENOMEM; -+ sz = sizeof(*hdp) * (dinfo->di_bend + 1); -+ if (!sz) -+ sz = sizeof(*hdp); -+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS); -+ if (hdp) { -+ dinfo->di_hdentry = hdp; -+ err = 0; -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void do_ii_write_lock(struct inode *inode, unsigned int lsc) -+{ -+ switch (lsc) { -+ case AuLsc_DI_CHILD: -+ ii_write_lock_child(inode); -+ break; -+ case AuLsc_DI_CHILD2: -+ ii_write_lock_child2(inode); -+ break; -+ case AuLsc_DI_CHILD3: -+ ii_write_lock_child3(inode); -+ break; -+ case AuLsc_DI_PARENT: -+ ii_write_lock_parent(inode); -+ break; -+ case AuLsc_DI_PARENT2: -+ ii_write_lock_parent2(inode); -+ break; -+ case AuLsc_DI_PARENT3: -+ ii_write_lock_parent3(inode); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static void do_ii_read_lock(struct inode *inode, unsigned int lsc) -+{ -+ switch (lsc) { -+ case AuLsc_DI_CHILD: -+ ii_read_lock_child(inode); -+ break; -+ case AuLsc_DI_CHILD2: -+ ii_read_lock_child2(inode); -+ break; -+ case AuLsc_DI_CHILD3: -+ ii_read_lock_child3(inode); -+ break; -+ case 
AuLsc_DI_PARENT: -+ ii_read_lock_parent(inode); -+ break; -+ case AuLsc_DI_PARENT2: -+ ii_read_lock_parent2(inode); -+ break; -+ case AuLsc_DI_PARENT3: -+ ii_read_lock_parent3(inode); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+void di_read_lock(struct dentry *d, int flags, unsigned int lsc) -+{ -+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); -+ if (d->d_inode) { -+ if (au_ftest_lock(flags, IW)) -+ do_ii_write_lock(d->d_inode, lsc); -+ else if (au_ftest_lock(flags, IR)) -+ do_ii_read_lock(d->d_inode, lsc); -+ } -+} -+ -+void di_read_unlock(struct dentry *d, int flags) -+{ -+ if (d->d_inode) { -+ if (au_ftest_lock(flags, IW)) { -+ au_dbg_verify_dinode(d); -+ ii_write_unlock(d->d_inode); -+ } else if (au_ftest_lock(flags, IR)) { -+ au_dbg_verify_dinode(d); -+ ii_read_unlock(d->d_inode); -+ } -+ } -+ au_rw_read_unlock(&au_di(d)->di_rwsem); -+} -+ -+void di_downgrade_lock(struct dentry *d, int flags) -+{ -+ if (d->d_inode && au_ftest_lock(flags, IR)) -+ ii_downgrade_lock(d->d_inode); -+ au_rw_dgrade_lock(&au_di(d)->di_rwsem); -+} -+ -+void di_write_lock(struct dentry *d, unsigned int lsc) -+{ -+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc); -+ if (d->d_inode) -+ do_ii_write_lock(d->d_inode, lsc); -+} -+ -+void di_write_unlock(struct dentry *d) -+{ -+ au_dbg_verify_dinode(d); -+ if (d->d_inode) -+ ii_write_unlock(d->d_inode); -+ au_rw_write_unlock(&au_di(d)->di_rwsem); -+} -+ -+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) -+{ -+ AuDebugOn(d1 == d2 -+ || d1->d_inode == d2->d_inode -+ || d1->d_sb != d2->d_sb); -+ -+ if (isdir && au_test_subdir(d1, d2)) { -+ di_write_lock_child(d1); -+ di_write_lock_child2(d2); -+ } else { -+ /* there should be no races */ -+ di_write_lock_child(d2); -+ di_write_lock_child2(d1); -+ } -+} -+ -+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) -+{ -+ AuDebugOn(d1 == d2 -+ || d1->d_inode == d2->d_inode -+ || d1->d_sb != d2->d_sb); -+ -+ if (isdir && au_test_subdir(d1, d2)) { 
-+ di_write_lock_parent(d1); -+ di_write_lock_parent2(d2); -+ } else { -+ /* there should be no races */ -+ di_write_lock_parent(d2); -+ di_write_lock_parent2(d1); -+ } -+} -+ -+void di_write_unlock2(struct dentry *d1, struct dentry *d2) -+{ -+ di_write_unlock(d1); -+ if (d1->d_inode == d2->d_inode) -+ au_rw_write_unlock(&au_di(d2)->di_rwsem); -+ else -+ di_write_unlock(d2); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ struct dentry *d; -+ -+ DiMustAnyLock(dentry); -+ -+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry)) -+ return NULL; -+ AuDebugOn(bindex < 0); -+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry; -+ AuDebugOn(d && d_count(d) <= 0); -+ return d; -+} -+ -+/* -+ * extended version of au_h_dptr(). -+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or -+ * error. -+ */ -+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ struct dentry *h_dentry; -+ struct inode *inode, *h_inode; -+ -+ inode = dentry->d_inode; -+ AuDebugOn(!inode); -+ -+ h_dentry = NULL; -+ if (au_dbstart(dentry) <= bindex -+ && bindex <= au_dbend(dentry)) -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && !au_d_linkable(h_dentry)) { -+ dget(h_dentry); -+ goto out; /* success */ -+ } -+ -+ AuDebugOn(bindex < au_ibstart(inode)); -+ AuDebugOn(au_ibend(inode) < bindex); -+ h_inode = au_h_iptr(inode, bindex); -+ h_dentry = d_find_alias(h_inode); -+ if (h_dentry) { -+ if (!IS_ERR(h_dentry)) { -+ if (!au_d_linkable(h_dentry)) -+ goto out; /* success */ -+ dput(h_dentry); -+ } else -+ goto out; -+ } -+ -+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) { -+ h_dentry = au_plink_lkup(inode, bindex); -+ AuDebugOn(!h_dentry); -+ if (!IS_ERR(h_dentry)) { -+ if (!au_d_hashed_positive(h_dentry)) -+ goto out; /* success */ -+ dput(h_dentry); -+ h_dentry = NULL; -+ } -+ } -+ -+out: -+ AuDbgDentry(h_dentry); 
-+ return h_dentry; -+} -+ -+aufs_bindex_t au_dbtail(struct dentry *dentry) -+{ -+ aufs_bindex_t bend, bwh; -+ -+ bend = au_dbend(dentry); -+ if (0 <= bend) { -+ bwh = au_dbwh(dentry); -+ if (!bwh) -+ return bwh; -+ if (0 < bwh && bwh < bend) -+ return bwh - 1; -+ } -+ return bend; -+} -+ -+aufs_bindex_t au_dbtaildir(struct dentry *dentry) -+{ -+ aufs_bindex_t bend, bopq; -+ -+ bend = au_dbtail(dentry); -+ if (0 <= bend) { -+ bopq = au_dbdiropq(dentry); -+ if (0 <= bopq && bopq < bend) -+ bend = bopq; -+ } -+ return bend; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_dentry) -+{ -+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex; -+ struct au_branch *br; -+ -+ DiMustWriteLock(dentry); -+ -+ au_hdput(hd); -+ hd->hd_dentry = h_dentry; -+ if (h_dentry) { -+ br = au_sbr(dentry->d_sb, bindex); -+ hd->hd_id = br->br_id; -+ } -+} -+ -+int au_dbrange_test(struct dentry *dentry) -+{ -+ int err; -+ aufs_bindex_t bstart, bend; -+ -+ err = 0; -+ bstart = au_dbstart(dentry); -+ bend = au_dbend(dentry); -+ if (bstart >= 0) -+ AuDebugOn(bend < 0 && bstart > bend); -+ else { -+ err = -EIO; -+ AuDebugOn(bend >= 0); -+ } -+ -+ return err; -+} -+ -+int au_digen_test(struct dentry *dentry, unsigned int sigen) -+{ -+ int err; -+ -+ err = 0; -+ if (unlikely(au_digen(dentry) != sigen -+ || au_iigen_test(dentry->d_inode, sigen))) -+ err = -EIO; -+ -+ return err; -+} -+ -+void au_update_digen(struct dentry *dentry) -+{ -+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb)); -+ /* smp_mb(); */ /* atomic_set */ -+} -+ -+void au_update_dbrange(struct dentry *dentry, int do_put_zero) -+{ -+ struct au_dinfo *dinfo; -+ struct dentry *h_d; -+ struct au_hdentry *hdp; -+ -+ DiMustWriteLock(dentry); -+ -+ dinfo = au_di(dentry); -+ if (!dinfo || dinfo->di_bstart < 0) -+ return; -+ -+ hdp = dinfo->di_hdentry; -+ if (do_put_zero) { -+ aufs_bindex_t 
bindex, bend; -+ -+ bend = dinfo->di_bend; -+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) { -+ h_d = hdp[0 + bindex].hd_dentry; -+ if (h_d && !h_d->d_inode) -+ au_set_h_dptr(dentry, bindex, NULL); -+ } -+ } -+ -+ dinfo->di_bstart = -1; -+ while (++dinfo->di_bstart <= dinfo->di_bend) -+ if (hdp[0 + dinfo->di_bstart].hd_dentry) -+ break; -+ if (dinfo->di_bstart > dinfo->di_bend) { -+ dinfo->di_bstart = -1; -+ dinfo->di_bend = -1; -+ return; -+ } -+ -+ dinfo->di_bend++; -+ while (0 <= --dinfo->di_bend) -+ if (hdp[0 + dinfo->di_bend].hd_dentry) -+ break; -+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0); -+} -+ -+void au_update_dbstart(struct dentry *dentry) -+{ -+ aufs_bindex_t bindex, bend; -+ struct dentry *h_dentry; -+ -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ if (h_dentry->d_inode) { -+ au_set_dbstart(dentry, bindex); -+ return; -+ } -+ au_set_h_dptr(dentry, bindex, NULL); -+ } -+} -+ -+void au_update_dbend(struct dentry *dentry) -+{ -+ aufs_bindex_t bindex, bstart; -+ struct dentry *h_dentry; -+ -+ bstart = au_dbstart(dentry); -+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ if (h_dentry->d_inode) { -+ au_set_dbend(dentry, bindex); -+ return; -+ } -+ au_set_h_dptr(dentry, bindex, NULL); -+ } -+} -+ -+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry) -+{ -+ aufs_bindex_t bindex, bend; -+ -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) -+ if (au_h_dptr(dentry, bindex) == h_dentry) -+ return bindex; -+ return -1; -+} -diff --git fs/aufs/dir.c fs/aufs/dir.c -new file mode 100644 -index 0000000..c93c842 ---- /dev/null -+++ fs/aufs/dir.c -@@ -0,0 +1,630 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * directory operations -+ */ -+ -+#include -+#include "aufs.h" -+ -+void au_add_nlink(struct inode *dir, struct inode *h_dir) -+{ -+ unsigned int nlink; -+ -+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); -+ -+ nlink = dir->i_nlink; -+ nlink += h_dir->i_nlink - 2; -+ if (h_dir->i_nlink < 2) -+ nlink += 2; -+ smp_mb(); /* for i_nlink */ -+ /* 0 can happen in revaliding */ -+ set_nlink(dir, nlink); -+} -+ -+void au_sub_nlink(struct inode *dir, struct inode *h_dir) -+{ -+ unsigned int nlink; -+ -+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); -+ -+ nlink = dir->i_nlink; -+ nlink -= h_dir->i_nlink - 2; -+ if (h_dir->i_nlink < 2) -+ nlink -= 2; -+ smp_mb(); /* for i_nlink */ -+ /* nlink == 0 means the branch-fs is broken */ -+ set_nlink(dir, nlink); -+} -+ -+loff_t au_dir_size(struct file *file, struct dentry *dentry) -+{ -+ loff_t sz; -+ aufs_bindex_t bindex, bend; -+ struct file *h_file; -+ struct dentry *h_dentry; -+ -+ sz = 0; -+ if (file) { -+ AuDebugOn(!file_inode(file)); -+ AuDebugOn(!S_ISDIR(file_inode(file)->i_mode)); -+ -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); -+ bindex <= bend && sz < KMALLOC_MAX_SIZE; -+ bindex++) { -+ h_file = au_hf_dir(file, bindex); -+ if (h_file && file_inode(h_file)) -+ sz += vfsub_f_size_read(h_file); -+ } -+ } else { -+ AuDebugOn(!dentry); -+ AuDebugOn(!dentry->d_inode); -+ AuDebugOn(!S_ISDIR(dentry->d_inode->i_mode)); -+ -+ bend = au_dbtaildir(dentry); -+ for (bindex = au_dbstart(dentry); -+ bindex <= bend && sz < KMALLOC_MAX_SIZE; -+ bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && h_dentry->d_inode) -+ sz += i_size_read(h_dentry->d_inode); -+ } -+ } -+ if (sz < KMALLOC_MAX_SIZE) -+ sz = roundup_pow_of_two(sz); -+ if (sz > KMALLOC_MAX_SIZE) -+ sz = KMALLOC_MAX_SIZE; -+ else if (sz < NAME_MAX) { -+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX); -+ sz = AUFS_RDBLK_DEF; -+ } -+ return sz; -+} -+ -+/* 
---------------------------------------------------------------------- */ -+ -+static int reopen_dir(struct file *file) -+{ -+ int err; -+ unsigned int flags; -+ aufs_bindex_t bindex, btail, bstart; -+ struct dentry *dentry, *h_dentry; -+ struct file *h_file; -+ -+ /* open all lower dirs */ -+ dentry = file->f_dentry; -+ bstart = au_dbstart(dentry); -+ for (bindex = au_fbstart(file); bindex < bstart; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbstart(file, bstart); -+ -+ btail = au_dbtaildir(dentry); -+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbend_dir(file, btail); -+ -+ flags = vfsub_file_flags(file); -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ h_file = au_hf_dir(file, bindex); -+ if (h_file) -+ continue; -+ -+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; /* close all? */ -+ au_set_h_fptr(file, bindex, h_file); -+ } -+ au_update_figen(file); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ err = 0; -+ -+out: -+ return err; -+} -+ -+static int do_open_dir(struct file *file, int flags) -+{ -+ int err; -+ aufs_bindex_t bindex, btail; -+ struct dentry *dentry, *h_dentry; -+ struct file *h_file; -+ -+ FiMustWriteLock(file); -+ -+ err = 0; -+ dentry = file->f_dentry; -+ file->f_version = dentry->d_inode->i_version; -+ bindex = au_dbstart(dentry); -+ au_set_fbstart(file, bindex); -+ btail = au_dbtaildir(dentry); -+ au_set_fbend_dir(file, btail); -+ for (; !err && bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!h_dentry) -+ continue; -+ -+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); -+ if (IS_ERR(h_file)) { -+ err = PTR_ERR(h_file); -+ break; -+ } -+ au_set_h_fptr(file, bindex, h_file); -+ } -+ au_update_figen(file); -+ /* todo: necessary? 
*/ -+ /* file->f_ra = h_file->f_ra; */ -+ if (!err) -+ return 0; /* success */ -+ -+ /* close all */ -+ for (bindex = au_fbstart(file); bindex <= btail; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ au_set_fbstart(file, -1); -+ au_set_fbend_dir(file, -1); -+ -+ return err; -+} -+ -+static int aufs_open_dir(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ int err; -+ struct super_block *sb; -+ struct au_fidir *fidir; -+ -+ err = -ENOMEM; -+ sb = file->f_dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ fidir = au_fidir_alloc(sb); -+ if (fidir) { -+ err = au_do_open(file, do_open_dir, fidir); -+ if (unlikely(err)) -+ kfree(fidir); -+ } -+ si_read_unlock(sb); -+ return err; -+} -+ -+static int aufs_release_dir(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ struct au_vdir *vdir_cache; -+ struct au_finfo *finfo; -+ struct au_fidir *fidir; -+ aufs_bindex_t bindex, bend; -+ -+ finfo = au_fi(file); -+ fidir = finfo->fi_hdir; -+ if (fidir) { -+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */ -+ if (vdir_cache) -+ au_vdir_free(vdir_cache); -+ -+ bindex = finfo->fi_btop; -+ if (bindex >= 0) { -+ /* -+ * calls fput() instead of filp_close(), -+ * since no dnotify or lock for the lower file. 
-+ */ -+ bend = fidir->fd_bbot; -+ for (; bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ } -+ kfree(fidir); -+ finfo->fi_hdir = NULL; -+ } -+ au_finfo_fin(file); -+ return 0; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_flush_dir(struct file *file, fl_owner_t id) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ struct file *h_file; -+ -+ err = 0; -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { -+ h_file = au_hf_dir(file, bindex); -+ if (h_file) -+ err = vfsub_flush(h_file, id); -+ } -+ return err; -+} -+ -+static int aufs_flush_dir(struct file *file, fl_owner_t id) -+{ -+ return au_do_flush(file, id, au_do_flush_dir); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync) -+{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct inode *inode; -+ struct super_block *sb; -+ -+ err = 0; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) { -+ struct path h_path; -+ -+ if (au_test_ro(sb, bindex, inode)) -+ continue; -+ h_path.dentry = au_h_dptr(dentry, bindex); -+ if (!h_path.dentry) -+ continue; -+ -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_fsync(NULL, &h_path, datasync); -+ } -+ -+ return err; -+} -+ -+static int au_do_fsync_dir(struct file *file, int datasync) -+{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct file *h_file; -+ struct super_block *sb; -+ struct inode *inode; -+ -+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ sb = file->f_dentry->d_sb; -+ inode = file_inode(file); -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) { -+ h_file = au_hf_dir(file, bindex); -+ if (!h_file || 
au_test_ro(sb, bindex, inode)) -+ continue; -+ -+ err = vfsub_fsync(h_file, &h_file->f_path, datasync); -+ } -+ -+out: -+ return err; -+} -+ -+/* -+ * @file may be NULL -+ */ -+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end, -+ int datasync) -+{ -+ int err; -+ struct dentry *dentry; -+ struct super_block *sb; -+ struct mutex *mtx; -+ -+ err = 0; -+ dentry = file->f_dentry; -+ mtx = &dentry->d_inode->i_mutex; -+ mutex_lock(mtx); -+ sb = dentry->d_sb; -+ si_noflush_read_lock(sb); -+ if (file) -+ err = au_do_fsync_dir(file, datasync); -+ else { -+ di_write_lock_child(dentry); -+ err = au_do_fsync_dir_no_file(dentry, datasync); -+ } -+ au_cpup_attr_timesizes(dentry->d_inode); -+ di_write_unlock(dentry); -+ if (file) -+ fi_write_unlock(file); -+ -+ si_read_unlock(sb); -+ mutex_unlock(mtx); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_iterate(struct file *file, struct dir_context *ctx) -+{ -+ int err; -+ struct dentry *dentry; -+ struct inode *inode, *h_inode; -+ struct super_block *sb; -+ -+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos); -+ -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ err = au_alive_dir(dentry); -+ if (!err) -+ err = au_vdir_init(file); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ if (!au_test_nfsd()) { -+ err = au_vdir_fill_de(file, ctx); -+ fsstack_copy_attr_atime(inode, h_inode); -+ } else { -+ /* -+ * nfsd filldir may call lookup_one_len(), vfs_getattr(), -+ * encode_fh() and others. 
-+ */ -+ atomic_inc(&h_inode->i_count); -+ di_read_unlock(dentry, AuLock_IR); -+ si_read_unlock(sb); -+ err = au_vdir_fill_de(file, ctx); -+ fsstack_copy_attr_atime(inode, h_inode); -+ fi_write_unlock(file); -+ iput(h_inode); -+ -+ AuTraceErr(err); -+ return err; -+ } -+ -+out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define AuTestEmpty_WHONLY 1 -+#define AuTestEmpty_CALLED (1 << 1) -+#define AuTestEmpty_SHWH (1 << 2) -+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name) -+#define au_fset_testempty(flags, name) \ -+ do { (flags) |= AuTestEmpty_##name; } while (0) -+#define au_fclr_testempty(flags, name) \ -+ do { (flags) &= ~AuTestEmpty_##name; } while (0) -+ -+#ifndef CONFIG_AUFS_SHWH -+#undef AuTestEmpty_SHWH -+#define AuTestEmpty_SHWH 0 -+#endif -+ -+struct test_empty_arg { -+ struct dir_context ctx; -+ struct au_nhash *whlist; -+ unsigned int flags; -+ int err; -+ aufs_bindex_t bindex; -+}; -+ -+static int test_empty_cb(struct dir_context *ctx, const char *__name, -+ int namelen, loff_t offset __maybe_unused, u64 ino, -+ unsigned int d_type) -+{ -+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg, -+ ctx); -+ char *name = (void *)__name; -+ -+ arg->err = 0; -+ au_fset_testempty(arg->flags, CALLED); -+ /* smp_mb(); */ -+ if (name[0] == '.' -+ && (namelen == 1 || (name[1] == '.' 
&& namelen == 2))) -+ goto out; /* success */ -+ -+ if (namelen <= AUFS_WH_PFX_LEN -+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { -+ if (au_ftest_testempty(arg->flags, WHONLY) -+ && !au_nhash_test_known_wh(arg->whlist, name, namelen)) -+ arg->err = -ENOTEMPTY; -+ goto out; -+ } -+ -+ name += AUFS_WH_PFX_LEN; -+ namelen -= AUFS_WH_PFX_LEN; -+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen)) -+ arg->err = au_nhash_append_wh -+ (arg->whlist, name, namelen, ino, d_type, arg->bindex, -+ au_ftest_testempty(arg->flags, SHWH)); -+ -+out: -+ /* smp_mb(); */ -+ AuTraceErr(arg->err); -+ return arg->err; -+} -+ -+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) -+{ -+ int err; -+ struct file *h_file; -+ -+ h_file = au_h_open(dentry, arg->bindex, -+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE, -+ /*file*/NULL, /*force_wr*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out; -+ -+ err = 0; -+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE) -+ && !file_inode(h_file)->i_nlink) -+ goto out_put; -+ -+ do { -+ arg->err = 0; -+ au_fclr_testempty(arg->flags, CALLED); -+ /* smp_mb(); */ -+ err = vfsub_iterate_dir(h_file, &arg->ctx); -+ if (err >= 0) -+ err = arg->err; -+ } while (!err && au_ftest_testempty(arg->flags, CALLED)); -+ -+out_put: -+ fput(h_file); -+ au_sbr_put(dentry->d_sb, arg->bindex); -+out: -+ return err; -+} -+ -+struct do_test_empty_args { -+ int *errp; -+ struct dentry *dentry; -+ struct test_empty_arg *arg; -+}; -+ -+static void call_do_test_empty(void *args) -+{ -+ struct do_test_empty_args *a = args; -+ *a->errp = do_test_empty(a->dentry, a->arg); -+} -+ -+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) -+{ -+ int err, wkq_err; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ -+ h_dentry = au_h_dptr(dentry, arg->bindex); -+ h_inode = h_dentry->d_inode; -+ /* todo: i_mode changes anytime? 
*/ -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ); -+ mutex_unlock(&h_inode->i_mutex); -+ if (!err) -+ err = do_test_empty(dentry, arg); -+ else { -+ struct do_test_empty_args args = { -+ .errp = &err, -+ .dentry = dentry, -+ .arg = arg -+ }; -+ unsigned int flags = arg->flags; -+ -+ wkq_err = au_wkq_wait(call_do_test_empty, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ arg->flags = flags; -+ } -+ -+ return err; -+} -+ -+int au_test_empty_lower(struct dentry *dentry) -+{ -+ int err; -+ unsigned int rdhash; -+ aufs_bindex_t bindex, bstart, btail; -+ struct au_nhash whlist; -+ struct test_empty_arg arg = { -+ .ctx = { -+ .actor = au_diractor(test_empty_cb) -+ } -+ }; -+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg); -+ -+ SiMustAnyLock(dentry->d_sb); -+ -+ rdhash = au_sbi(dentry->d_sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); -+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ -+ arg.flags = 0; -+ arg.whlist = &whlist; -+ bstart = au_dbstart(dentry); -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) -+ au_fset_testempty(arg.flags, SHWH); -+ test_empty = do_test_empty; -+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)) -+ test_empty = sio_test_empty; -+ arg.bindex = bstart; -+ err = test_empty(dentry, &arg); -+ if (unlikely(err)) -+ goto out_whlist; -+ -+ au_fset_testempty(arg.flags, WHONLY); -+ btail = au_dbtaildir(dentry); -+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) { -+ struct dentry *h_dentry; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && h_dentry->d_inode) { -+ arg.bindex = bindex; -+ err = test_empty(dentry, &arg); -+ } -+ } -+ -+out_whlist: -+ au_nhash_wh_free(&whlist); -+out: -+ return err; -+} -+ -+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist) -+{ -+ int err; -+ struct test_empty_arg arg = { -+ .ctx = { -+ 
.actor = au_diractor(test_empty_cb) -+ } -+ }; -+ aufs_bindex_t bindex, btail; -+ -+ err = 0; -+ arg.whlist = whlist; -+ arg.flags = AuTestEmpty_WHONLY; -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)) -+ au_fset_testempty(arg.flags, SHWH); -+ btail = au_dbtaildir(dentry); -+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) { -+ struct dentry *h_dentry; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry && h_dentry->d_inode) { -+ arg.bindex = bindex; -+ err = sio_test_empty(dentry, &arg); -+ } -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+const struct file_operations aufs_dir_fop = { -+ .owner = THIS_MODULE, -+ .llseek = default_llseek, -+ .read = generic_read_dir, -+ .iterate = aufs_iterate, -+ .unlocked_ioctl = aufs_ioctl_dir, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = aufs_compat_ioctl_dir, -+#endif -+ .open = aufs_open_dir, -+ .release = aufs_release_dir, -+ .flush = aufs_flush_dir, -+ .fsync = aufs_fsync_dir -+}; -diff --git fs/aufs/dir.h fs/aufs/dir.h -new file mode 100644 -index 0000000..10efa57 ---- /dev/null -+++ fs/aufs/dir.h -@@ -0,0 +1,123 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * directory operations -+ */ -+ -+#ifndef __AUFS_DIR_H__ -+#define __AUFS_DIR_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* need to be faster and smaller */ -+ -+struct au_nhash { -+ unsigned int nh_num; -+ struct hlist_head *nh_head; -+}; -+ -+struct au_vdir_destr { -+ unsigned char len; -+ unsigned char name[0]; -+} __packed; -+ -+struct au_vdir_dehstr { -+ struct hlist_node hash; -+ struct au_vdir_destr *str; -+} ____cacheline_aligned_in_smp; -+ -+struct au_vdir_de { -+ ino_t de_ino; -+ unsigned char de_type; -+ /* caution: packed */ -+ struct au_vdir_destr de_str; -+} __packed; -+ -+struct au_vdir_wh { -+ struct hlist_node wh_hash; -+#ifdef CONFIG_AUFS_SHWH -+ ino_t wh_ino; -+ aufs_bindex_t wh_bindex; -+ unsigned char wh_type; -+#else -+ aufs_bindex_t wh_bindex; -+#endif -+ /* caution: packed */ -+ struct au_vdir_destr wh_str; -+} __packed; -+ -+union au_vdir_deblk_p { -+ unsigned char *deblk; -+ struct au_vdir_de *de; -+}; -+ -+struct au_vdir { -+ unsigned char **vd_deblk; -+ unsigned long vd_nblk; -+ struct { -+ unsigned long ul; -+ union au_vdir_deblk_p p; -+ } vd_last; -+ -+ unsigned long vd_version; -+ unsigned int vd_deblk_sz; -+ unsigned long vd_jiffy; -+} ____cacheline_aligned_in_smp; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* dir.c */ -+extern const struct file_operations aufs_dir_fop; -+void au_add_nlink(struct inode *dir, struct inode *h_dir); -+void au_sub_nlink(struct inode *dir, struct inode *h_dir); -+loff_t au_dir_size(struct file *file, struct dentry *dentry); -+int au_test_empty_lower(struct dentry *dentry); -+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist); -+ -+/* vdir.c */ -+unsigned int au_rdhash_est(loff_t sz); -+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp); -+void au_nhash_wh_free(struct au_nhash *whlist); -+int 
au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, -+ int limit); -+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen); -+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, -+ unsigned int d_type, aufs_bindex_t bindex, -+ unsigned char shwh); -+void au_vdir_free(struct au_vdir *vdir); -+int au_vdir_init(struct file *file); -+int au_vdir_fill_de(struct file *file, struct dir_context *ctx); -+ -+/* ioctl.c */ -+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg); -+ -+#ifdef CONFIG_AUFS_RDU -+/* rdu.c */ -+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -+#ifdef CONFIG_COMPAT -+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg); -+#endif -+#else -+static inline long au_rdu_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return -EINVAL; -+} -+#ifdef CONFIG_COMPAT -+static inline long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return -EINVAL; -+} -+#endif -+#endif -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DIR_H__ */ -diff --git fs/aufs/dynop.c fs/aufs/dynop.c -new file mode 100644 -index 0000000..518727f ---- /dev/null -+++ fs/aufs/dynop.c -@@ -0,0 +1,366 @@ -+/* -+ * Copyright (C) 2010-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * dynamically customizable operations for regular files -+ */ -+ -+#include "aufs.h" -+ -+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop) -+ -+/* -+ * How large will these lists be? -+ * Usually just a few elements, 20-30 at most for each, I guess. 
-+ */ -+static struct au_splhead dynop[AuDyLast]; -+ -+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op) -+{ -+ struct au_dykey *key, *tmp; -+ struct list_head *head; -+ -+ key = NULL; -+ head = &spl->head; -+ rcu_read_lock(); -+ list_for_each_entry_rcu(tmp, head, dk_list) -+ if (tmp->dk_op.dy_hop == h_op) { -+ key = tmp; -+ kref_get(&key->dk_kref); -+ break; -+ } -+ rcu_read_unlock(); -+ -+ return key; -+} -+ -+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key) -+{ -+ struct au_dykey **k, *found; -+ const void *h_op = key->dk_op.dy_hop; -+ int i; -+ -+ found = NULL; -+ k = br->br_dykey; -+ for (i = 0; i < AuBrDynOp; i++) -+ if (k[i]) { -+ if (k[i]->dk_op.dy_hop == h_op) { -+ found = k[i]; -+ break; -+ } -+ } else -+ break; -+ if (!found) { -+ spin_lock(&br->br_dykey_lock); -+ for (; i < AuBrDynOp; i++) -+ if (k[i]) { -+ if (k[i]->dk_op.dy_hop == h_op) { -+ found = k[i]; -+ break; -+ } -+ } else { -+ k[i] = key; -+ break; -+ } -+ spin_unlock(&br->br_dykey_lock); -+ BUG_ON(i == AuBrDynOp); /* expand the array */ -+ } -+ -+ return found; -+} -+ -+/* kref_get() if @key is already added */ -+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key) -+{ -+ struct au_dykey *tmp, *found; -+ struct list_head *head; -+ const void *h_op = key->dk_op.dy_hop; -+ -+ found = NULL; -+ head = &spl->head; -+ spin_lock(&spl->spin); -+ list_for_each_entry(tmp, head, dk_list) -+ if (tmp->dk_op.dy_hop == h_op) { -+ kref_get(&tmp->dk_kref); -+ found = tmp; -+ break; -+ } -+ if (!found) -+ list_add_rcu(&key->dk_list, head); -+ spin_unlock(&spl->spin); -+ -+ if (!found) -+ DyPrSym(key); -+ return found; -+} -+ -+static void dy_free_rcu(struct rcu_head *rcu) -+{ -+ struct au_dykey *key; -+ -+ key = container_of(rcu, struct au_dykey, dk_rcu); -+ DyPrSym(key); -+ kfree(key); -+} -+ -+static void dy_free(struct kref *kref) -+{ -+ struct au_dykey *key; -+ struct au_splhead *spl; -+ -+ key = container_of(kref, struct 
au_dykey, dk_kref); -+ spl = dynop + key->dk_op.dy_type; -+ au_spl_del_rcu(&key->dk_list, spl); -+ call_rcu(&key->dk_rcu, dy_free_rcu); -+} -+ -+void au_dy_put(struct au_dykey *key) -+{ -+ kref_put(&key->dk_kref, dy_free); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *)) -+ -+#ifdef CONFIG_AUFS_DEBUG -+#define DyDbgDeclare(cnt) unsigned int cnt = 0 -+#define DyDbgInc(cnt) do { cnt++; } while (0) -+#else -+#define DyDbgDeclare(cnt) do {} while (0) -+#define DyDbgInc(cnt) do {} while (0) -+#endif -+ -+#define DySet(func, dst, src, h_op, h_sb) do { \ -+ DyDbgInc(cnt); \ -+ if (h_op->func) { \ -+ if (src.func) \ -+ dst.func = src.func; \ -+ else \ -+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \ -+ } \ -+} while (0) -+ -+#define DySetForce(func, dst, src) do { \ -+ AuDebugOn(!src.func); \ -+ DyDbgInc(cnt); \ -+ dst.func = src.func; \ -+} while (0) -+ -+#define DySetAop(func) \ -+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb) -+#define DySetAopForce(func) \ -+ DySetForce(func, dyaop->da_op, aufs_aop) -+ -+static void dy_aop(struct au_dykey *key, const void *h_op, -+ struct super_block *h_sb __maybe_unused) -+{ -+ struct au_dyaop *dyaop = (void *)key; -+ const struct address_space_operations *h_aop = h_op; -+ DyDbgDeclare(cnt); -+ -+ AuDbg("%s\n", au_sbtype(h_sb)); -+ -+ DySetAop(writepage); -+ DySetAopForce(readpage); /* force */ -+ DySetAop(writepages); -+ DySetAop(set_page_dirty); -+ DySetAop(readpages); -+ DySetAop(write_begin); -+ DySetAop(write_end); -+ DySetAop(bmap); -+ DySetAop(invalidatepage); -+ DySetAop(releasepage); -+ DySetAop(freepage); -+ /* these two will be changed according to an aufs mount option */ -+ DySetAop(direct_IO); -+ DySetAop(get_xip_mem); -+ DySetAop(migratepage); -+ DySetAop(launder_page); -+ DySetAop(is_partially_uptodate); -+ DySetAop(is_dirty_writeback); -+ DySetAop(error_remove_page); -+ DySetAop(swap_activate); -+ 
DySetAop(swap_deactivate); -+ -+ DyDbgSize(cnt, *h_aop); -+ dyaop->da_get_xip_mem = h_aop->get_xip_mem; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void dy_bug(struct kref *kref) -+{ -+ BUG(); -+} -+ -+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br) -+{ -+ struct au_dykey *key, *old; -+ struct au_splhead *spl; -+ struct op { -+ unsigned int sz; -+ void (*set)(struct au_dykey *key, const void *h_op, -+ struct super_block *h_sb __maybe_unused); -+ }; -+ static const struct op a[] = { -+ [AuDy_AOP] = { -+ .sz = sizeof(struct au_dyaop), -+ .set = dy_aop -+ } -+ }; -+ const struct op *p; -+ -+ spl = dynop + op->dy_type; -+ key = dy_gfind_get(spl, op->dy_hop); -+ if (key) -+ goto out_add; /* success */ -+ -+ p = a + op->dy_type; -+ key = kzalloc(p->sz, GFP_NOFS); -+ if (unlikely(!key)) { -+ key = ERR_PTR(-ENOMEM); -+ goto out; -+ } -+ -+ key->dk_op.dy_hop = op->dy_hop; -+ kref_init(&key->dk_kref); -+ p->set(key, op->dy_hop, au_br_sb(br)); -+ old = dy_gadd(spl, key); -+ if (old) { -+ kfree(key); -+ key = old; -+ } -+ -+out_add: -+ old = dy_bradd(br, key); -+ if (old) -+ /* its ref-count should never be zero here */ -+ kref_put(&key->dk_kref, dy_bug); -+out: -+ return key; -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it. -+ * This behaviour is neccessary to return an error from open(O_DIRECT) instead -+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes -+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error. -+ * See the aufs manual in detail. -+ * -+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the -+ * performance of fadvise() and madvise() may be affected. 
-+ */ -+static void dy_adx(struct au_dyaop *dyaop, int do_dx) -+{ -+ if (!do_dx) { -+ dyaop->da_op.direct_IO = NULL; -+ dyaop->da_op.get_xip_mem = NULL; -+ } else { -+ dyaop->da_op.direct_IO = aufs_aop.direct_IO; -+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem; -+ if (!dyaop->da_get_xip_mem) -+ dyaop->da_op.get_xip_mem = NULL; -+ } -+} -+ -+static struct au_dyaop *dy_aget(struct au_branch *br, -+ const struct address_space_operations *h_aop, -+ int do_dx) -+{ -+ struct au_dyaop *dyaop; -+ struct au_dynop op; -+ -+ op.dy_type = AuDy_AOP; -+ op.dy_haop = h_aop; -+ dyaop = (void *)dy_get(&op, br); -+ if (IS_ERR(dyaop)) -+ goto out; -+ dy_adx(dyaop, do_dx); -+ -+out: -+ return dyaop; -+} -+ -+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode) -+{ -+ int err, do_dx; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct au_dyaop *dyaop; -+ -+ AuDebugOn(!S_ISREG(h_inode->i_mode)); -+ IiMustWriteLock(inode); -+ -+ sb = inode->i_sb; -+ br = au_sbr(sb, bindex); -+ do_dx = !!au_opt_test(au_mntflags(sb), DIO); -+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx); -+ err = PTR_ERR(dyaop); -+ if (IS_ERR(dyaop)) -+ /* unnecessary to call dy_fput() */ -+ goto out; -+ -+ err = 0; -+ inode->i_mapping->a_ops = &dyaop->da_op; -+ -+out: -+ return err; -+} -+ -+/* -+ * Is it safe to replace a_ops during the inode/file is in operation? -+ * Yes, I hope so. 
-+ */ -+int au_dy_irefresh(struct inode *inode) -+{ -+ int err; -+ aufs_bindex_t bstart; -+ struct inode *h_inode; -+ -+ err = 0; -+ if (S_ISREG(inode->i_mode)) { -+ bstart = au_ibstart(inode); -+ h_inode = au_h_iptr(inode, bstart); -+ err = au_dy_iaop(inode, bstart, h_inode); -+ } -+ return err; -+} -+ -+void au_dy_arefresh(int do_dx) -+{ -+ struct au_splhead *spl; -+ struct list_head *head; -+ struct au_dykey *key; -+ -+ spl = dynop + AuDy_AOP; -+ head = &spl->head; -+ spin_lock(&spl->spin); -+ list_for_each_entry(key, head, dk_list) -+ dy_adx((void *)key, do_dx); -+ spin_unlock(&spl->spin); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void __init au_dy_init(void) -+{ -+ int i; -+ -+ /* make sure that 'struct au_dykey *' can be any type */ -+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key)); -+ -+ for (i = 0; i < AuDyLast; i++) -+ au_spl_init(dynop + i); -+} -+ -+void au_dy_fin(void) -+{ -+ int i; -+ -+ for (i = 0; i < AuDyLast; i++) -+ WARN_ON(!list_empty(&dynop[i].head)); -+} -diff --git fs/aufs/dynop.h fs/aufs/dynop.h -new file mode 100644 -index 0000000..17c9eaa ---- /dev/null -+++ fs/aufs/dynop.h -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2010-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * dynamically customizable operations (for regular files only) -+ */ -+ -+#ifndef __AUFS_DYNOP_H__ -+#define __AUFS_DYNOP_H__ -+ -+#ifdef __KERNEL__ -+ -+#include "inode.h" -+ -+enum {AuDy_AOP, AuDyLast}; -+ -+struct au_dynop { -+ int dy_type; -+ union { -+ const void *dy_hop; -+ const struct address_space_operations *dy_haop; -+ }; -+}; -+ -+struct au_dykey { -+ union { -+ struct list_head dk_list; -+ struct rcu_head dk_rcu; -+ }; -+ struct au_dynop dk_op; -+ -+ /* -+ * during I am in the branch local array, kref is gotten. when the -+ * branch is removed, kref is put. 
-+ */ -+ struct kref dk_kref; -+}; -+ -+/* stop unioning since their sizes are very different from each other */ -+struct au_dyaop { -+ struct au_dykey da_key; -+ struct address_space_operations da_op; /* not const */ -+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int, -+ void **, unsigned long *); -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* dynop.c */ -+struct au_branch; -+void au_dy_put(struct au_dykey *key); -+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode); -+int au_dy_irefresh(struct inode *inode); -+void au_dy_arefresh(int do_dio); -+ -+void __init au_dy_init(void); -+void au_dy_fin(void); -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_DYNOP_H__ */ -diff --git fs/aufs/export.c fs/aufs/export.c -new file mode 100644 -index 0000000..b7ce9c1 ---- /dev/null -+++ fs/aufs/export.c -@@ -0,0 +1,818 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * export via nfs -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "../fs/mount.h" -+#include "aufs.h" -+ -+union conv { -+#ifdef CONFIG_AUFS_INO_T_64 -+ __u32 a[2]; -+#else -+ __u32 a[1]; -+#endif -+ ino_t ino; -+}; -+ -+static ino_t decode_ino(__u32 *a) -+{ -+ union conv u; -+ -+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); -+ u.a[0] = a[0]; -+#ifdef CONFIG_AUFS_INO_T_64 -+ u.a[1] = a[1]; -+#endif -+ return u.ino; -+} -+ -+static void encode_ino(__u32 *a, ino_t ino) -+{ -+ union conv u; -+ -+ u.ino = ino; -+ a[0] = u.a[0]; -+#ifdef CONFIG_AUFS_INO_T_64 -+ a[1] = u.a[1]; -+#endif -+} -+ -+/* NFS file handle */ -+enum { -+ Fh_br_id, -+ Fh_sigen, -+#ifdef CONFIG_AUFS_INO_T_64 -+ /* support 64bit inode number */ -+ Fh_ino1, -+ Fh_ino2, -+ Fh_dir_ino1, -+ Fh_dir_ino2, -+#else -+ Fh_ino1, -+ Fh_dir_ino1, -+#endif -+ Fh_igen, -+ Fh_h_type, -+ Fh_tail, -+ -+ Fh_ino = Fh_ino1, -+ Fh_dir_ino = Fh_dir_ino1 -+}; -+ -+static int au_test_anon(struct dentry *dentry) -+{ -+ /* note: 
read d_flags without d_lock */ -+ return !!(dentry->d_flags & DCACHE_DISCONNECTED); -+} -+ -+int au_test_nfsd(void) -+{ -+ int ret; -+ struct task_struct *tsk = current; -+ char comm[sizeof(tsk->comm)]; -+ -+ ret = 0; -+ if (tsk->flags & PF_KTHREAD) { -+ get_task_comm(comm, tsk); -+ ret = !strcmp(comm, "nfsd"); -+ } -+ -+ return ret; -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* inode generation external table */ -+ -+void au_xigen_inc(struct inode *inode) -+{ -+ loff_t pos; -+ ssize_t sz; -+ __u32 igen; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ -+ sb = inode->i_sb; -+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO)); -+ -+ sbinfo = au_sbi(sb); -+ pos = inode->i_ino; -+ pos *= sizeof(igen); -+ igen = inode->i_generation + 1; -+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen, -+ sizeof(igen), &pos); -+ if (sz == sizeof(igen)) -+ return; /* success */ -+ -+ if (unlikely(sz >= 0)) -+ AuIOErr("xigen error (%zd)\n", sz); -+} -+ -+int au_xigen_new(struct inode *inode) -+{ -+ int err; -+ loff_t pos; -+ ssize_t sz; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ struct file *file; -+ -+ err = 0; -+ /* todo: dirty, at mount time */ -+ if (inode->i_ino == AUFS_ROOT_INO) -+ goto out; -+ sb = inode->i_sb; -+ SiMustAnyLock(sb); -+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) -+ goto out; -+ -+ err = -EFBIG; -+ pos = inode->i_ino; -+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) { -+ AuIOErr1("too large i%lld\n", pos); -+ goto out; -+ } -+ pos *= sizeof(inode->i_generation); -+ -+ err = 0; -+ sbinfo = au_sbi(sb); -+ file = sbinfo->si_xigen; -+ BUG_ON(!file); -+ -+ if (vfsub_f_size_read(file) -+ < pos + sizeof(inode->i_generation)) { -+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next); -+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation, -+ sizeof(inode->i_generation), &pos); -+ } else -+ sz = xino_fread(sbinfo->si_xread, file, 
&inode->i_generation, -+ sizeof(inode->i_generation), &pos); -+ if (sz == sizeof(inode->i_generation)) -+ goto out; /* success */ -+ -+ err = sz; -+ if (unlikely(sz >= 0)) { -+ err = -EIO; -+ AuIOErr("xigen error (%zd)\n", sz); -+ } -+ -+out: -+ return err; -+} -+ -+int au_xigen_set(struct super_block *sb, struct file *base) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ struct file *file; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ file = au_xino_create2(base, sbinfo->si_xigen); -+ err = PTR_ERR(file); -+ if (IS_ERR(file)) -+ goto out; -+ err = 0; -+ if (sbinfo->si_xigen) -+ fput(sbinfo->si_xigen); -+ sbinfo->si_xigen = file; -+ -+out: -+ return err; -+} -+ -+void au_xigen_clr(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ if (sbinfo->si_xigen) { -+ fput(sbinfo->si_xigen); -+ sbinfo->si_xigen = NULL; -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino, -+ ino_t dir_ino) -+{ -+ struct dentry *dentry, *d; -+ struct inode *inode; -+ unsigned int sigen; -+ -+ dentry = NULL; -+ inode = ilookup(sb, ino); -+ if (!inode) -+ goto out; -+ -+ dentry = ERR_PTR(-ESTALE); -+ sigen = au_sigen(sb); -+ if (unlikely(is_bad_inode(inode) -+ || IS_DEADDIR(inode) -+ || sigen != au_iigen(inode, NULL))) -+ goto out_iput; -+ -+ dentry = NULL; -+ if (!dir_ino || S_ISDIR(inode->i_mode)) -+ dentry = d_find_alias(inode); -+ else { -+ spin_lock(&inode->i_lock); -+ hlist_for_each_entry(d, &inode->i_dentry, d_alias) { -+ spin_lock(&d->d_lock); -+ if (!au_test_anon(d) -+ && d->d_parent->d_inode->i_ino == dir_ino) { -+ dentry = dget_dlock(d); -+ spin_unlock(&d->d_lock); -+ break; -+ } -+ spin_unlock(&d->d_lock); -+ } -+ spin_unlock(&inode->i_lock); -+ } -+ if (unlikely(dentry && au_digen_test(dentry, sigen))) { -+ /* need to refresh */ -+ dput(dentry); -+ dentry = NULL; -+ } -+ -+out_iput: -+ iput(inode); 
-+out: -+ AuTraceErrPtr(dentry); -+ return dentry; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* todo: dirty? */ -+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */ -+ -+struct au_compare_mnt_args { -+ /* input */ -+ struct super_block *sb; -+ -+ /* output */ -+ struct vfsmount *mnt; -+}; -+ -+static int au_compare_mnt(struct vfsmount *mnt, void *arg) -+{ -+ struct au_compare_mnt_args *a = arg; -+ -+ if (mnt->mnt_sb != a->sb) -+ return 0; -+ a->mnt = mntget(mnt); -+ return 1; -+} -+ -+static struct vfsmount *au_mnt_get(struct super_block *sb) -+{ -+ int err; -+ struct path root; -+ struct au_compare_mnt_args args = { -+ .sb = sb -+ }; -+ -+ get_fs_root(current->fs, &root); -+ rcu_read_lock(); -+ err = iterate_mounts(au_compare_mnt, &args, root.mnt); -+ rcu_read_unlock(); -+ path_put(&root); -+ AuDebugOn(!err); -+ AuDebugOn(!args.mnt); -+ return args.mnt; -+} -+ -+struct au_nfsd_si_lock { -+ unsigned int sigen; -+ aufs_bindex_t bindex, br_id; -+ unsigned char force_lock; -+}; -+ -+static int si_nfsd_read_lock(struct super_block *sb, -+ struct au_nfsd_si_lock *nsi_lock) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ -+ si_read_lock(sb, AuLock_FLUSH); -+ -+ /* branch id may be wrapped around */ -+ err = 0; -+ bindex = au_br_index(sb, nsi_lock->br_id); -+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb)) -+ goto out; /* success */ -+ -+ err = -ESTALE; -+ bindex = -1; -+ if (!nsi_lock->force_lock) -+ si_read_unlock(sb); -+ -+out: -+ nsi_lock->bindex = bindex; -+ return err; -+} -+ -+struct find_name_by_ino { -+ struct dir_context ctx; -+ int called, found; -+ ino_t ino; -+ char *name; -+ int namelen; -+}; -+ -+static int -+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen, -+ loff_t offset, u64 ino, unsigned int d_type) -+{ -+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino, -+ ctx); -+ -+ a->called++; -+ if (a->ino != ino) -+ return 0; -+ -+ 
memcpy(a->name, name, namelen); -+ a->namelen = namelen; -+ a->found = 1; -+ return 1; -+} -+ -+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino, -+ struct au_nfsd_si_lock *nsi_lock) -+{ -+ struct dentry *dentry, *parent; -+ struct file *file; -+ struct inode *dir; -+ struct find_name_by_ino arg = { -+ .ctx = { -+ .actor = au_diractor(find_name_by_ino) -+ } -+ }; -+ int err; -+ -+ parent = path->dentry; -+ if (nsi_lock) -+ si_read_unlock(parent->d_sb); -+ file = vfsub_dentry_open(path, au_dir_roflags); -+ dentry = (void *)file; -+ if (IS_ERR(file)) -+ goto out; -+ -+ dentry = ERR_PTR(-ENOMEM); -+ arg.name = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!arg.name)) -+ goto out_file; -+ arg.ino = ino; -+ arg.found = 0; -+ do { -+ arg.called = 0; -+ /* smp_mb(); */ -+ err = vfsub_iterate_dir(file, &arg.ctx); -+ } while (!err && !arg.found && arg.called); -+ dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_name; -+ /* instead of ENOENT */ -+ dentry = ERR_PTR(-ESTALE); -+ if (!arg.found) -+ goto out_name; -+ -+ /* do not call vfsub_lkup_one() */ -+ dir = parent->d_inode; -+ mutex_lock(&dir->i_mutex); -+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen); -+ mutex_unlock(&dir->i_mutex); -+ AuTraceErrPtr(dentry); -+ if (IS_ERR(dentry)) -+ goto out_name; -+ AuDebugOn(au_test_anon(dentry)); -+ if (unlikely(!dentry->d_inode)) { -+ dput(dentry); -+ dentry = ERR_PTR(-ENOENT); -+ } -+ -+out_name: -+ free_page((unsigned long)arg.name); -+out_file: -+ fput(file); -+out: -+ if (unlikely(nsi_lock -+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0)) -+ if (!IS_ERR(dentry)) { -+ dput(dentry); -+ dentry = ERR_PTR(-ESTALE); -+ } -+ AuTraceErrPtr(dentry); -+ return dentry; -+} -+ -+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino, -+ ino_t dir_ino, -+ struct au_nfsd_si_lock *nsi_lock) -+{ -+ struct dentry *dentry; -+ struct path path; -+ -+ if (dir_ino != AUFS_ROOT_INO) { -+ path.dentry = decode_by_ino(sb, dir_ino, 0); -+ 
dentry = path.dentry; -+ if (!path.dentry || IS_ERR(path.dentry)) -+ goto out; -+ AuDebugOn(au_test_anon(path.dentry)); -+ } else -+ path.dentry = dget(sb->s_root); -+ -+ path.mnt = au_mnt_get(sb); -+ dentry = au_lkup_by_ino(&path, ino, nsi_lock); -+ path_put(&path); -+ -+out: -+ AuTraceErrPtr(dentry); -+ return dentry; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int h_acceptable(void *expv, struct dentry *dentry) -+{ -+ return 1; -+} -+ -+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath, -+ char *buf, int len, struct super_block *sb) -+{ -+ char *p; -+ int n; -+ struct path path; -+ -+ p = d_path(h_rootpath, buf, len); -+ if (IS_ERR(p)) -+ goto out; -+ n = strlen(p); -+ -+ path.mnt = h_rootpath->mnt; -+ path.dentry = h_parent; -+ p = d_path(&path, buf, len); -+ if (IS_ERR(p)) -+ goto out; -+ if (n != 1) -+ p += n; -+ -+ path.mnt = au_mnt_get(sb); -+ path.dentry = sb->s_root; -+ p = d_path(&path, buf, len - strlen(p)); -+ mntput(path.mnt); -+ if (IS_ERR(p)) -+ goto out; -+ if (n != 1) -+ p[strlen(p)] = '/'; -+ -+out: -+ AuTraceErrPtr(p); -+ return p; -+} -+ -+static -+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh, -+ int fh_len, struct au_nfsd_si_lock *nsi_lock) -+{ -+ struct dentry *dentry, *h_parent, *root; -+ struct super_block *h_sb; -+ char *pathname, *p; -+ struct vfsmount *h_mnt; -+ struct au_branch *br; -+ int err; -+ struct path path; -+ -+ br = au_sbr(sb, nsi_lock->bindex); -+ h_mnt = au_br_mnt(br); -+ h_sb = h_mnt->mnt_sb; -+ /* todo: call lower fh_to_dentry()? fh_to_parent()? 
*/ -+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail), -+ fh_len - Fh_tail, fh[Fh_h_type], -+ h_acceptable, /*context*/NULL); -+ dentry = h_parent; -+ if (unlikely(!h_parent || IS_ERR(h_parent))) { -+ AuWarn1("%s decode_fh failed, %ld\n", -+ au_sbtype(h_sb), PTR_ERR(h_parent)); -+ goto out; -+ } -+ dentry = NULL; -+ if (unlikely(au_test_anon(h_parent))) { -+ AuWarn1("%s decode_fh returned a disconnected dentry\n", -+ au_sbtype(h_sb)); -+ goto out_h_parent; -+ } -+ -+ dentry = ERR_PTR(-ENOMEM); -+ pathname = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!pathname)) -+ goto out_h_parent; -+ -+ root = sb->s_root; -+ path.mnt = h_mnt; -+ di_read_lock_parent(root, !AuLock_IR); -+ path.dentry = au_h_dptr(root, nsi_lock->bindex); -+ di_read_unlock(root, !AuLock_IR); -+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb); -+ dentry = (void *)p; -+ if (IS_ERR(p)) -+ goto out_pathname; -+ -+ si_read_unlock(sb); -+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); -+ dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_relock; -+ -+ dentry = ERR_PTR(-ENOENT); -+ AuDebugOn(au_test_anon(path.dentry)); -+ if (unlikely(!path.dentry->d_inode)) -+ goto out_path; -+ -+ if (ino != path.dentry->d_inode->i_ino) -+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL); -+ else -+ dentry = dget(path.dentry); -+ -+out_path: -+ path_put(&path); -+out_relock: -+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0)) -+ if (!IS_ERR(dentry)) { -+ dput(dentry); -+ dentry = ERR_PTR(-ESTALE); -+ } -+out_pathname: -+ free_page((unsigned long)pathname); -+out_h_parent: -+ dput(h_parent); -+out: -+ AuTraceErrPtr(dentry); -+ return dentry; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct dentry * -+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, -+ int fh_type) -+{ -+ struct dentry *dentry; -+ __u32 *fh = fid->raw; -+ struct au_branch *br; -+ ino_t ino, dir_ino; -+ struct 
au_nfsd_si_lock nsi_lock = { -+ .force_lock = 0 -+ }; -+ -+ dentry = ERR_PTR(-ESTALE); -+ /* it should never happen, but the file handle is unreliable */ -+ if (unlikely(fh_len < Fh_tail)) -+ goto out; -+ nsi_lock.sigen = fh[Fh_sigen]; -+ nsi_lock.br_id = fh[Fh_br_id]; -+ -+ /* branch id may be wrapped around */ -+ br = NULL; -+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock))) -+ goto out; -+ nsi_lock.force_lock = 1; -+ -+ /* is this inode still cached? */ -+ ino = decode_ino(fh + Fh_ino); -+ /* it should never happen */ -+ if (unlikely(ino == AUFS_ROOT_INO)) -+ goto out; -+ -+ dir_ino = decode_ino(fh + Fh_dir_ino); -+ dentry = decode_by_ino(sb, ino, dir_ino); -+ if (IS_ERR(dentry)) -+ goto out_unlock; -+ if (dentry) -+ goto accept; -+ -+ /* is the parent dir cached? */ -+ br = au_sbr(sb, nsi_lock.bindex); -+ atomic_inc(&br->br_count); -+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock); -+ if (IS_ERR(dentry)) -+ goto out_unlock; -+ if (dentry) -+ goto accept; -+ -+ /* lookup path */ -+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock); -+ if (IS_ERR(dentry)) -+ goto out_unlock; -+ if (unlikely(!dentry)) -+ /* todo?: make it ESTALE */ -+ goto out_unlock; -+ -+accept: -+ if (!au_digen_test(dentry, au_sigen(sb)) -+ && dentry->d_inode->i_generation == fh[Fh_igen]) -+ goto out_unlock; /* success */ -+ -+ dput(dentry); -+ dentry = ERR_PTR(-ESTALE); -+out_unlock: -+ if (br) -+ atomic_dec(&br->br_count); -+ si_read_unlock(sb); -+out: -+ AuTraceErrPtr(dentry); -+ return dentry; -+} -+ -+#if 0 /* reserved for future use */ -+/* support subtreecheck option */ -+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid, -+ int fh_len, int fh_type) -+{ -+ struct dentry *parent; -+ __u32 *fh = fid->raw; -+ ino_t dir_ino; -+ -+ dir_ino = decode_ino(fh + Fh_dir_ino); -+ parent = decode_by_ino(sb, dir_ino, 0); -+ if (IS_ERR(parent)) -+ goto out; -+ if (!parent) -+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]), -+ dir_ino, fh, fh_len); 
-+ -+out: -+ AuTraceErrPtr(parent); -+ return parent; -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, -+ struct inode *dir) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct super_block *sb, *h_sb; -+ struct dentry *dentry, *parent, *h_parent; -+ struct inode *h_dir; -+ struct au_branch *br; -+ -+ err = -ENOSPC; -+ if (unlikely(*max_len <= Fh_tail)) { -+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len); -+ goto out; -+ } -+ -+ err = FILEID_ROOT; -+ if (inode->i_ino == AUFS_ROOT_INO) { -+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO); -+ goto out; -+ } -+ -+ h_parent = NULL; -+ sb = inode->i_sb; -+ err = si_read_lock(sb, AuLock_FLUSH); -+ if (unlikely(err)) -+ goto out; -+ -+#ifdef CONFIG_AUFS_DEBUG -+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO))) -+ AuWarn1("NFS-exporting requires xino\n"); -+#endif -+ err = -EIO; -+ parent = NULL; -+ ii_read_lock_child(inode); -+ bindex = au_ibstart(inode); -+ if (!dir) { -+ dentry = d_find_alias(inode); -+ if (unlikely(!dentry)) -+ goto out_unlock; -+ AuDebugOn(au_test_anon(dentry)); -+ parent = dget_parent(dentry); -+ dput(dentry); -+ if (unlikely(!parent)) -+ goto out_unlock; -+ dir = parent->d_inode; -+ } -+ -+ ii_read_lock_parent(dir); -+ h_dir = au_h_iptr(dir, bindex); -+ ii_read_unlock(dir); -+ if (unlikely(!h_dir)) -+ goto out_parent; -+ h_parent = d_find_alias(h_dir); -+ if (unlikely(!h_parent)) -+ goto out_hparent; -+ -+ err = -EPERM; -+ br = au_sbr(sb, bindex); -+ h_sb = au_br_sb(br); -+ if (unlikely(!h_sb->s_export_op)) { -+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb)); -+ goto out_hparent; -+ } -+ -+ fh[Fh_br_id] = br->br_id; -+ fh[Fh_sigen] = au_sigen(sb); -+ encode_ino(fh + Fh_ino, inode->i_ino); -+ encode_ino(fh + Fh_dir_ino, dir->i_ino); -+ fh[Fh_igen] = inode->i_generation; -+ -+ *max_len -= Fh_tail; -+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail), -+ 
max_len, -+ /*connectable or subtreecheck*/0); -+ err = fh[Fh_h_type]; -+ *max_len += Fh_tail; -+ /* todo: macros? */ -+ if (err != FILEID_INVALID) -+ err = 99; -+ else -+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb)); -+ -+out_hparent: -+ dput(h_parent); -+out_parent: -+ dput(parent); -+out_unlock: -+ ii_read_unlock(inode); -+ si_read_unlock(sb); -+out: -+ if (unlikely(err < 0)) -+ err = FILEID_INVALID; -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_commit_metadata(struct inode *inode) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct super_block *sb; -+ struct inode *h_inode; -+ int (*f)(struct inode *inode); -+ -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ ii_write_lock_child(inode); -+ bindex = au_ibstart(inode); -+ AuDebugOn(bindex < 0); -+ h_inode = au_h_iptr(inode, bindex); -+ -+ f = h_inode->i_sb->s_export_op->commit_metadata; -+ if (f) -+ err = f(h_inode); -+ else { -+ struct writeback_control wbc = { -+ .sync_mode = WB_SYNC_ALL, -+ .nr_to_write = 0 /* metadata only */ -+ }; -+ -+ err = sync_inode(h_inode, &wbc); -+ } -+ -+ au_cpup_attr_timesizes(inode); -+ ii_write_unlock(inode); -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct export_operations aufs_export_op = { -+ .fh_to_dentry = aufs_fh_to_dentry, -+ /* .fh_to_parent = aufs_fh_to_parent, */ -+ .encode_fh = aufs_encode_fh, -+ .commit_metadata = aufs_commit_metadata -+}; -+ -+void au_export_init(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ __u32 u; -+ -+ sb->s_export_op = &aufs_export_op; -+ sbinfo = au_sbi(sb); -+ sbinfo->si_xigen = NULL; -+ get_random_bytes(&u, sizeof(u)); -+ BUILD_BUG_ON(sizeof(u) != sizeof(int)); -+ atomic_set(&sbinfo->si_xigen_next, u); -+} -diff --git fs/aufs/f_op.c fs/aufs/f_op.c -new file mode 100644 -index 0000000..510919e ---- /dev/null -+++ fs/aufs/f_op.c -@@ 
-0,0 +1,769 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * file and vm operations -+ */ -+ -+#include -+#include -+#include -+#include -+#include "aufs.h" -+ -+int au_do_open_nondir(struct file *file, int flags) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct file *h_file; -+ struct dentry *dentry; -+ struct au_finfo *finfo; -+ struct inode *h_inode; -+ -+ FiMustWriteLock(file); -+ -+ err = 0; -+ dentry = file->f_dentry; -+ finfo = au_fi(file); -+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop)); -+ atomic_set(&finfo->fi_mmapped, 0); -+ bindex = au_dbstart(dentry); -+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0); -+ if (IS_ERR(h_file)) -+ err = PTR_ERR(h_file); -+ else { -+ if ((flags & __O_TMPFILE) -+ && !(flags & O_EXCL)) { -+ h_inode = file_inode(h_file); -+ spin_lock(&h_inode->i_lock); -+ h_inode->i_state |= I_LINKABLE; -+ spin_unlock(&h_inode->i_lock); -+ } -+ au_set_fbstart(file, bindex); -+ au_set_h_fptr(file, bindex, h_file); -+ au_update_figen(file); -+ finfo->fi_file = file; -+ au_sphl_add(&finfo->fi_hlist, &au_sbi(dentry->d_sb)->si_files); -+ /* todo: necessary? 
*/ -+ /* file->f_ra = h_file->f_ra; */ -+ } -+ -+ return err; -+} -+ -+static int aufs_open_nondir(struct inode *inode __maybe_unused, -+ struct file *file) -+{ -+ int err; -+ struct super_block *sb; -+ -+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n", -+ file, vfsub_file_flags(file), file->f_mode); -+ -+ sb = file->f_dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ err = au_do_open(file, au_do_open_nondir, /*fidir*/NULL); -+ si_read_unlock(sb); -+ return err; -+} -+ -+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file) -+{ -+ struct au_finfo *finfo; -+ aufs_bindex_t bindex; -+ -+ finfo = au_fi(file); -+ au_sphl_del(&finfo->fi_hlist, &au_sbi(file->f_dentry->d_sb)->si_files); -+ bindex = finfo->fi_btop; -+ if (bindex >= 0) -+ au_set_h_fptr(file, bindex, NULL); -+ -+ au_finfo_fin(file); -+ return 0; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_do_flush_nondir(struct file *file, fl_owner_t id) -+{ -+ int err; -+ struct file *h_file; -+ -+ err = 0; -+ h_file = au_hf_top(file); -+ if (h_file) -+ err = vfsub_flush(h_file, id); -+ return err; -+} -+ -+static int aufs_flush_nondir(struct file *file, fl_owner_t id) -+{ -+ return au_do_flush(file, id, au_do_flush_nondir); -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * read and write functions acquire [fdi]_rwsem once, but release before -+ * mmap_sem. This is because to stop a race condition between mmap(2). -+ * Releasing these aufs-rwsem should be safe, no branch-mamagement (by keeping -+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in -+ * read functions after [fdi]_rwsem are released, but it should be harmless. 
-+ */ -+ -+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count, -+ loff_t *ppos) -+{ -+ ssize_t err; -+ struct dentry *dentry; -+ struct file *h_file; -+ struct super_block *sb; -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); -+ -+ /* filedata may be obsoleted by concurrent copyup, but no problem */ -+ err = vfsub_read_u(h_file, buf, count, ppos); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(dentry->d_inode, file_inode(h_file)); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* -+ * todo: very ugly -+ * it locks both of i_mutex and si_rwsem for read in safe. -+ * if the plink maintenance mode continues forever (that is the problem), -+ * may loop forever. 
-+ */ -+static void au_mtx_and_read_lock(struct inode *inode) -+{ -+ int err; -+ struct super_block *sb = inode->i_sb; -+ -+ while (1) { -+ mutex_lock(&inode->i_mutex); -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (!err) -+ break; -+ mutex_unlock(&inode->i_mutex); -+ si_read_lock(sb, AuLock_NOPLMW); -+ si_read_unlock(sb); -+ } -+} -+ -+static ssize_t aufs_write(struct file *file, const char __user *ubuf, -+ size_t count, loff_t *ppos) -+{ -+ ssize_t err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct super_block *sb; -+ struct inode *inode; -+ struct file *h_file; -+ char __user *buf = (char __user *)ubuf; -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ au_mtx_and_read_lock(inode); -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) { -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ goto out; -+ } -+ -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ au_unpin(&pin); -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ -+ err = vfsub_write_u(h_file, buf, count, ppos); -+ ii_write_lock_child(inode); -+ au_cpup_attr_timesizes(inode); -+ inode->i_mode = file_inode(h_file)->i_mode; -+ ii_write_unlock(inode); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+ -+static ssize_t au_do_aio(struct file *h_file, int rw, struct kiocb *kio, -+ const struct iovec *iov, unsigned long nv, loff_t pos) -+{ -+ ssize_t err; -+ struct file *file; -+ ssize_t (*func)(struct kiocb *, const struct iovec *, unsigned long, -+ loff_t); -+ -+ err = security_file_permission(h_file, rw); -+ if (unlikely(err)) -+ goto out; -+ -+ err = -ENOSYS; -+ func = NULL; -+ if (rw == MAY_READ) -+ func = h_file->f_op->aio_read; -+ else if (rw == MAY_WRITE) -+ func = h_file->f_op->aio_write; -+ if 
(func) { -+ file = kio->ki_filp; -+ kio->ki_filp = h_file; -+ lockdep_off(); -+ err = func(kio, iov, nv, pos); -+ lockdep_on(); -+ kio->ki_filp = file; -+ } else -+ /* currently there is no such fs */ -+ WARN_ON_ONCE(1); -+ -+out: -+ return err; -+} -+ -+static ssize_t aufs_aio_read(struct kiocb *kio, const struct iovec *iov, -+ unsigned long nv, loff_t pos) -+{ -+ ssize_t err; -+ struct file *file, *h_file; -+ struct dentry *dentry; -+ struct super_block *sb; -+ -+ file = kio->ki_filp; -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); -+ -+ err = au_do_aio(h_file, MAY_READ, kio, iov, nv, pos); -+ /* todo: necessary? */ -+ /* file->f_ra = h_file->f_ra; */ -+ /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(dentry->d_inode, file_inode(h_file)); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+static ssize_t aufs_aio_write(struct kiocb *kio, const struct iovec *iov, -+ unsigned long nv, loff_t pos) -+{ -+ ssize_t err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct inode *inode; -+ struct file *file, *h_file; -+ struct super_block *sb; -+ -+ file = kio->ki_filp; -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ au_mtx_and_read_lock(inode); -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) { -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ goto out; -+ } -+ -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ au_unpin(&pin); -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ -+ err = au_do_aio(h_file, 
MAY_WRITE, kio, iov, nv, pos); -+ ii_write_lock_child(inode); -+ au_cpup_attr_timesizes(inode); -+ inode->i_mode = file_inode(h_file)->i_mode; -+ ii_write_unlock(inode); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+ -+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags) -+{ -+ ssize_t err; -+ struct file *h_file; -+ struct dentry *dentry; -+ struct super_block *sb; -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ err = -EINVAL; -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ if (au_test_loopback_kthread()) { -+ au_warn_loopback(h_file->f_dentry->d_sb); -+ if (file->f_mapping != h_file->f_mapping) { -+ file->f_mapping = h_file->f_mapping; -+ smp_mb(); /* unnecessary? */ -+ } -+ } -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); -+ -+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags); -+ /* todo: necessasry? 
*/ -+ /* file->f_ra = h_file->f_ra; */ -+ /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(dentry->d_inode, file_inode(h_file)); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+static ssize_t -+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, -+ size_t len, unsigned int flags) -+{ -+ ssize_t err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct inode *inode; -+ struct file *h_file; -+ struct super_block *sb; -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ au_mtx_and_read_lock(inode); -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) { -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ goto out; -+ } -+ -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ au_unpin(&pin); -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ -+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags); -+ ii_write_lock_child(inode); -+ au_cpup_attr_timesizes(inode); -+ inode->i_mode = file_inode(h_file)->i_mode; -+ ii_write_unlock(inode); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+ -+static long aufs_fallocate(struct file *file, int mode, loff_t offset, -+ loff_t len) -+{ -+ long err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct super_block *sb; -+ struct inode *inode; -+ struct file *h_file; -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ au_mtx_and_read_lock(inode); -+ -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) { -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ goto 
out; -+ } -+ -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ au_unpin(&pin); -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+ -+ lockdep_off(); -+ err = do_fallocate(h_file, mode, offset, len); -+ lockdep_on(); -+ ii_write_lock_child(inode); -+ au_cpup_attr_timesizes(inode); -+ inode->i_mode = file_inode(h_file)->i_mode; -+ ii_write_unlock(inode); -+ fput(h_file); -+ -+out: -+ si_read_unlock(sb); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * The locking order around current->mmap_sem. -+ * - in most and regular cases -+ * file I/O syscall -- aufs_read() or something -+ * -- si_rwsem for read -- mmap_sem -+ * (Note that [fdi]i_rwsem are released before mmap_sem). -+ * - in mmap case -+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem -+ * This AB-BA order is definitly bad, but is not a problem since "si_rwsem for -+ * read" allows muliple processes to acquire it and [fdi]i_rwsem are not held in -+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though. -+ * It means that when aufs acquires si_rwsem for write, the process should never -+ * acquire mmap_sem. -+ * -+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a -+ * problem either since any directory is not able to be mmap-ed. -+ * The similar scenario is applied to aufs_readlink() too. -+ */ -+ -+#if 0 /* stop calling security_file_mmap() */ -+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */ -+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b) -+ -+static unsigned long au_arch_prot_conv(unsigned long flags) -+{ -+ /* currently ppc64 only */ -+#ifdef CONFIG_PPC64 -+ /* cf. 
linux/arch/powerpc/include/asm/mman.h */ -+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO); -+ return AuConv_VM_PROT(flags, SAO); -+#else -+ AuDebugOn(arch_calc_vm_prot_bits(-1)); -+ return 0; -+#endif -+} -+ -+static unsigned long au_prot_conv(unsigned long flags) -+{ -+ return AuConv_VM_PROT(flags, READ) -+ | AuConv_VM_PROT(flags, WRITE) -+ | AuConv_VM_PROT(flags, EXEC) -+ | au_arch_prot_conv(flags); -+} -+ -+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */ -+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b) -+ -+static unsigned long au_flag_conv(unsigned long flags) -+{ -+ return AuConv_VM_MAP(flags, GROWSDOWN) -+ | AuConv_VM_MAP(flags, DENYWRITE) -+ | AuConv_VM_MAP(flags, LOCKED); -+} -+#endif -+ -+static int aufs_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ int err; -+ aufs_bindex_t bstart; -+ const unsigned char wlock -+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED); -+ struct dentry *dentry; -+ struct super_block *sb; -+ struct file *h_file; -+ struct au_branch *br; -+ struct au_pin pin; -+ -+ AuDbgVmRegion(file, vma); -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ lockdep_off(); -+ si_read_lock(sb, AuLock_NOPLMW); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ if (wlock) { -+ err = au_ready_to_write(file, -1, &pin); -+ di_write_unlock(dentry); -+ if (unlikely(err)) { -+ fi_write_unlock(file); -+ goto out; -+ } -+ au_unpin(&pin); -+ } else -+ di_write_unlock(dentry); -+ -+ bstart = au_fbstart(file); -+ br = au_sbr(sb, bstart); -+ h_file = au_hf_top(file); -+ get_file(h_file); -+ au_set_mmapped(file); -+ fi_write_unlock(file); -+ lockdep_on(); -+ -+ au_vm_file_reset(vma, h_file); -+ /* -+ * we cannot call security_mmap_file() here since it may acquire -+ * mmap_sem or i_mutex. 
-+ * -+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags), -+ * au_flag_conv(vma->vm_flags)); -+ */ -+ if (!err) -+ err = h_file->f_op->mmap(h_file, vma); -+ if (unlikely(err)) -+ goto out_reset; -+ -+ au_vm_prfile_set(vma, file); -+ /* update without lock, I don't think it a problem */ -+ fsstack_copy_attr_atime(file_inode(file), file_inode(h_file)); -+ goto out_fput; /* success */ -+ -+out_reset: -+ au_unset_mmapped(file); -+ au_vm_file_reset(vma, file); -+out_fput: -+ fput(h_file); -+ lockdep_off(); -+out: -+ si_read_unlock(sb); -+ lockdep_on(); -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end, -+ int datasync) -+{ -+ int err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct inode *inode; -+ struct file *h_file; -+ struct super_block *sb; -+ -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ sb = dentry->d_sb; -+ mutex_lock(&inode->i_mutex); -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out; -+ -+ err = 0; /* -EBADF; */ /* posix? 
*/ -+ if (unlikely(!(file->f_mode & FMODE_WRITE))) -+ goto out_si; -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out_si; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ au_unpin(&pin); -+ -+ err = -EINVAL; -+ h_file = au_hf_top(file); -+ err = vfsub_fsync(h_file, &h_file->f_path, datasync); -+ au_cpup_attr_timesizes(inode); -+ -+out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+out_si: -+ si_read_unlock(sb); -+out: -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+ -+/* no one supports this operation, currently */ -+#if 0 -+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync) -+{ -+ int err; -+ struct au_pin pin; -+ struct dentry *dentry; -+ struct inode *inode; -+ struct file *file, *h_file; -+ -+ file = kio->ki_filp; -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ au_mtx_and_read_lock(inode); -+ -+ err = 0; /* -EBADF; */ /* posix? 
*/ -+ if (unlikely(!(file->f_mode & FMODE_WRITE))) -+ goto out; -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_ready_to_write(file, -1, &pin); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ au_unpin(&pin); -+ -+ err = -ENOSYS; -+ h_file = au_hf_top(file); -+ if (h_file->f_op->aio_fsync) { -+ struct mutex *h_mtx; -+ -+ h_mtx = &file_inode(h_file)->i_mutex; -+ if (!is_sync_kiocb(kio)) { -+ get_file(h_file); -+ fput(file); -+ } -+ kio->ki_filp = h_file; -+ err = h_file->f_op->aio_fsync(kio, datasync); -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ if (!err) -+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); -+ /*ignore*/ -+ au_cpup_attr_timesizes(inode); -+ mutex_unlock(h_mtx); -+ } -+ -+out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ fi_write_unlock(file); -+out: -+ si_read_unlock(inode->sb); -+ mutex_unlock(&inode->i_mutex); -+ return err; -+} -+#endif -+ -+static int aufs_fasync(int fd, struct file *file, int flag) -+{ -+ int err; -+ struct file *h_file; -+ struct dentry *dentry; -+ struct super_block *sb; -+ -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ h_file = au_hf_top(file); -+ if (h_file->f_op->fasync) -+ err = h_file->f_op->fasync(fd, h_file, flag); -+ -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); -+ -+out: -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* no one supports this operation, currently */ -+#if 0 -+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset, -+ size_t len, loff_t *pos , int more) -+{ -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+const struct file_operations aufs_file_fop = { -+ .owner 
= THIS_MODULE, -+ -+ .llseek = default_llseek, -+ -+ .read = aufs_read, -+ .write = aufs_write, -+ .aio_read = aufs_aio_read, -+ .aio_write = aufs_aio_write, -+#ifdef CONFIG_AUFS_POLL -+ .poll = aufs_poll, -+#endif -+ .unlocked_ioctl = aufs_ioctl_nondir, -+#ifdef CONFIG_COMPAT -+ .compat_ioctl = aufs_compat_ioctl_nondir, -+#endif -+ .mmap = aufs_mmap, -+ .open = aufs_open_nondir, -+ .flush = aufs_flush_nondir, -+ .release = aufs_release_nondir, -+ .fsync = aufs_fsync_nondir, -+ /* .aio_fsync = aufs_aio_fsync_nondir, */ -+ .fasync = aufs_fasync, -+ /* .sendpage = aufs_sendpage, */ -+ .splice_write = aufs_splice_write, -+ .splice_read = aufs_splice_read, -+#if 0 -+ .aio_splice_write = aufs_aio_splice_write, -+ .aio_splice_read = aufs_aio_splice_read, -+#endif -+ .fallocate = aufs_fallocate -+}; -diff --git fs/aufs/file.c fs/aufs/file.c -new file mode 100644 -index 0000000..13e9691 ---- /dev/null -+++ fs/aufs/file.c -@@ -0,0 +1,796 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * handling file/dir, and address_space operation -+ */ -+ -+#ifdef CONFIG_AUFS_DEBUG -+#include -+#endif -+#include -+#include "aufs.h" -+ -+/* drop flags for writing */ -+unsigned int au_file_roflags(unsigned int flags) -+{ -+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); -+ flags |= O_RDONLY | O_NOATIME; -+ return flags; -+} -+ -+/* common functions to regular file and dir */ -+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, -+ struct file *file, int force_wr) -+{ -+ struct file *h_file; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct path h_path; -+ int err, exec_flag; -+ -+ /* a race condition can happen between open and unlink/rmdir */ -+ h_file = ERR_PTR(-ENOENT); -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (au_test_nfsd() && !h_dentry) -+ goto out; -+ h_inode = h_dentry->d_inode; -+ if (au_test_nfsd() && !h_inode) -+ goto out; -+ 
spin_lock(&h_dentry->d_lock); -+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry)) -+ || !h_inode -+ /* || !dentry->d_inode->i_nlink */ -+ ; -+ spin_unlock(&h_dentry->d_lock); -+ if (unlikely(err)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ br = au_sbr(sb, bindex); -+ h_file = ERR_PTR(-EACCES); -+ exec_flag = flags & __FMODE_EXEC; -+ if (exec_flag && (au_br_mnt(br)->mnt_flags & MNT_NOEXEC)) -+ goto out; -+ -+ /* drop flags for writing */ -+ if (au_test_ro(sb, bindex, dentry->d_inode)) { -+ if (force_wr && !(flags & O_WRONLY)) -+ force_wr = 0; -+ flags = au_file_roflags(flags); -+ if (force_wr) { -+ h_file = ERR_PTR(-EROFS); -+ flags = au_file_roflags(flags); -+ if (unlikely(vfsub_native_ro(h_inode) -+ || IS_APPEND(h_inode))) -+ goto out; -+ flags &= ~O_ACCMODE; -+ flags |= O_WRONLY; -+ } -+ } -+ flags &= ~O_CREAT; -+ atomic_inc(&br->br_count); -+ h_path.dentry = h_dentry; -+ h_path.mnt = au_br_mnt(br); -+ h_file = vfsub_dentry_open(&h_path, flags); -+ if (IS_ERR(h_file)) -+ goto out_br; -+ -+ if (exec_flag) { -+ err = deny_write_access(h_file); -+ if (unlikely(err)) { -+ fput(h_file); -+ h_file = ERR_PTR(err); -+ goto out_br; -+ } -+ } -+ fsnotify_open(h_file); -+ goto out; /* success */ -+ -+out_br: -+ atomic_dec(&br->br_count); -+out: -+ return h_file; -+} -+ -+static int au_cmoo(struct dentry *dentry) -+{ -+ int err, cmoo; -+ struct path h_path; -+ struct au_pin pin; -+ struct au_cp_generic cpg = { -+ .dentry = dentry, -+ .bdst = -1, -+ .bsrc = -1, -+ .len = -1, -+ .pin = &pin, -+ .flags = AuCpup_DTIME | AuCpup_HOPEN -+ }; -+ struct inode *inode, *delegated; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct dentry *parent; -+ struct au_hinode *hdir; -+ -+ DiMustWriteLock(dentry); -+ inode = dentry->d_inode; -+ IiMustWriteLock(inode); -+ -+ err = 0; -+ if (IS_ROOT(dentry)) -+ goto out; -+ cpg.bsrc = au_dbstart(dentry); -+ if (!cpg.bsrc) -+ goto out; -+ -+ sb = dentry->d_sb; -+ br = au_sbr(sb, cpg.bsrc); -+ cmoo = au_br_cmoo(br->br_perm); -+ if (!cmoo) 
-+ goto out; -+ if (!S_ISREG(inode->i_mode)) -+ cmoo &= AuBrAttr_COO_ALL; -+ if (!cmoo) -+ goto out; -+ -+ parent = dget_parent(dentry); -+ di_write_lock_parent(parent); -+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1); -+ cpg.bdst = err; -+ if (unlikely(err < 0)) { -+ err = 0; /* there is no upper writable branch */ -+ goto out_dgrade; -+ } -+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst); -+ -+ /* do not respect the coo attrib for the target branch */ -+ err = au_cpup_dirs(dentry, cpg.bdst); -+ if (unlikely(err)) -+ goto out_dgrade; -+ -+ di_downgrade_lock(parent, AuLock_IR); -+ err = au_pin(&pin, dentry, cpg.bdst, au_opt_udba(sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (!err) { -+ err = au_sio_cpup_simple(&cpg); -+ au_unpin(&pin); -+ } -+ if (!err && (cmoo & AuBrWAttr_MOO)) { -+ /* todo: keep h_dentry? */ -+ h_path.mnt = au_br_mnt(br); -+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc); -+ hdir = au_hi(parent->d_inode, cpg.bsrc); -+ delegated = NULL; -+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT2); -+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, -+ /*force*/1); -+ au_hn_imtx_unlock(hdir); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ if (unlikely(err)) { -+ pr_err("unlink %pd after coo failed (%d), ignored\n", -+ dentry, err); -+ err = 0; -+ } -+ } -+ goto out_parent; -+ -+out_dgrade: -+ di_downgrade_lock(parent, AuLock_IR); -+out_parent: -+ di_read_unlock(parent, AuLock_IR); -+ dput(parent); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_do_open(struct file *file, int (*open)(struct file *file, int flags), -+ struct au_fidir *fidir) -+{ -+ int err; -+ struct dentry *dentry; -+ -+ err = au_finfo_init(file, fidir); -+ if (unlikely(err)) -+ goto out; -+ -+ dentry = file->f_dentry; -+ di_write_lock_child(dentry); -+ err = au_cmoo(dentry); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (!err) -+ err = open(file, vfsub_file_flags(file)); 
-+ di_read_unlock(dentry, AuLock_IR); -+ -+ fi_write_unlock(file); -+ if (unlikely(err)) { -+ au_fi(file)->fi_hdir = NULL; -+ au_finfo_fin(file); -+ } -+ -+out: -+ return err; -+} -+ -+int au_reopen_nondir(struct file *file) -+{ -+ int err; -+ aufs_bindex_t bstart; -+ struct dentry *dentry; -+ struct file *h_file, *h_file_tmp; -+ -+ dentry = file->f_dentry; -+ bstart = au_dbstart(dentry); -+ h_file_tmp = NULL; -+ if (au_fbstart(file) == bstart) { -+ h_file = au_hf_top(file); -+ if (file->f_mode == h_file->f_mode) -+ return 0; /* success */ -+ h_file_tmp = h_file; -+ get_file(h_file_tmp); -+ au_set_h_fptr(file, bstart, NULL); -+ } -+ AuDebugOn(au_fi(file)->fi_hdir); -+ /* -+ * it can happen -+ * file exists on both of rw and ro -+ * open --> dbstart and fbstart are both 0 -+ * prepend a branch as rw, "rw" become ro -+ * remove rw/file -+ * delete the top branch, "rw" becomes rw again -+ * --> dbstart is 1, fbstart is still 0 -+ * write --> fbstart is 0 but dbstart is 1 -+ */ -+ /* AuDebugOn(au_fbstart(file) < bstart); */ -+ -+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC, -+ file, /*force_wr*/0); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) { -+ if (h_file_tmp) { -+ atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count); -+ au_set_h_fptr(file, bstart, h_file_tmp); -+ h_file_tmp = NULL; -+ } -+ goto out; /* todo: close all? */ -+ } -+ -+ err = 0; -+ au_set_fbstart(file, bstart); -+ au_set_h_fptr(file, bstart, h_file); -+ au_update_figen(file); -+ /* todo: necessary? 
*/ -+ /* file->f_ra = h_file->f_ra; */ -+ -+out: -+ if (h_file_tmp) -+ fput(h_file_tmp); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt, -+ struct dentry *hi_wh) -+{ -+ int err; -+ aufs_bindex_t bstart; -+ struct au_dinfo *dinfo; -+ struct dentry *h_dentry; -+ struct au_hdentry *hdp; -+ -+ dinfo = au_di(file->f_dentry); -+ AuRwMustWriteLock(&dinfo->di_rwsem); -+ -+ bstart = dinfo->di_bstart; -+ dinfo->di_bstart = btgt; -+ hdp = dinfo->di_hdentry; -+ h_dentry = hdp[0 + btgt].hd_dentry; -+ hdp[0 + btgt].hd_dentry = hi_wh; -+ err = au_reopen_nondir(file); -+ hdp[0 + btgt].hd_dentry = h_dentry; -+ dinfo->di_bstart = bstart; -+ -+ return err; -+} -+ -+static int au_ready_to_write_wh(struct file *file, loff_t len, -+ aufs_bindex_t bcpup, struct au_pin *pin) -+{ -+ int err; -+ struct inode *inode, *h_inode; -+ struct dentry *h_dentry, *hi_wh; -+ struct au_cp_generic cpg = { -+ .dentry = file->f_dentry, -+ .bdst = bcpup, -+ .bsrc = -1, -+ .len = len, -+ .pin = pin -+ }; -+ -+ au_update_dbstart(cpg.dentry); -+ inode = cpg.dentry->d_inode; -+ h_inode = NULL; -+ if (au_dbstart(cpg.dentry) <= bcpup -+ && au_dbend(cpg.dentry) >= bcpup) { -+ h_dentry = au_h_dptr(cpg.dentry, bcpup); -+ if (h_dentry) -+ h_inode = h_dentry->d_inode; -+ } -+ hi_wh = au_hi_wh(inode, bcpup); -+ if (!hi_wh && !h_inode) -+ err = au_sio_cpup_wh(&cpg, file); -+ else -+ /* already copied-up after unlink */ -+ err = au_reopen_wh(file, bcpup, hi_wh); -+ -+ if (!err -+ && (inode->i_nlink > 1 -+ || (inode->i_state & I_LINKABLE)) -+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK)) -+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup)); -+ -+ return err; -+} -+ -+/* -+ * prepare the @file for writing. 
-+ */ -+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin) -+{ -+ int err; -+ aufs_bindex_t dbstart; -+ struct dentry *parent, *h_dentry; -+ struct inode *inode; -+ struct super_block *sb; -+ struct file *h_file; -+ struct au_cp_generic cpg = { -+ .dentry = file->f_dentry, -+ .bdst = -1, -+ .bsrc = -1, -+ .len = len, -+ .pin = pin, -+ .flags = AuCpup_DTIME -+ }; -+ -+ sb = cpg.dentry->d_sb; -+ inode = cpg.dentry->d_inode; -+ cpg.bsrc = au_fbstart(file); -+ err = au_test_ro(sb, cpg.bsrc, inode); -+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { -+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE, -+ /*flags*/0); -+ goto out; -+ } -+ -+ /* need to cpup or reopen */ -+ parent = dget_parent(cpg.dentry); -+ di_write_lock_parent(parent); -+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry); -+ cpg.bdst = err; -+ if (unlikely(err < 0)) -+ goto out_dgrade; -+ err = 0; -+ -+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) { -+ err = au_cpup_dirs(cpg.dentry, cpg.bdst); -+ if (unlikely(err)) -+ goto out_dgrade; -+ } -+ -+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_dgrade; -+ -+ h_dentry = au_hf_top(file)->f_dentry; -+ dbstart = au_dbstart(cpg.dentry); -+ if (dbstart <= cpg.bdst) { -+ h_dentry = au_h_dptr(cpg.dentry, cpg.bdst); -+ AuDebugOn(!h_dentry); -+ cpg.bsrc = cpg.bdst; -+ } -+ -+ if (dbstart <= cpg.bdst /* just reopen */ -+ || !d_unhashed(cpg.dentry) /* copyup and reopen */ -+ ) { -+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0); -+ if (IS_ERR(h_file)) -+ err = PTR_ERR(h_file); -+ else { -+ di_downgrade_lock(parent, AuLock_IR); -+ if (dbstart > cpg.bdst) -+ err = au_sio_cpup_simple(&cpg); -+ if (!err) -+ err = au_reopen_nondir(file); -+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file); -+ } -+ } else { /* copyup as wh and reopen */ -+ /* -+ * since writable hfsplus branch is not supported, -+ * h_open_pre/post() are unnecessary. 
-+ */ -+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin); -+ di_downgrade_lock(parent, AuLock_IR); -+ } -+ -+ if (!err) { -+ au_pin_set_parent_lflag(pin, /*lflag*/0); -+ goto out_dput; /* success */ -+ } -+ au_unpin(pin); -+ goto out_unlock; -+ -+out_dgrade: -+ di_downgrade_lock(parent, AuLock_IR); -+out_unlock: -+ di_read_unlock(parent, AuLock_IR); -+out_dput: -+ dput(parent); -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_do_flush(struct file *file, fl_owner_t id, -+ int (*flush)(struct file *file, fl_owner_t id)) -+{ -+ int err; -+ struct super_block *sb; -+ struct inode *inode; -+ -+ inode = file_inode(file); -+ sb = inode->i_sb; -+ si_noflush_read_lock(sb); -+ fi_read_lock(file); -+ ii_read_lock_child(inode); -+ -+ err = flush(file, id); -+ au_cpup_attr_timesizes(inode); -+ -+ ii_read_unlock(inode); -+ fi_read_unlock(file); -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_file_refresh_by_inode(struct file *file, int *need_reopen) -+{ -+ int err; -+ struct au_pin pin; -+ struct au_finfo *finfo; -+ struct dentry *parent, *hi_wh; -+ struct inode *inode; -+ struct super_block *sb; -+ struct au_cp_generic cpg = { -+ .dentry = file->f_dentry, -+ .bdst = -1, -+ .bsrc = -1, -+ .len = -1, -+ .pin = &pin, -+ .flags = AuCpup_DTIME -+ }; -+ -+ FiMustWriteLock(file); -+ -+ err = 0; -+ finfo = au_fi(file); -+ sb = cpg.dentry->d_sb; -+ inode = cpg.dentry->d_inode; -+ cpg.bdst = au_ibstart(inode); -+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry)) -+ goto out; -+ -+ parent = dget_parent(cpg.dentry); -+ if (au_test_ro(sb, cpg.bdst, inode)) { -+ di_read_lock_parent(parent, !AuLock_IR); -+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry); -+ cpg.bdst = err; -+ di_read_unlock(parent, !AuLock_IR); -+ if (unlikely(err < 0)) -+ goto out_parent; -+ err = 0; -+ } -+ -+ di_read_lock_parent(parent, AuLock_IR); -+ 
hi_wh = au_hi_wh(inode, cpg.bdst); -+ if (!S_ISDIR(inode->i_mode) -+ && au_opt_test(au_mntflags(sb), PLINK) -+ && au_plink_test(inode) -+ && !d_unhashed(cpg.dentry) -+ && cpg.bdst < au_dbstart(cpg.dentry)) { -+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ /* always superio. */ -+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (!err) { -+ err = au_sio_cpup_simple(&cpg); -+ au_unpin(&pin); -+ } -+ } else if (hi_wh) { -+ /* already copied-up after unlink */ -+ err = au_reopen_wh(file, cpg.bdst, hi_wh); -+ *need_reopen = 0; -+ } -+ -+out_unlock: -+ di_read_unlock(parent, AuLock_IR); -+out_parent: -+ dput(parent); -+out: -+ return err; -+} -+ -+static void au_do_refresh_dir(struct file *file) -+{ -+ aufs_bindex_t bindex, bend, new_bindex, brid; -+ struct au_hfile *p, tmp, *q; -+ struct au_finfo *finfo; -+ struct super_block *sb; -+ struct au_fidir *fidir; -+ -+ FiMustWriteLock(file); -+ -+ sb = file->f_dentry->d_sb; -+ finfo = au_fi(file); -+ fidir = finfo->fi_hdir; -+ AuDebugOn(!fidir); -+ p = fidir->fd_hfile + finfo->fi_btop; -+ brid = p->hf_br->br_id; -+ bend = fidir->fd_bbot; -+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) { -+ if (!p->hf_file) -+ continue; -+ -+ new_bindex = au_br_index(sb, p->hf_br->br_id); -+ if (new_bindex == bindex) -+ continue; -+ if (new_bindex < 0) { -+ au_set_h_fptr(file, bindex, NULL); -+ continue; -+ } -+ -+ /* swap two lower inode, and loop again */ -+ q = fidir->fd_hfile + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hf_file) { -+ bindex--; -+ p--; -+ } -+ } -+ -+ p = fidir->fd_hfile; -+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) { -+ bend = au_sbend(sb); -+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend; -+ finfo->fi_btop++, p++) -+ if (p->hf_file) { -+ if (file_inode(p->hf_file)) -+ break; -+ else -+ au_hfput(p, file); -+ } -+ } else { -+ bend = au_br_index(sb, brid); -+ for 
(finfo->fi_btop = 0; finfo->fi_btop < bend; -+ finfo->fi_btop++, p++) -+ if (p->hf_file) -+ au_hfput(p, file); -+ bend = au_sbend(sb); -+ } -+ -+ p = fidir->fd_hfile + bend; -+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop; -+ fidir->fd_bbot--, p--) -+ if (p->hf_file) { -+ if (file_inode(p->hf_file)) -+ break; -+ else -+ au_hfput(p, file); -+ } -+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop); -+} -+ -+/* -+ * after branch manipulating, refresh the file. -+ */ -+static int refresh_file(struct file *file, int (*reopen)(struct file *file)) -+{ -+ int err, need_reopen; -+ aufs_bindex_t bend, bindex; -+ struct dentry *dentry; -+ struct au_finfo *finfo; -+ struct au_hfile *hfile; -+ -+ dentry = file->f_dentry; -+ finfo = au_fi(file); -+ if (!finfo->fi_hdir) { -+ hfile = &finfo->fi_htop; -+ AuDebugOn(!hfile->hf_file); -+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id); -+ AuDebugOn(bindex < 0); -+ if (bindex != finfo->fi_btop) -+ au_set_fbstart(file, bindex); -+ } else { -+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1); -+ if (unlikely(err)) -+ goto out; -+ au_do_refresh_dir(file); -+ } -+ -+ err = 0; -+ need_reopen = 1; -+ if (!au_test_mmapped(file)) -+ err = au_file_refresh_by_inode(file, &need_reopen); -+ if (!err && need_reopen && !d_unlinked(dentry)) -+ err = reopen(file); -+ if (!err) { -+ au_update_figen(file); -+ goto out; /* success */ -+ } -+ -+ /* error, close all lower files */ -+ if (finfo->fi_hdir) { -+ bend = au_fbend_dir(file); -+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) -+ au_set_h_fptr(file, bindex, NULL); -+ } -+ -+out: -+ return err; -+} -+ -+/* common function to regular file and dir */ -+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), -+ int wlock) -+{ -+ int err; -+ unsigned int sigen, figen; -+ aufs_bindex_t bstart; -+ unsigned char pseudo_link; -+ struct dentry *dentry; -+ struct inode *inode; -+ -+ err = 0; -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+ 
sigen = au_sigen(dentry->d_sb); -+ fi_write_lock(file); -+ figen = au_figen(file); -+ di_write_lock_child(dentry); -+ bstart = au_dbstart(dentry); -+ pseudo_link = (bstart != au_ibstart(inode)); -+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { -+ if (!wlock) { -+ di_downgrade_lock(dentry, AuLock_IR); -+ fi_downgrade_lock(file); -+ } -+ goto out; /* success */ -+ } -+ -+ AuDbg("sigen %d, figen %d\n", sigen, figen); -+ if (au_digen_test(dentry, sigen)) { -+ err = au_reval_dpath(dentry, sigen); -+ AuDebugOn(!err && au_digen_test(dentry, sigen)); -+ } -+ -+ if (!err) -+ err = refresh_file(file, reopen); -+ if (!err) { -+ if (!wlock) { -+ di_downgrade_lock(dentry, AuLock_IR); -+ fi_downgrade_lock(file); -+ } -+ } else { -+ di_write_unlock(dentry); -+ fi_write_unlock(file); -+ } -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* cf. aufs_nopage() */ -+/* for madvise(2) */ -+static int aufs_readpage(struct file *file __maybe_unused, struct page *page) -+{ -+ unlock_page(page); -+ return 0; -+} -+ -+/* it will never be called, but necessary to support O_DIRECT */ -+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, -+ const struct iovec *iov, loff_t offset, -+ unsigned long nr_segs) -+{ BUG(); return 0; } -+ -+/* -+ * it will never be called, but madvise and fadvise behaves differently -+ * when get_xip_mem is defined -+ */ -+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, -+ int create, void **kmem, unsigned long *pfn) -+{ BUG(); return 0; } -+ -+/* they will never be called. 
*/ -+#ifdef CONFIG_AUFS_DEBUG -+static int aufs_write_begin(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned flags, -+ struct page **pagep, void **fsdata) -+{ AuUnsupport(); return 0; } -+static int aufs_write_end(struct file *file, struct address_space *mapping, -+ loff_t pos, unsigned len, unsigned copied, -+ struct page *page, void *fsdata) -+{ AuUnsupport(); return 0; } -+static int aufs_writepage(struct page *page, struct writeback_control *wbc) -+{ AuUnsupport(); return 0; } -+ -+static int aufs_set_page_dirty(struct page *page) -+{ AuUnsupport(); return 0; } -+static void aufs_invalidatepage(struct page *page, unsigned int offset, -+ unsigned int length) -+{ AuUnsupport(); } -+static int aufs_releasepage(struct page *page, gfp_t gfp) -+{ AuUnsupport(); return 0; } -+static int aufs_migratepage(struct address_space *mapping, struct page *newpage, -+ struct page *page, enum migrate_mode mode) -+{ AuUnsupport(); return 0; } -+static int aufs_launder_page(struct page *page) -+{ AuUnsupport(); return 0; } -+static int aufs_is_partially_uptodate(struct page *page, -+ unsigned long from, -+ unsigned long count) -+{ AuUnsupport(); return 0; } -+static void aufs_is_dirty_writeback(struct page *page, bool *dirty, -+ bool *writeback) -+{ AuUnsupport(); } -+static int aufs_error_remove_page(struct address_space *mapping, -+ struct page *page) -+{ AuUnsupport(); return 0; } -+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file, -+ sector_t *span) -+{ AuUnsupport(); return 0; } -+static void aufs_swap_deactivate(struct file *file) -+{ AuUnsupport(); } -+#endif /* CONFIG_AUFS_DEBUG */ -+ -+const struct address_space_operations aufs_aop = { -+ .readpage = aufs_readpage, -+ .direct_IO = aufs_direct_IO, -+ .get_xip_mem = aufs_get_xip_mem, -+#ifdef CONFIG_AUFS_DEBUG -+ .writepage = aufs_writepage, -+ /* no writepages, because of writepage */ -+ .set_page_dirty = aufs_set_page_dirty, -+ /* no readpages, because of 
readpage */ -+ .write_begin = aufs_write_begin, -+ .write_end = aufs_write_end, -+ /* no bmap, no block device */ -+ .invalidatepage = aufs_invalidatepage, -+ .releasepage = aufs_releasepage, -+ .migratepage = aufs_migratepage, -+ .launder_page = aufs_launder_page, -+ .is_partially_uptodate = aufs_is_partially_uptodate, -+ .is_dirty_writeback = aufs_is_dirty_writeback, -+ .error_remove_page = aufs_error_remove_page, -+ .swap_activate = aufs_swap_activate, -+ .swap_deactivate = aufs_swap_deactivate -+#endif /* CONFIG_AUFS_DEBUG */ -+}; -diff --git fs/aufs/file.h fs/aufs/file.h -new file mode 100644 -index 0000000..2c1fa87 ---- /dev/null -+++ fs/aufs/file.h -@@ -0,0 +1,276 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * file operations -+ */ -+ -+#ifndef __AUFS_FILE_H__ -+#define __AUFS_FILE_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+#include -+#include "rwsem.h" -+ -+struct au_branch; -+struct au_hfile { -+ struct file *hf_file; -+ struct au_branch *hf_br; -+}; -+ -+struct au_vdir; -+struct au_fidir { -+ aufs_bindex_t fd_bbot; -+ aufs_bindex_t fd_nent; -+ struct au_vdir *fd_vdir_cache; -+ struct au_hfile fd_hfile[]; -+}; -+ -+static inline int au_fidir_sz(int nent) -+{ -+ AuDebugOn(nent < 0); -+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent; -+} -+ -+struct au_finfo { -+ atomic_t fi_generation; -+ -+ struct au_rwsem fi_rwsem; -+ aufs_bindex_t fi_btop; -+ -+ /* do not union them */ -+ struct { /* for non-dir */ -+ struct au_hfile fi_htop; -+ atomic_t fi_mmapped; -+ }; -+ struct au_fidir *fi_hdir; /* for dir only */ -+ -+ struct hlist_node fi_hlist; -+ struct file *fi_file; /* very ugly */ -+} ____cacheline_aligned_in_smp; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* file.c */ -+extern const struct address_space_operations aufs_aop; -+unsigned int au_file_roflags(unsigned int flags); -+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags, -+ 
struct file *file, int force_wr); -+int au_do_open(struct file *file, int (*open)(struct file *file, int flags), -+ struct au_fidir *fidir); -+int au_reopen_nondir(struct file *file); -+struct au_pin; -+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin); -+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file), -+ int wlock); -+int au_do_flush(struct file *file, fl_owner_t id, -+ int (*flush)(struct file *file, fl_owner_t id)); -+ -+/* poll.c */ -+#ifdef CONFIG_AUFS_POLL -+unsigned int aufs_poll(struct file *file, poll_table *wait); -+#endif -+ -+#ifdef CONFIG_AUFS_BR_HFSPLUS -+/* hfsplus.c */ -+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex, -+ int force_wr); -+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, -+ struct file *h_file); -+#else -+static inline -+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex, -+ int force_wr) -+{ -+ return NULL; -+} -+ -+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex, -+ struct file *h_file); -+#endif -+ -+/* f_op.c */ -+extern const struct file_operations aufs_file_fop; -+int au_do_open_nondir(struct file *file, int flags); -+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file); -+ -+/* finfo.c */ -+void au_hfput(struct au_hfile *hf, struct file *file); -+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, -+ struct file *h_file); -+ -+void au_update_figen(struct file *file); -+struct au_fidir *au_fidir_alloc(struct super_block *sb); -+int au_fidir_realloc(struct au_finfo *finfo, int nbr); -+ -+void au_fi_init_once(void *_fi); -+void au_finfo_fin(struct file *file); -+int au_finfo_init(struct file *file, struct au_fidir *fidir); -+ -+/* ioctl.c */ -+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg); -+#ifdef CONFIG_COMPAT -+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, -+ unsigned long arg); -+long 
aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, -+ unsigned long arg); -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct au_finfo *au_fi(struct file *file) -+{ -+ return file->private_data; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * fi_read_lock, fi_write_lock, -+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock -+ */ -+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem); -+ -+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem) -+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem) -+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem) -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* todo: hard/soft set? */ -+static inline aufs_bindex_t au_fbstart(struct file *file) -+{ -+ FiMustAnyLock(file); -+ return au_fi(file)->fi_btop; -+} -+ -+static inline aufs_bindex_t au_fbend_dir(struct file *file) -+{ -+ FiMustAnyLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_hdir->fd_bbot; -+} -+ -+static inline struct au_vdir *au_fvdir_cache(struct file *file) -+{ -+ FiMustAnyLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_hdir->fd_vdir_cache; -+} -+ -+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex) -+{ -+ FiMustWriteLock(file); -+ au_fi(file)->fi_btop = bindex; -+} -+ -+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex) -+{ -+ FiMustWriteLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ au_fi(file)->fi_hdir->fd_bbot = bindex; -+} -+ -+static inline void au_set_fvdir_cache(struct file *file, -+ struct au_vdir *vdir_cache) -+{ -+ FiMustWriteLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache; -+} -+ -+static inline struct file *au_hf_top(struct file *file) -+{ -+ FiMustAnyLock(file); -+ 
AuDebugOn(au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_htop.hf_file; -+} -+ -+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex) -+{ -+ FiMustAnyLock(file); -+ AuDebugOn(!au_fi(file)->fi_hdir); -+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file; -+} -+ -+/* todo: memory barrier? */ -+static inline unsigned int au_figen(struct file *f) -+{ -+ return atomic_read(&au_fi(f)->fi_generation); -+} -+ -+static inline void au_set_mmapped(struct file *f) -+{ -+ if (atomic_inc_return(&au_fi(f)->fi_mmapped)) -+ return; -+ pr_warn("fi_mmapped wrapped around\n"); -+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped)) -+ ; -+} -+ -+static inline void au_unset_mmapped(struct file *f) -+{ -+ atomic_dec(&au_fi(f)->fi_mmapped); -+} -+ -+static inline int au_test_mmapped(struct file *f) -+{ -+ return atomic_read(&au_fi(f)->fi_mmapped); -+} -+ -+/* customize vma->vm_file */ -+ -+static inline void au_do_vm_file_reset(struct vm_area_struct *vma, -+ struct file *file) -+{ -+ struct file *f; -+ -+ f = vma->vm_file; -+ get_file(file); -+ vma->vm_file = file; -+ fput(f); -+} -+ -+#ifdef CONFIG_MMU -+#define AuDbgVmRegion(file, vma) do {} while (0) -+ -+static inline void au_vm_file_reset(struct vm_area_struct *vma, -+ struct file *file) -+{ -+ au_do_vm_file_reset(vma, file); -+} -+#else -+#define AuDbgVmRegion(file, vma) \ -+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file)) -+ -+static inline void au_vm_file_reset(struct vm_area_struct *vma, -+ struct file *file) -+{ -+ struct file *f; -+ -+ au_do_vm_file_reset(vma, file); -+ f = vma->vm_region->vm_file; -+ get_file(file); -+ vma->vm_region->vm_file = file; -+ fput(f); -+} -+#endif /* CONFIG_MMU */ -+ -+/* handle vma->vm_prfile */ -+static inline void au_vm_prfile_set(struct vm_area_struct *vma, -+ struct file *file) -+{ -+ get_file(file); -+ vma->vm_prfile = file; -+#ifndef CONFIG_MMU -+ get_file(file); -+ vma->vm_region->vm_prfile = file; -+#endif -+} -+ -+#endif /* __KERNEL__ */ 
-+#endif /* __AUFS_FILE_H__ */ -diff --git fs/aufs/finfo.c fs/aufs/finfo.c -new file mode 100644 -index 0000000..d4f5a2a ---- /dev/null -+++ fs/aufs/finfo.c -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * file private data -+ */ -+ -+#include "aufs.h" -+ -+void au_hfput(struct au_hfile *hf, struct file *file) -+{ -+ /* todo: direct access f_flags */ -+ if (vfsub_file_flags(file) & __FMODE_EXEC) -+ allow_write_access(hf->hf_file); -+ fput(hf->hf_file); -+ hf->hf_file = NULL; -+ atomic_dec(&hf->hf_br->br_count); -+ hf->hf_br = NULL; -+} -+ -+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) -+{ -+ struct au_finfo *finfo = au_fi(file); -+ struct au_hfile *hf; -+ struct au_fidir *fidir; -+ -+ fidir = finfo->fi_hdir; -+ if (!fidir) { -+ AuDebugOn(finfo->fi_btop != bindex); -+ hf = &finfo->fi_htop; -+ } else -+ hf = fidir->fd_hfile + bindex; -+ -+ if (hf && hf->hf_file) -+ au_hfput(hf, file); -+ if (val) { -+ FiMustWriteLock(file); -+ hf->hf_file = val; -+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex); -+ } -+} -+ -+void au_update_figen(struct file *file) -+{ -+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry)); -+ /* smp_mb(); */ /* atomic_set */ -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_fidir *au_fidir_alloc(struct super_block *sb) -+{ -+ struct au_fidir *fidir; -+ int nbr; -+ -+ nbr = au_sbend(sb) + 1; -+ if (nbr < 2) -+ nbr = 2; /* initial allocate for 2 branches */ -+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS); -+ if (fidir) { -+ fidir->fd_bbot = -1; -+ fidir->fd_nent = nbr; -+ fidir->fd_vdir_cache = NULL; -+ } -+ -+ return fidir; -+} -+ -+int au_fidir_realloc(struct au_finfo *finfo, int nbr) -+{ -+ int err; -+ struct au_fidir *fidir, *p; -+ -+ AuRwMustWriteLock(&finfo->fi_rwsem); -+ fidir = finfo->fi_hdir; -+ AuDebugOn(!fidir); -+ -+ err = -ENOMEM; -+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), 
au_fidir_sz(nbr), -+ GFP_NOFS); -+ if (p) { -+ p->fd_nent = nbr; -+ finfo->fi_hdir = p; -+ err = 0; -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_finfo_fin(struct file *file) -+{ -+ struct au_finfo *finfo; -+ -+ au_nfiles_dec(file->f_dentry->d_sb); -+ -+ finfo = au_fi(file); -+ AuDebugOn(finfo->fi_hdir); -+ AuRwDestroy(&finfo->fi_rwsem); -+ au_cache_free_finfo(finfo); -+} -+ -+void au_fi_init_once(void *_finfo) -+{ -+ struct au_finfo *finfo = _finfo; -+ static struct lock_class_key aufs_fi; -+ -+ au_rw_init(&finfo->fi_rwsem); -+ au_rw_class(&finfo->fi_rwsem, &aufs_fi); -+} -+ -+int au_finfo_init(struct file *file, struct au_fidir *fidir) -+{ -+ int err; -+ struct au_finfo *finfo; -+ struct dentry *dentry; -+ -+ err = -ENOMEM; -+ dentry = file->f_dentry; -+ finfo = au_cache_alloc_finfo(); -+ if (unlikely(!finfo)) -+ goto out; -+ -+ err = 0; -+ au_nfiles_inc(dentry->d_sb); -+ /* verbose coding for lock class name */ -+ if (!fidir) -+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO); -+ else -+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO); -+ au_rw_write_lock(&finfo->fi_rwsem); -+ finfo->fi_btop = -1; -+ finfo->fi_hdir = fidir; -+ atomic_set(&finfo->fi_generation, au_digen(dentry)); -+ /* smp_mb(); */ /* atomic_set */ -+ -+ file->private_data = finfo; -+ -+out: -+ return err; -+} -diff --git fs/aufs/fstype.h fs/aufs/fstype.h -new file mode 100644 -index 0000000..04afae7 ---- /dev/null -+++ fs/aufs/fstype.h -@@ -0,0 +1,456 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * judging filesystem type -+ */ -+ -+#ifndef __AUFS_FSTYPE_H__ -+#define __AUFS_FSTYPE_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+#include -+ -+static inline int au_test_aufs(struct super_block *sb) -+{ -+ return sb->s_magic == AUFS_SUPER_MAGIC; -+} -+ -+static inline const char *au_sbtype(struct super_block *sb) -+{ -+ return sb->s_type->name; -+} -+ -+static inline int au_test_iso9660(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE) -+ return sb->s_magic == ROMFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_romfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE) -+ return sb->s_magic == ISOFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_cramfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE) -+ return sb->s_magic == CRAMFS_MAGIC; -+#endif -+ return 0; -+} -+ -+static inline int au_test_nfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) -+ return sb->s_magic == NFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_fuse(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE) -+ return sb->s_magic == FUSE_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_xfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE) -+ return sb->s_magic == XFS_SB_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_TMPFS -+ return sb->s_magic == TMPFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_ECRYPT_FS) || 
defined(CONFIG_ECRYPT_FS_MODULE) -+ return !strcmp(au_sbtype(sb), "ecryptfs"); -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_ocfs2(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_OCFS2_FS) || defined(CONFIG_OCFS2_FS_MODULE) -+ return sb->s_magic == OCFS2_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_ocfs2_dlmfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_OCFS2_FS_O2CB) || defined(CONFIG_OCFS2_FS_O2CB_MODULE) -+ return sb->s_magic == DLMFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_coda(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CODA_FS) || defined(CONFIG_CODA_FS_MODULE) -+ return sb->s_magic == CODA_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_v9fs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_9P_FS) || defined(CONFIG_9P_FS_MODULE) -+ return sb->s_magic == V9FS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_ext4(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_EXT4_FS) || defined(CONFIG_EXT4_FS_MODULE) -+ return sb->s_magic == EXT4_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_sysv(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_SYSV_FS) || defined(CONFIG_SYSV_FS_MODULE) -+ return !strcmp(au_sbtype(sb), "sysv"); -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_ramfs(struct super_block *sb) -+{ -+ return sb->s_magic == RAMFS_MAGIC; -+} -+ -+static inline int au_test_ubifs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE) -+ return sb->s_magic == UBIFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_procfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_PROC_FS -+ return sb->s_magic == PROC_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_sysfs(struct 
super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_SYSFS -+ return sb->s_magic == SYSFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_configfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE) -+ return sb->s_magic == CONFIGFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_minix(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE) -+ return sb->s_magic == MINIX3_SUPER_MAGIC -+ || sb->s_magic == MINIX2_SUPER_MAGIC -+ || sb->s_magic == MINIX2_SUPER_MAGIC2 -+ || sb->s_magic == MINIX_SUPER_MAGIC -+ || sb->s_magic == MINIX_SUPER_MAGIC2; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_cifs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_CIFS_FS) || defined(CONFIGCIFS_FS_MODULE) -+ return sb->s_magic == CIFS_MAGIC_NUMBER; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_fat(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE) -+ return sb->s_magic == MSDOS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_msdos(struct super_block *sb) -+{ -+ return au_test_fat(sb); -+} -+ -+static inline int au_test_vfat(struct super_block *sb) -+{ -+ return au_test_fat(sb); -+} -+ -+static inline int au_test_securityfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_SECURITYFS -+ return sb->s_magic == SECURITYFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_squashfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE) -+ return sb->s_magic == SQUASHFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_btrfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE) -+ return sb->s_magic == BTRFS_SUPER_MAGIC; -+#else -+ return 0; 
-+#endif -+} -+ -+static inline int au_test_xenfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE) -+ return sb->s_magic == XENFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_debugfs(struct super_block *sb __maybe_unused) -+{ -+#ifdef CONFIG_DEBUG_FS -+ return sb->s_magic == DEBUGFS_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_nilfs(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE) -+ return sb->s_magic == NILFS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused) -+{ -+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE) -+ return sb->s_magic == HFSPLUS_SUPER_MAGIC; -+#else -+ return 0; -+#endif -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * they can't be an aufs branch. -+ */ -+static inline int au_test_fs_unsuppoted(struct super_block *sb) -+{ -+ return -+#ifndef CONFIG_AUFS_BR_RAMFS -+ au_test_ramfs(sb) || -+#endif -+ au_test_procfs(sb) -+ || au_test_sysfs(sb) -+ || au_test_configfs(sb) -+ || au_test_debugfs(sb) -+ || au_test_securityfs(sb) -+ || au_test_xenfs(sb) -+ || au_test_ecryptfs(sb) -+ /* || !strcmp(au_sbtype(sb), "unionfs") */ -+ || au_test_aufs(sb); /* will be supported in next version */ -+} -+ -+static inline int au_test_fs_remote(struct super_block *sb) -+{ -+ return !au_test_tmpfs(sb) -+#ifdef CONFIG_AUFS_BR_RAMFS -+ && !au_test_ramfs(sb) -+#endif -+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * Note: these functions (below) are created after reading ->getattr() in all -+ * filesystems under linux/fs. it means we have to do so in every update... -+ */ -+ -+/* -+ * some filesystems require getattr to refresh the inode attributes before -+ * referencing. 
-+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs) -+ * and leave the work for d_revalidate() -+ */ -+static inline int au_test_fs_refresh_iattr(struct super_block *sb) -+{ -+ return au_test_nfs(sb) -+ || au_test_fuse(sb) -+ /* || au_test_ocfs2(sb) */ /* untested */ -+ /* || au_test_btrfs(sb) */ /* untested */ -+ /* || au_test_coda(sb) */ /* untested */ -+ /* || au_test_v9fs(sb) */ /* untested */ -+ ; -+} -+ -+/* -+ * filesystems which don't maintain i_size or i_blocks. -+ */ -+static inline int au_test_fs_bad_iattr_size(struct super_block *sb) -+{ -+ return au_test_xfs(sb) -+ || au_test_btrfs(sb) -+ || au_test_ubifs(sb) -+ || au_test_hfsplus(sb) /* maintained, but incorrect */ -+ /* || au_test_ext4(sb) */ /* untested */ -+ /* || au_test_ocfs2(sb) */ /* untested */ -+ /* || au_test_ocfs2_dlmfs(sb) */ /* untested */ -+ /* || au_test_sysv(sb) */ /* untested */ -+ /* || au_test_minix(sb) */ /* untested */ -+ ; -+} -+ -+/* -+ * filesystems which don't store the correct value in some of their inode -+ * attributes. -+ */ -+static inline int au_test_fs_bad_iattr(struct super_block *sb) -+{ -+ return au_test_fs_bad_iattr_size(sb) -+ /* || au_test_cifs(sb) */ /* untested */ -+ || au_test_fat(sb) -+ || au_test_msdos(sb) -+ || au_test_vfat(sb); -+} -+ -+/* they don't check i_nlink in link(2) */ -+static inline int au_test_fs_no_limit_nlink(struct super_block *sb) -+{ -+ return au_test_tmpfs(sb) -+#ifdef CONFIG_AUFS_BR_RAMFS -+ || au_test_ramfs(sb) -+#endif -+ || au_test_ubifs(sb) -+ || au_test_hfsplus(sb); -+} -+ -+/* -+ * filesystems which sets S_NOATIME and S_NOCMTIME. -+ */ -+static inline int au_test_fs_notime(struct super_block *sb) -+{ -+ return au_test_nfs(sb) -+ || au_test_fuse(sb) -+ || au_test_ubifs(sb) -+ /* || au_test_cifs(sb) */ /* untested */ -+ ; -+} -+ -+/* -+ * filesystems which requires replacing i_mapping. 
-+ */ -+static inline int au_test_fs_bad_mapping(struct super_block *sb) -+{ -+ return au_test_fuse(sb) -+ || au_test_ubifs(sb); -+} -+ -+/* temporary support for i#1 in cramfs */ -+static inline int au_test_fs_unique_ino(struct inode *inode) -+{ -+ if (au_test_cramfs(inode->i_sb)) -+ return inode->i_ino != 1; -+ return 1; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * the filesystem where the xino files placed must support i/o after unlink and -+ * maintain i_size and i_blocks. -+ */ -+static inline int au_test_fs_bad_xino(struct super_block *sb) -+{ -+ return au_test_fs_remote(sb) -+ || au_test_fs_bad_iattr_size(sb) -+ /* don't want unnecessary work for xino */ -+ || au_test_aufs(sb) -+ || au_test_ecryptfs(sb) -+ || au_test_nilfs(sb); -+} -+ -+static inline int au_test_fs_trunc_xino(struct super_block *sb) -+{ -+ return au_test_tmpfs(sb) -+ || au_test_ramfs(sb); -+} -+ -+/* -+ * test if the @sb is real-readonly. -+ */ -+static inline int au_test_fs_rr(struct super_block *sb) -+{ -+ return au_test_squashfs(sb) -+ || au_test_iso9660(sb) -+ || au_test_cramfs(sb) -+ || au_test_romfs(sb); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_FSTYPE_H__ */ -diff --git fs/aufs/hfsnotify.c fs/aufs/hfsnotify.c -new file mode 100644 -index 0000000..1f7dc91 ---- /dev/null -+++ fs/aufs/hfsnotify.c -@@ -0,0 +1,268 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * fsnotify for the lower directories -+ */ -+ -+#include "aufs.h" -+ -+/* FS_IN_IGNORED is unnecessary */ -+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE -+ | FS_CREATE | FS_EVENT_ON_CHILD); -+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq); -+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0); -+ -+static void au_hfsn_free_mark(struct fsnotify_mark *mark) -+{ -+ struct au_hnotify *hn = container_of(mark, struct au_hnotify, -+ hn_mark); -+ AuDbg("here\n"); -+ au_cache_free_hnotify(hn); -+ smp_mb__before_atomic_dec(); -+ if (atomic64_dec_and_test(&au_hfsn_ifree)) -+ wake_up(&au_hfsn_wq); -+} -+ -+static int au_hfsn_alloc(struct au_hinode *hinode) -+{ -+ int err; -+ struct au_hnotify *hn; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct fsnotify_mark *mark; -+ aufs_bindex_t bindex; -+ -+ hn = hinode->hi_notify; -+ sb = hn->hn_aufs_inode->i_sb; -+ bindex = au_br_index(sb, hinode->hi_id); -+ br = au_sbr(sb, bindex); -+ AuDebugOn(!br->br_hfsn); -+ -+ mark = &hn->hn_mark; -+ fsnotify_init_mark(mark, au_hfsn_free_mark); -+ mark->mask = AuHfsnMask; -+ /* -+ * by udba rename or rmdir, aufs assign a new inode to the known -+ * h_inode, so specify 1 to allow dups. 
-+ */ -+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode, -+ /*mnt*/NULL, /*allow_dups*/1); -+ /* even if err */ -+ fsnotify_put_mark(mark); -+ -+ return err; -+} -+ -+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn) -+{ -+ struct fsnotify_mark *mark; -+ unsigned long long ull; -+ struct fsnotify_group *group; -+ -+ ull = atomic64_inc_return(&au_hfsn_ifree); -+ BUG_ON(!ull); -+ -+ mark = &hn->hn_mark; -+ spin_lock(&mark->lock); -+ group = mark->group; -+ fsnotify_get_group(group); -+ spin_unlock(&mark->lock); -+ fsnotify_destroy_mark(mark, group); -+ fsnotify_put_group(group); -+ -+ /* free hn by myself */ -+ return 0; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set) -+{ -+ struct fsnotify_mark *mark; -+ -+ mark = &hinode->hi_notify->hn_mark; -+ spin_lock(&mark->lock); -+ if (do_set) { -+ AuDebugOn(mark->mask & AuHfsnMask); -+ mark->mask |= AuHfsnMask; -+ } else { -+ AuDebugOn(!(mark->mask & AuHfsnMask)); -+ mark->mask &= ~AuHfsnMask; -+ } -+ spin_unlock(&mark->lock); -+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */ -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* #define AuDbgHnotify */ -+#ifdef AuDbgHnotify -+static char *au_hfsn_name(u32 mask) -+{ -+#ifdef CONFIG_AUFS_DEBUG -+#define test_ret(flag) \ -+ do { \ -+ if (mask & flag) \ -+ return #flag; \ -+ } while (0) -+ test_ret(FS_ACCESS); -+ test_ret(FS_MODIFY); -+ test_ret(FS_ATTRIB); -+ test_ret(FS_CLOSE_WRITE); -+ test_ret(FS_CLOSE_NOWRITE); -+ test_ret(FS_OPEN); -+ test_ret(FS_MOVED_FROM); -+ test_ret(FS_MOVED_TO); -+ test_ret(FS_CREATE); -+ test_ret(FS_DELETE); -+ test_ret(FS_DELETE_SELF); -+ test_ret(FS_MOVE_SELF); -+ test_ret(FS_UNMOUNT); -+ test_ret(FS_Q_OVERFLOW); -+ test_ret(FS_IN_IGNORED); -+ test_ret(FS_IN_ISDIR); -+ test_ret(FS_IN_ONESHOT); -+ test_ret(FS_EVENT_ON_CHILD); -+ return ""; -+#undef 
test_ret -+#else -+ return "??"; -+#endif -+} -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_hfsn_free_group(struct fsnotify_group *group) -+{ -+ struct au_br_hfsnotify *hfsn = group->private; -+ -+ AuDbg("here\n"); -+ kfree(hfsn); -+} -+ -+static int au_hfsn_handle_event(struct fsnotify_group *group, -+ struct inode *inode, -+ struct fsnotify_mark *inode_mark, -+ struct fsnotify_mark *vfsmount_mark, -+ u32 mask, void *data, int data_type, -+ const unsigned char *file_name, u32 cookie) -+{ -+ int err; -+ struct au_hnotify *hnotify; -+ struct inode *h_dir, *h_inode; -+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name)); -+ -+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE); -+ -+ err = 0; -+ /* if FS_UNMOUNT happens, there must be another bug */ -+ AuDebugOn(mask & FS_UNMOUNT); -+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT)) -+ goto out; -+ -+ h_dir = inode; -+ h_inode = NULL; -+#ifdef AuDbgHnotify -+ au_debug_on(); -+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1 -+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) { -+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n", -+ h_dir->i_ino, mask, au_hfsn_name(mask), -+ AuLNPair(&h_child_qstr), h_inode ? 
h_inode->i_ino : 0); -+ /* WARN_ON(1); */ -+ } -+ au_debug_off(); -+#endif -+ -+ AuDebugOn(!inode_mark); -+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark); -+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode); -+ -+out: -+ return err; -+} -+ -+static struct fsnotify_ops au_hfsn_ops = { -+ .handle_event = au_hfsn_handle_event, -+ .free_group_priv = au_hfsn_free_group -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_hfsn_fin_br(struct au_branch *br) -+{ -+ struct au_br_hfsnotify *hfsn; -+ -+ hfsn = br->br_hfsn; -+ if (hfsn) -+ fsnotify_put_group(hfsn->hfsn_group); -+} -+ -+static int au_hfsn_init_br(struct au_branch *br, int perm) -+{ -+ int err; -+ struct fsnotify_group *group; -+ struct au_br_hfsnotify *hfsn; -+ -+ err = 0; -+ br->br_hfsn = NULL; -+ if (!au_br_hnotifyable(perm)) -+ goto out; -+ -+ err = -ENOMEM; -+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS); -+ if (unlikely(!hfsn)) -+ goto out; -+ -+ err = 0; -+ group = fsnotify_alloc_group(&au_hfsn_ops); -+ if (IS_ERR(group)) { -+ err = PTR_ERR(group); -+ pr_err("fsnotify_alloc_group() failed, %d\n", err); -+ goto out_hfsn; -+ } -+ -+ group->private = hfsn; -+ hfsn->hfsn_group = group; -+ br->br_hfsn = hfsn; -+ goto out; /* success */ -+ -+out_hfsn: -+ kfree(hfsn); -+out: -+ return err; -+} -+ -+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm) -+{ -+ int err; -+ -+ err = 0; -+ if (!br->br_hfsn) -+ err = au_hfsn_init_br(br, perm); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_hfsn_fin(void) -+{ -+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree)); -+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree)); -+} -+ -+const struct au_hnotify_op au_hnotify_op = { -+ .ctl = au_hfsn_ctl, -+ .alloc = au_hfsn_alloc, -+ .free = au_hfsn_free, -+ -+ .fin = au_hfsn_fin, -+ -+ .reset_br = au_hfsn_reset_br, -+ .fin_br = 
au_hfsn_fin_br, -+ .init_br = au_hfsn_init_br -+}; -diff --git fs/aufs/hfsplus.c fs/aufs/hfsplus.c -new file mode 100644 -index 0000000..98f9ca2 ---- /dev/null -+++ fs/aufs/hfsplus.c -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (C) 2010-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * special support for filesystems which aqucires an inode mutex -+ * at final closing a file, eg, hfsplus. -+ * -+ * This trick is very simple and stupid, just to open the file before really -+ * neceeary open to tell hfsplus that this is not the final closing. -+ * The caller should call au_h_open_pre() after acquiring the inode mutex, -+ * and au_h_open_post() after releasing it. -+ */ -+ -+#include "aufs.h" -+ -+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex, -+ int force_wr) -+{ -+ struct file *h_file; -+ struct dentry *h_dentry; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ AuDebugOn(!h_dentry); -+ AuDebugOn(!h_dentry->d_inode); -+ -+ h_file = NULL; -+ if (au_test_hfsplus(h_dentry->d_sb) -+ && S_ISREG(h_dentry->d_inode->i_mode)) -+ h_file = au_h_open(dentry, bindex, -+ O_RDONLY | O_NOATIME | O_LARGEFILE, -+ /*file*/NULL, force_wr); -+ return h_file; -+} -+ -+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex, -+ struct file *h_file) -+{ -+ if (h_file) { -+ fput(h_file); -+ au_sbr_put(dentry->d_sb, bindex); -+ } -+} -diff --git fs/aufs/hnotify.c fs/aufs/hnotify.c -new file mode 100644 -index 0000000..6935a727 ---- /dev/null -+++ fs/aufs/hnotify.c -@@ -0,0 +1,701 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * abstraction to notify the direct changes on lower directories -+ */ -+ -+#include "aufs.h" -+ -+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode) -+{ -+ int err; -+ struct au_hnotify *hn; -+ -+ err = -ENOMEM; -+ hn = au_cache_alloc_hnotify(); -+ if (hn) { -+ hn->hn_aufs_inode = inode; -+ hinode->hi_notify = hn; -+ err = au_hnotify_op.alloc(hinode); -+ AuTraceErr(err); -+ if (unlikely(err)) { -+ hinode->hi_notify = NULL; -+ au_cache_free_hnotify(hn); -+ /* -+ * The upper dir was removed by udba, but the same named -+ * dir left. In this case, aufs assignes a new inode -+ * number and set the monitor again. -+ * For the lower dir, the old monitnor is still left. -+ */ -+ if (err == -EEXIST) -+ err = 0; -+ } -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+void au_hn_free(struct au_hinode *hinode) -+{ -+ struct au_hnotify *hn; -+ -+ hn = hinode->hi_notify; -+ if (hn) { -+ hinode->hi_notify = NULL; -+ if (au_hnotify_op.free(hinode, hn)) -+ au_cache_free_hnotify(hn); -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_hn_ctl(struct au_hinode *hinode, int do_set) -+{ -+ if (hinode->hi_notify) -+ au_hnotify_op.ctl(hinode, do_set); -+} -+ -+void au_hn_reset(struct inode *inode, unsigned int flags) -+{ -+ aufs_bindex_t bindex, bend; -+ struct inode *hi; -+ struct dentry *iwhdentry; -+ -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { -+ hi = au_h_iptr(inode, bindex); -+ if (!hi) -+ continue; -+ -+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */ -+ iwhdentry = au_hi_wh(inode, bindex); -+ if (iwhdentry) -+ dget(iwhdentry); -+ au_igrab(hi); -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ au_set_h_iptr(inode, bindex, au_igrab(hi), -+ flags & ~AuHi_XINO); -+ iput(hi); -+ dput(iwhdentry); -+ /* mutex_unlock(&hi->i_mutex); */ -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int 
hn_xino(struct inode *inode, struct inode *h_inode) -+{ -+ int err; -+ aufs_bindex_t bindex, bend, bfound, bstart; -+ struct inode *h_i; -+ -+ err = 0; -+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { -+ pr_warn("branch root dir was changed\n"); -+ goto out; -+ } -+ -+ bfound = -1; -+ bend = au_ibend(inode); -+ bstart = au_ibstart(inode); -+#if 0 /* reserved for future use */ -+ if (bindex == bend) { -+ /* keep this ino in rename case */ -+ goto out; -+ } -+#endif -+ for (bindex = bstart; bindex <= bend; bindex++) -+ if (au_h_iptr(inode, bindex) == h_inode) { -+ bfound = bindex; -+ break; -+ } -+ if (bfound < 0) -+ goto out; -+ -+ for (bindex = bstart; bindex <= bend; bindex++) { -+ h_i = au_h_iptr(inode, bindex); -+ if (!h_i) -+ continue; -+ -+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0); -+ /* ignore this error */ -+ /* bad action? */ -+ } -+ -+ /* children inode number will be broken */ -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int hn_gen_tree(struct dentry *dentry) -+{ -+ int err, i, j, ndentry; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry **dentries; -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; -+ -+ for (i = 0; i < dpages.ndpage; i++) { -+ dpage = dpages.dpages + i; -+ dentries = dpage->dentries; -+ ndentry = dpage->ndentry; -+ for (j = 0; j < ndentry; j++) { -+ struct dentry *d; -+ -+ d = dentries[j]; -+ if (IS_ROOT(d)) -+ continue; -+ -+ au_digen_dec(d); -+ if (d->d_inode) -+ /* todo: reset children xino? -+ cached children only? */ -+ au_iigen_dec(d->d_inode); -+ } -+ } -+ -+out_dpages: -+ au_dpages_free(&dpages); -+ -+#if 0 -+ /* discard children */ -+ dentry_unhash(dentry); -+ dput(dentry); -+#endif -+out: -+ return err; -+} -+ -+/* -+ * return 0 if processed. 
-+ */ -+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode, -+ const unsigned int isdir) -+{ -+ int err; -+ struct dentry *d; -+ struct qstr *dname; -+ -+ err = 1; -+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { -+ pr_warn("branch root dir was changed\n"); -+ err = 0; -+ goto out; -+ } -+ -+ if (!isdir) { -+ AuDebugOn(!name); -+ au_iigen_dec(inode); -+ spin_lock(&inode->i_lock); -+ hlist_for_each_entry(d, &inode->i_dentry, d_alias) { -+ spin_lock(&d->d_lock); -+ dname = &d->d_name; -+ if (dname->len != nlen -+ && memcmp(dname->name, name, nlen)) { -+ spin_unlock(&d->d_lock); -+ continue; -+ } -+ err = 0; -+ au_digen_dec(d); -+ spin_unlock(&d->d_lock); -+ break; -+ } -+ spin_unlock(&inode->i_lock); -+ } else { -+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR); -+ d = d_find_alias(inode); -+ if (!d) { -+ au_iigen_dec(inode); -+ goto out; -+ } -+ -+ spin_lock(&d->d_lock); -+ dname = &d->d_name; -+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) { -+ spin_unlock(&d->d_lock); -+ err = hn_gen_tree(d); -+ spin_lock(&d->d_lock); -+ } -+ spin_unlock(&d->d_lock); -+ dput(d); -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir) -+{ -+ int err; -+ struct inode *inode; -+ -+ inode = dentry->d_inode; -+ if (IS_ROOT(dentry) -+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */ -+ ) { -+ pr_warn("branch root dir was changed\n"); -+ return 0; -+ } -+ -+ err = 0; -+ if (!isdir) { -+ au_digen_dec(dentry); -+ if (inode) -+ au_iigen_dec(inode); -+ } else { -+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR); -+ if (inode) -+ err = hn_gen_tree(dentry); -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* hnotify job flags */ -+#define AuHnJob_XINO0 1 -+#define AuHnJob_GEN (1 << 1) -+#define AuHnJob_DIRENT (1 << 2) -+#define AuHnJob_ISDIR (1 << 3) -+#define AuHnJob_TRYXINO0 (1 
<< 4) -+#define AuHnJob_MNTPNT (1 << 5) -+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name) -+#define au_fset_hnjob(flags, name) \ -+ do { (flags) |= AuHnJob_##name; } while (0) -+#define au_fclr_hnjob(flags, name) \ -+ do { (flags) &= ~AuHnJob_##name; } while (0) -+ -+enum { -+ AuHn_CHILD, -+ AuHn_PARENT, -+ AuHnLast -+}; -+ -+struct au_hnotify_args { -+ struct inode *h_dir, *dir, *h_child_inode; -+ u32 mask; -+ unsigned int flags[AuHnLast]; -+ unsigned int h_child_nlen; -+ char h_child_name[]; -+}; -+ -+struct hn_job_args { -+ unsigned int flags; -+ struct inode *inode, *h_inode, *dir, *h_dir; -+ struct dentry *dentry; -+ char *h_name; -+ int h_nlen; -+}; -+ -+static int hn_job(struct hn_job_args *a) -+{ -+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR); -+ int e; -+ -+ /* reset xino */ -+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode) -+ hn_xino(a->inode, a->h_inode); /* ignore this error */ -+ -+ if (au_ftest_hnjob(a->flags, TRYXINO0) -+ && a->inode -+ && a->h_inode) { -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ if (!a->h_inode->i_nlink -+ && !(a->h_inode->i_state & I_LINKABLE)) -+ hn_xino(a->inode, a->h_inode); /* ignore this error */ -+ mutex_unlock(&a->h_inode->i_mutex); -+ } -+ -+ /* make the generation obsolete */ -+ if (au_ftest_hnjob(a->flags, GEN)) { -+ e = -1; -+ if (a->inode) -+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode, -+ isdir); -+ if (e && a->dentry) -+ hn_gen_by_name(a->dentry, isdir); -+ /* ignore this error */ -+ } -+ -+ /* make dir entries obsolete */ -+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) { -+ struct au_vdir *vdir; -+ -+ vdir = au_ivdir(a->inode); -+ if (vdir) -+ vdir->vd_jiffy = 0; -+ /* IMustLock(a->inode); */ -+ /* a->inode->i_version++; */ -+ } -+ -+ /* can do nothing but warn */ -+ if (au_ftest_hnjob(a->flags, MNTPNT) -+ && a->dentry -+ && d_mountpoint(a->dentry)) -+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry); -+ -+ return 0; -+} -+ -+/* 
---------------------------------------------------------------------- */ -+ -+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen, -+ struct inode *dir) -+{ -+ struct dentry *dentry, *d, *parent; -+ struct qstr *dname; -+ -+ parent = d_find_alias(dir); -+ if (!parent) -+ return NULL; -+ -+ dentry = NULL; -+ spin_lock(&parent->d_lock); -+ list_for_each_entry(d, &parent->d_subdirs, d_u.d_child) { -+ /* AuDbg("%pd\n", d); */ -+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); -+ dname = &d->d_name; -+ if (dname->len != nlen || memcmp(dname->name, name, nlen)) -+ goto cont_unlock; -+ if (au_di(d)) -+ au_digen_dec(d); -+ else -+ goto cont_unlock; -+ if (d_count(d)) { -+ dentry = dget_dlock(d); -+ spin_unlock(&d->d_lock); -+ break; -+ } -+ -+cont_unlock: -+ spin_unlock(&d->d_lock); -+ } -+ spin_unlock(&parent->d_lock); -+ dput(parent); -+ -+ if (dentry) -+ di_write_lock_child(dentry); -+ -+ return dentry; -+} -+ -+static struct inode *lookup_wlock_by_ino(struct super_block *sb, -+ aufs_bindex_t bindex, ino_t h_ino) -+{ -+ struct inode *inode; -+ ino_t ino; -+ int err; -+ -+ inode = NULL; -+ err = au_xino_read(sb, bindex, h_ino, &ino); -+ if (!err && ino) -+ inode = ilookup(sb, ino); -+ if (!inode) -+ goto out; -+ -+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) { -+ pr_warn("wrong root branch\n"); -+ iput(inode); -+ inode = NULL; -+ goto out; -+ } -+ -+ ii_write_lock_child(inode); -+ -+out: -+ return inode; -+} -+ -+static void au_hn_bh(void *_args) -+{ -+ struct au_hnotify_args *a = _args; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend, bfound; -+ unsigned char xino, try_iput; -+ int err; -+ struct inode *inode; -+ ino_t h_ino; -+ struct hn_job_args args; -+ struct dentry *dentry; -+ struct au_sbinfo *sbinfo; -+ -+ AuDebugOn(!_args); -+ AuDebugOn(!a->h_dir); -+ AuDebugOn(!a->dir); -+ AuDebugOn(!a->mask); -+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n", -+ a->mask, a->dir->i_ino, a->h_dir->i_ino, -+ a->h_child_inode ? 
a->h_child_inode->i_ino : 0); -+ -+ inode = NULL; -+ dentry = NULL; -+ /* -+ * do not lock a->dir->i_mutex here -+ * because of d_revalidate() may cause a deadlock. -+ */ -+ sb = a->dir->i_sb; -+ AuDebugOn(!sb); -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!sbinfo); -+ si_write_lock(sb, AuLock_NOPLMW); -+ -+ ii_read_lock_parent(a->dir); -+ bfound = -1; -+ bend = au_ibend(a->dir); -+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++) -+ if (au_h_iptr(a->dir, bindex) == a->h_dir) { -+ bfound = bindex; -+ break; -+ } -+ ii_read_unlock(a->dir); -+ if (unlikely(bfound < 0)) -+ goto out; -+ -+ xino = !!au_opt_test(au_mntflags(sb), XINO); -+ h_ino = 0; -+ if (a->h_child_inode) -+ h_ino = a->h_child_inode->i_ino; -+ -+ if (a->h_child_nlen -+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN) -+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT))) -+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen, -+ a->dir); -+ try_iput = 0; -+ if (dentry) -+ inode = dentry->d_inode; -+ if (xino && !inode && h_ino -+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0) -+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0) -+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) { -+ inode = lookup_wlock_by_ino(sb, bfound, h_ino); -+ try_iput = 1; -+ } -+ -+ args.flags = a->flags[AuHn_CHILD]; -+ args.dentry = dentry; -+ args.inode = inode; -+ args.h_inode = a->h_child_inode; -+ args.dir = a->dir; -+ args.h_dir = a->h_dir; -+ args.h_name = a->h_child_name; -+ args.h_nlen = a->h_child_nlen; -+ err = hn_job(&args); -+ if (dentry) { -+ if (au_di(dentry)) -+ di_write_unlock(dentry); -+ dput(dentry); -+ } -+ if (inode && try_iput) { -+ ii_write_unlock(inode); -+ iput(inode); -+ } -+ -+ ii_write_lock_parent(a->dir); -+ args.flags = a->flags[AuHn_PARENT]; -+ args.dentry = NULL; -+ args.inode = a->dir; -+ args.h_inode = a->h_dir; -+ args.dir = NULL; -+ args.h_dir = NULL; -+ args.h_name = NULL; -+ args.h_nlen = 0; -+ err = hn_job(&args); -+ ii_write_unlock(a->dir); -+ -+out: -+ iput(a->h_child_inode); -+ 
iput(a->h_dir); -+ iput(a->dir); -+ si_write_unlock(sb); -+ au_nwt_done(&sbinfo->si_nowait); -+ kfree(a); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, -+ struct qstr *h_child_qstr, struct inode *h_child_inode) -+{ -+ int err, len; -+ unsigned int flags[AuHnLast], f; -+ unsigned char isdir, isroot, wh; -+ struct inode *dir; -+ struct au_hnotify_args *args; -+ char *p, *h_child_name; -+ -+ err = 0; -+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode); -+ dir = igrab(hnotify->hn_aufs_inode); -+ if (!dir) -+ goto out; -+ -+ isroot = (dir->i_ino == AUFS_ROOT_INO); -+ wh = 0; -+ h_child_name = (void *)h_child_qstr->name; -+ len = h_child_qstr->len; -+ if (h_child_name) { -+ if (len > AUFS_WH_PFX_LEN -+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { -+ h_child_name += AUFS_WH_PFX_LEN; -+ len -= AUFS_WH_PFX_LEN; -+ wh = 1; -+ } -+ } -+ -+ isdir = 0; -+ if (h_child_inode) -+ isdir = !!S_ISDIR(h_child_inode->i_mode); -+ flags[AuHn_PARENT] = AuHnJob_ISDIR; -+ flags[AuHn_CHILD] = 0; -+ if (isdir) -+ flags[AuHn_CHILD] = AuHnJob_ISDIR; -+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT); -+ au_fset_hnjob(flags[AuHn_CHILD], GEN); -+ switch (mask & FS_EVENTS_POSS_ON_CHILD) { -+ case FS_MOVED_FROM: -+ case FS_MOVED_TO: -+ au_fset_hnjob(flags[AuHn_CHILD], XINO0); -+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); -+ /*FALLTHROUGH*/ -+ case FS_CREATE: -+ AuDebugOn(!h_child_name); -+ break; -+ -+ case FS_DELETE: -+ /* -+ * aufs never be able to get this child inode. -+ * revalidation should be in d_revalidate() -+ * by checking i_nlink, i_generation or d_unhashed(). 
-+ */ -+ AuDebugOn(!h_child_name); -+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0); -+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT); -+ break; -+ -+ default: -+ AuDebugOn(1); -+ } -+ -+ if (wh) -+ h_child_inode = NULL; -+ -+ err = -ENOMEM; -+ /* iput() and kfree() will be called in au_hnotify() */ -+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS); -+ if (unlikely(!args)) { -+ AuErr1("no memory\n"); -+ iput(dir); -+ goto out; -+ } -+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT]; -+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD]; -+ args->mask = mask; -+ args->dir = dir; -+ args->h_dir = igrab(h_dir); -+ if (h_child_inode) -+ h_child_inode = igrab(h_child_inode); /* can be NULL */ -+ args->h_child_inode = h_child_inode; -+ args->h_child_nlen = len; -+ if (len) { -+ p = (void *)args; -+ p += sizeof(*args); -+ memcpy(p, h_child_name, len); -+ p[len] = 0; -+ } -+ -+ /* NFS fires the event for silly-renamed one from kworker */ -+ f = 0; -+ if (!dir->i_nlink -+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE))) -+ f = AuWkq_NEST; -+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f); -+ if (unlikely(err)) { -+ pr_err("wkq %d\n", err); -+ iput(args->h_child_inode); -+ iput(args->h_dir); -+ iput(args->dir); -+ kfree(args); -+ } -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm) -+{ -+ int err; -+ -+ AuDebugOn(!(udba & AuOptMask_UDBA)); -+ -+ err = 0; -+ if (au_hnotify_op.reset_br) -+ err = au_hnotify_op.reset_br(udba, br, perm); -+ -+ return err; -+} -+ -+int au_hnotify_init_br(struct au_branch *br, int perm) -+{ -+ int err; -+ -+ err = 0; -+ if (au_hnotify_op.init_br) -+ err = au_hnotify_op.init_br(br, perm); -+ -+ return err; -+} -+ -+void au_hnotify_fin_br(struct au_branch *br) -+{ -+ if (au_hnotify_op.fin_br) -+ au_hnotify_op.fin_br(br); -+} -+ -+static void au_hn_destroy_cache(void) -+{ -+ 
kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]); -+ au_cachep[AuCache_HNOTIFY] = NULL; -+} -+ -+int __init au_hnotify_init(void) -+{ -+ int err; -+ -+ err = -ENOMEM; -+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify); -+ if (au_cachep[AuCache_HNOTIFY]) { -+ err = 0; -+ if (au_hnotify_op.init) -+ err = au_hnotify_op.init(); -+ if (unlikely(err)) -+ au_hn_destroy_cache(); -+ } -+ AuTraceErr(err); -+ return err; -+} -+ -+void au_hnotify_fin(void) -+{ -+ if (au_hnotify_op.fin) -+ au_hnotify_op.fin(); -+ /* cf. au_cache_fin() */ -+ if (au_cachep[AuCache_HNOTIFY]) -+ au_hn_destroy_cache(); -+} -diff --git fs/aufs/i_op.c fs/aufs/i_op.c -new file mode 100644 -index 0000000..ae66937 ---- /dev/null -+++ fs/aufs/i_op.c -@@ -0,0 +1,1127 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * inode operations (except add/del/rename) -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "aufs.h" -+ -+static int h_permission(struct inode *h_inode, int mask, -+ struct vfsmount *h_mnt, int brperm) -+{ -+ int err; -+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); -+ -+ err = -EACCES; -+ if ((write_mask && IS_IMMUTABLE(h_inode)) -+ || ((mask & MAY_EXEC) -+ && S_ISREG(h_inode->i_mode) -+ && ((h_mnt->mnt_flags & MNT_NOEXEC) -+ || !(h_inode->i_mode & S_IXUGO)))) -+ goto out; -+ -+ /* -+ * - skip the lower fs test in the case of write to ro branch. -+ * - nfs dir permission write check is optimized, but a policy for -+ * link/rename requires a real check. 
-+ */ -+ if ((write_mask && !au_br_writable(brperm)) -+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode) -+ && write_mask && !(mask & MAY_READ)) -+ || !h_inode->i_op->permission) { -+ /* AuLabel(generic_permission); */ -+ err = generic_permission(h_inode, mask); -+ } else { -+ /* AuLabel(h_inode->permission); */ -+ err = h_inode->i_op->permission(h_inode, mask); -+ AuTraceErr(err); -+ } -+ -+ if (!err) -+ err = devcgroup_inode_permission(h_inode, mask); -+ if (!err) -+ err = security_inode_permission(h_inode, mask); -+ -+#if 0 -+ if (!err) { -+ /* todo: do we need to call ima_path_check()? */ -+ struct path h_path = { -+ .dentry = -+ .mnt = h_mnt -+ }; -+ err = ima_path_check(&h_path, -+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC), -+ IMA_COUNT_LEAVE); -+ } -+#endif -+ -+out: -+ return err; -+} -+ -+static int aufs_permission(struct inode *inode, int mask) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ const unsigned char isdir = !!S_ISDIR(inode->i_mode), -+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); -+ struct inode *h_inode; -+ struct super_block *sb; -+ struct au_branch *br; -+ -+ /* todo: support rcu-walk? 
*/ -+ if (mask & MAY_NOT_BLOCK) -+ return -ECHILD; -+ -+ sb = inode->i_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ ii_read_lock_child(inode); -+#if 0 -+ err = au_iigen_test(inode, au_sigen(sb)); -+ if (unlikely(err)) -+ goto out; -+#endif -+ -+ if (!isdir -+ || write_mask -+ || au_opt_test(au_mntflags(sb), DIRPERM1)) { -+ err = au_busy_or_stale(); -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ if (unlikely(!h_inode -+ || (h_inode->i_mode & S_IFMT) -+ != (inode->i_mode & S_IFMT))) -+ goto out; -+ -+ err = 0; -+ bindex = au_ibstart(inode); -+ br = au_sbr(sb, bindex); -+ err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm); -+ if (write_mask -+ && !err -+ && !special_file(h_inode->i_mode)) { -+ /* test whether the upper writable branch exists */ -+ err = -EROFS; -+ for (; bindex >= 0; bindex--) -+ if (!au_br_rdonly(au_sbr(sb, bindex))) { -+ err = 0; -+ break; -+ } -+ } -+ goto out; -+ } -+ -+ /* non-write to dir */ -+ err = 0; -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (h_inode) { -+ err = au_busy_or_stale(); -+ if (unlikely(!S_ISDIR(h_inode->i_mode))) -+ break; -+ -+ br = au_sbr(sb, bindex); -+ err = h_permission(h_inode, mask, au_br_mnt(br), -+ br->br_perm); -+ } -+ } -+ -+out: -+ ii_read_unlock(inode); -+ si_read_unlock(sb); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, -+ unsigned int flags) -+{ -+ struct dentry *ret, *parent; -+ struct inode *inode; -+ struct super_block *sb; -+ int err, npositive; -+ -+ IMustLock(dir); -+ -+ /* todo: support rcu-walk? 
*/ -+ ret = ERR_PTR(-ECHILD); -+ if (flags & LOOKUP_RCU) -+ goto out; -+ -+ ret = ERR_PTR(-ENAMETOOLONG); -+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ goto out; -+ -+ sb = dir->i_sb; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ ret = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_di_init(dentry); -+ ret = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_si; -+ -+ inode = NULL; -+ npositive = 0; /* suppress a warning */ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_alive_dir(parent); -+ if (!err) -+ err = au_digen_test(parent, au_sigen(sb)); -+ if (!err) { -+ npositive = au_lkup_dentry(dentry, au_dbstart(parent), -+ /*type*/0); -+ err = npositive; -+ } -+ di_read_unlock(parent, AuLock_IR); -+ ret = ERR_PTR(err); -+ if (unlikely(err < 0)) -+ goto out_unlock; -+ -+ if (npositive) { -+ inode = au_new_inode(dentry, /*must_new*/0); -+ ret = (void *)inode; -+ } -+ if (IS_ERR(inode)) { -+ inode = NULL; -+ goto out_unlock; -+ } -+ -+ ret = d_splice_alias(inode, dentry); -+#if 0 -+ if (unlikely(d_need_lookup(dentry))) { -+ spin_lock(&dentry->d_lock); -+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP; -+ spin_unlock(&dentry->d_lock); -+ } else -+#endif -+ if (unlikely(IS_ERR(ret) && inode)) { -+ ii_write_unlock(inode); -+ iput(inode); -+ inode = NULL; -+ } -+ -+out_unlock: -+ di_write_unlock(dentry); -+ if (inode) { -+ /* verbose coding for lock class name */ -+ if (unlikely(S_ISLNK(inode->i_mode))) -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcSymlink_DIINFO); -+ else if (unlikely(S_ISDIR(inode->i_mode))) -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcDir_DIINFO); -+ else /* likely */ -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcNonDir_DIINFO); -+ } -+out_si: -+ si_read_unlock(sb); -+out: -+ return ret; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_wr_dir_cpup(struct dentry 
*dentry, struct dentry *parent, -+ const unsigned char add_entry, aufs_bindex_t bcpup, -+ aufs_bindex_t bstart) -+{ -+ int err; -+ struct dentry *h_parent; -+ struct inode *h_dir; -+ -+ if (add_entry) -+ IMustLock(parent->d_inode); -+ else -+ di_write_lock_parent(parent); -+ -+ err = 0; -+ if (!au_h_dptr(parent, bcpup)) { -+ if (bstart > bcpup) -+ err = au_cpup_dirs(dentry, bcpup); -+ else if (bstart < bcpup) -+ err = au_cpdown_dirs(dentry, bcpup); -+ else -+ BUG(); -+ } -+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) { -+ h_parent = au_h_dptr(parent, bcpup); -+ h_dir = h_parent->d_inode; -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); -+ err = au_lkup_neg(dentry, bcpup, -+ au_ftest_wrdir(add_entry, TMP_WHENTRY)); -+ /* todo: no unlock here */ -+ mutex_unlock(&h_dir->i_mutex); -+ -+ AuDbg("bcpup %d\n", bcpup); -+ if (!err) { -+ if (!dentry->d_inode) -+ au_set_h_dptr(dentry, bstart, NULL); -+ au_update_dbrange(dentry, /*do_put_zero*/0); -+ } -+ } -+ -+ if (!add_entry) -+ di_write_unlock(parent); -+ if (!err) -+ err = bcpup; /* success */ -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * decide the branch and the parent dir where we will create a new entry. -+ * returns new bindex or an error. -+ * copyup the parent dir if needed. 
-+ */ -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args) -+{ -+ int err; -+ unsigned int flags; -+ aufs_bindex_t bcpup, bstart, src_bstart; -+ const unsigned char add_entry -+ = au_ftest_wrdir(args->flags, ADD_ENTRY) -+ | au_ftest_wrdir(args->flags, TMP_WHENTRY) -+ | au_ftest_wrdir(args->flags, TMPFILE); -+ struct super_block *sb; -+ struct dentry *parent; -+ struct au_sbinfo *sbinfo; -+ -+ sb = dentry->d_sb; -+ sbinfo = au_sbi(sb); -+ parent = dget_parent(dentry); -+ bstart = au_dbstart(dentry); -+ bcpup = bstart; -+ if (args->force_btgt < 0) { -+ if (src_dentry) { -+ src_bstart = au_dbstart(src_dentry); -+ if (src_bstart < bstart) -+ bcpup = src_bstart; -+ } else if (add_entry) { -+ flags = 0; -+ if (au_ftest_wrdir(args->flags, ISDIR)) -+ au_fset_wbr(flags, DIR); -+ err = AuWbrCreate(sbinfo, dentry, flags); -+ bcpup = err; -+ } -+ -+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) { -+ if (add_entry) -+ err = AuWbrCopyup(sbinfo, dentry); -+ else { -+ if (!IS_ROOT(dentry)) { -+ di_read_lock_parent(parent, !AuLock_IR); -+ err = AuWbrCopyup(sbinfo, dentry); -+ di_read_unlock(parent, !AuLock_IR); -+ } else -+ err = AuWbrCopyup(sbinfo, dentry); -+ } -+ bcpup = err; -+ if (unlikely(err < 0)) -+ goto out; -+ } -+ } else { -+ bcpup = args->force_btgt; -+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode)); -+ } -+ -+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup); -+ err = bcpup; -+ if (bcpup == bstart) -+ goto out; /* success */ -+ -+ /* copyup the new parent into the branch we process */ -+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart); -+ if (err >= 0) { -+ if (!dentry->d_inode) { -+ au_set_h_dptr(dentry, bstart, NULL); -+ au_set_dbstart(dentry, bcpup); -+ au_set_dbend(dentry, bcpup); -+ } -+ AuDebugOn(add_entry -+ && !au_ftest_wrdir(args->flags, TMPFILE) -+ && !au_h_dptr(dentry, bcpup)); -+ } -+ -+out: -+ dput(parent); -+ return err; -+} -+ -+/* 
---------------------------------------------------------------------- */ -+ -+void au_pin_hdir_unlock(struct au_pin *p) -+{ -+ if (p->hdir) -+ au_hn_imtx_unlock(p->hdir); -+} -+ -+static int au_pin_hdir_lock(struct au_pin *p) -+{ -+ int err; -+ -+ err = 0; -+ if (!p->hdir) -+ goto out; -+ -+ /* even if an error happens later, keep this lock */ -+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi); -+ -+ err = -EBUSY; -+ if (unlikely(p->hdir->hi_inode != p->h_parent->d_inode)) -+ goto out; -+ -+ err = 0; -+ if (p->h_dentry) -+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode, -+ p->h_parent, p->br); -+ -+out: -+ return err; -+} -+ -+int au_pin_hdir_relock(struct au_pin *p) -+{ -+ int err, i; -+ struct inode *h_i; -+ struct dentry *h_d[] = { -+ p->h_dentry, -+ p->h_parent -+ }; -+ -+ err = au_pin_hdir_lock(p); -+ if (unlikely(err)) -+ goto out; -+ -+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) { -+ if (!h_d[i]) -+ continue; -+ h_i = h_d[i]->d_inode; -+ if (h_i) -+ err = !h_i->i_nlink; -+ } -+ -+out: -+ return err; -+} -+ -+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task) -+{ -+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) -+ p->hdir->hi_inode->i_mutex.owner = task; -+#endif -+} -+ -+void au_pin_hdir_acquire_nest(struct au_pin *p) -+{ -+ if (p->hdir) { -+ mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map, -+ p->lsc_hi, 0, NULL, _RET_IP_); -+ au_pin_hdir_set_owner(p, current); -+ } -+} -+ -+void au_pin_hdir_release(struct au_pin *p) -+{ -+ if (p->hdir) { -+ au_pin_hdir_set_owner(p, p->task); -+ mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_); -+ } -+} -+ -+struct dentry *au_pinned_h_parent(struct au_pin *pin) -+{ -+ if (pin && pin->parent) -+ return au_h_dptr(pin->parent, pin->bindex); -+ return NULL; -+} -+ -+void au_unpin(struct au_pin *p) -+{ -+ if (p->hdir) -+ au_pin_hdir_unlock(p); -+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE)) -+ vfsub_mnt_drop_write(p->h_mnt); -+ if (!p->hdir) -+ return; 
-+ -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ iput(p->hdir->hi_inode); -+ dput(p->parent); -+ p->parent = NULL; -+ p->hdir = NULL; -+ p->h_mnt = NULL; -+ /* do not clear p->task */ -+} -+ -+int au_do_pin(struct au_pin *p) -+{ -+ int err; -+ struct super_block *sb; -+ struct inode *h_dir; -+ -+ err = 0; -+ sb = p->dentry->d_sb; -+ p->br = au_sbr(sb, p->bindex); -+ if (IS_ROOT(p->dentry)) { -+ if (au_ftest_pin(p->flags, MNT_WRITE)) { -+ p->h_mnt = au_br_mnt(p->br); -+ err = vfsub_mnt_want_write(p->h_mnt); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ goto out_err; -+ } -+ } -+ goto out; -+ } -+ -+ p->h_dentry = NULL; -+ if (p->bindex <= au_dbend(p->dentry)) -+ p->h_dentry = au_h_dptr(p->dentry, p->bindex); -+ -+ p->parent = dget_parent(p->dentry); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_lock(p->parent, AuLock_IR, p->lsc_di); -+ -+ h_dir = NULL; -+ p->h_parent = au_h_dptr(p->parent, p->bindex); -+ p->hdir = au_hi(p->parent->d_inode, p->bindex); -+ if (p->hdir) -+ h_dir = p->hdir->hi_inode; -+ -+ /* -+ * udba case, or -+ * if DI_LOCKED is not set, then p->parent may be different -+ * and h_parent can be NULL. 
-+ */ -+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) { -+ err = -EBUSY; -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ dput(p->parent); -+ p->parent = NULL; -+ goto out_err; -+ } -+ -+ if (au_ftest_pin(p->flags, MNT_WRITE)) { -+ p->h_mnt = au_br_mnt(p->br); -+ err = vfsub_mnt_want_write(p->h_mnt); -+ if (unlikely(err)) { -+ au_fclr_pin(p->flags, MNT_WRITE); -+ if (!au_ftest_pin(p->flags, DI_LOCKED)) -+ di_read_unlock(p->parent, AuLock_IR); -+ dput(p->parent); -+ p->parent = NULL; -+ goto out_err; -+ } -+ } -+ -+ au_igrab(h_dir); -+ err = au_pin_hdir_lock(p); -+ if (!err) -+ goto out; /* success */ -+ -+out_err: -+ pr_err("err %d\n", err); -+ err = au_busy_or_stale(); -+out: -+ return err; -+} -+ -+void au_pin_init(struct au_pin *p, struct dentry *dentry, -+ aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags) -+{ -+ p->dentry = dentry; -+ p->udba = udba; -+ p->lsc_di = lsc_di; -+ p->lsc_hi = lsc_hi; -+ p->flags = flags; -+ p->bindex = bindex; -+ -+ p->parent = NULL; -+ p->hdir = NULL; -+ p->h_mnt = NULL; -+ -+ p->h_dentry = NULL; -+ p->h_parent = NULL; -+ p->br = NULL; -+ p->task = current; -+} -+ -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) -+{ -+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2, -+ udba, flags); -+ return au_do_pin(pin); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * ->setattr() and ->getattr() are called in various cases. -+ * chmod, stat: dentry is revalidated. -+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be -+ * unhashed. -+ * for ->setattr(), ia->ia_file is passed from ftruncate only. 
-+ */ -+/* todo: consolidate with do_refresh() and simple_reval_dpath() */ -+static int au_reval_for_attr(struct dentry *dentry, unsigned int sigen) -+{ -+ int err; -+ struct inode *inode; -+ struct dentry *parent; -+ -+ err = 0; -+ inode = dentry->d_inode; -+ if (au_digen_test(dentry, sigen)) { -+ parent = dget_parent(dentry); -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_refresh_dentry(dentry, parent); -+ di_read_unlock(parent, AuLock_IR); -+ dput(parent); -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+#define AuIcpup_DID_CPUP 1 -+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name) -+#define au_fset_icpup(flags, name) \ -+ do { (flags) |= AuIcpup_##name; } while (0) -+#define au_fclr_icpup(flags, name) \ -+ do { (flags) &= ~AuIcpup_##name; } while (0) -+ -+struct au_icpup_args { -+ unsigned char flags; -+ unsigned char pin_flags; -+ aufs_bindex_t btgt; -+ unsigned int udba; -+ struct au_pin pin; -+ struct path h_path; -+ struct inode *h_inode; -+}; -+ -+static int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia, -+ struct au_icpup_args *a) -+{ -+ int err; -+ loff_t sz; -+ aufs_bindex_t bstart, ibstart; -+ struct dentry *hi_wh, *parent; -+ struct inode *inode; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = 0 -+ }; -+ -+ bstart = au_dbstart(dentry); -+ inode = dentry->d_inode; -+ if (S_ISDIR(inode->i_mode)) -+ au_fset_wrdir(wr_dir_args.flags, ISDIR); -+ /* plink or hi_wh() case */ -+ ibstart = au_ibstart(inode); -+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode)) -+ wr_dir_args.force_btgt = ibstart; -+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); -+ if (unlikely(err < 0)) -+ goto out; -+ a->btgt = err; -+ if (err != bstart) -+ au_fset_icpup(a->flags, DID_CPUP); -+ -+ err = 0; -+ a->pin_flags = AuPin_MNT_WRITE; -+ parent = NULL; -+ if (!IS_ROOT(dentry)) { -+ au_fset_pin(a->pin_flags, DI_LOCKED); -+ parent = dget_parent(dentry); -+ di_write_lock_parent(parent); -+ } -+ -+ err 
= au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ a->h_path.dentry = au_h_dptr(dentry, bstart); -+ a->h_inode = a->h_path.dentry->d_inode; -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ sz = -1; -+ if ((ia->ia_valid & ATTR_SIZE) && ia->ia_size < i_size_read(a->h_inode)) -+ sz = ia->ia_size; -+ mutex_unlock(&a->h_inode->i_mutex); -+ -+ hi_wh = NULL; -+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) { -+ hi_wh = au_hi_wh(inode, a->btgt); -+ if (!hi_wh) { -+ struct au_cp_generic cpg = { -+ .dentry = dentry, -+ .bdst = a->btgt, -+ .bsrc = -1, -+ .len = sz, -+ .pin = &a->pin -+ }; -+ err = au_sio_cpup_wh(&cpg, /*file*/NULL); -+ if (unlikely(err)) -+ goto out_unlock; -+ hi_wh = au_hi_wh(inode, a->btgt); -+ /* todo: revalidate hi_wh? */ -+ } -+ } -+ -+ if (parent) { -+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0); -+ di_downgrade_lock(parent, AuLock_IR); -+ dput(parent); -+ parent = NULL; -+ } -+ if (!au_ftest_icpup(a->flags, DID_CPUP)) -+ goto out; /* success */ -+ -+ if (!d_unhashed(dentry)) { -+ struct au_cp_generic cpg = { -+ .dentry = dentry, -+ .bdst = a->btgt, -+ .bsrc = bstart, -+ .len = sz, -+ .pin = &a->pin, -+ .flags = AuCpup_DTIME | AuCpup_HOPEN -+ }; -+ err = au_sio_cpup_simple(&cpg); -+ if (!err) -+ a->h_path.dentry = au_h_dptr(dentry, a->btgt); -+ } else if (!hi_wh) -+ a->h_path.dentry = au_h_dptr(dentry, a->btgt); -+ else -+ a->h_path.dentry = hi_wh; /* do not dget here */ -+ -+out_unlock: -+ a->h_inode = a->h_path.dentry->d_inode; -+ if (!err) -+ goto out; /* success */ -+ au_unpin(&a->pin); -+out_parent: -+ if (parent) { -+ di_write_unlock(parent); -+ dput(parent); -+ } -+out: -+ if (!err) -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ return err; -+} -+ -+static int aufs_setattr(struct dentry *dentry, struct iattr *ia) -+{ -+ int err; -+ struct inode *inode, *delegated; -+ struct super_block *sb; -+ struct file *file; -+ struct au_icpup_args *a; -+ -+ 
inode = dentry->d_inode; -+ IMustLock(inode); -+ -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) -+ ia->ia_valid &= ~ATTR_MODE; -+ -+ file = NULL; -+ sb = dentry->d_sb; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out_kfree; -+ -+ if (ia->ia_valid & ATTR_FILE) { -+ /* currently ftruncate(2) only */ -+ AuDebugOn(!S_ISREG(inode->i_mode)); -+ file = ia->ia_file; -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1); -+ if (unlikely(err)) -+ goto out_si; -+ ia->ia_file = au_hf_top(file); -+ a->udba = AuOpt_UDBA_NONE; -+ } else { -+ /* fchmod() doesn't pass ia_file */ -+ a->udba = au_opt_udba(sb); -+ di_write_lock_child(dentry); -+ /* no d_unlinked(), to set UDBA_NONE for root */ -+ if (d_unhashed(dentry)) -+ a->udba = AuOpt_UDBA_NONE; -+ if (a->udba != AuOpt_UDBA_NONE) { -+ AuDebugOn(IS_ROOT(dentry)); -+ err = au_reval_for_attr(dentry, au_sigen(sb)); -+ if (unlikely(err)) -+ goto out_dentry; -+ } -+ } -+ -+ err = au_pin_and_icpup(dentry, ia, a); -+ if (unlikely(err < 0)) -+ goto out_dentry; -+ if (au_ftest_icpup(a->flags, DID_CPUP)) { -+ ia->ia_file = NULL; -+ ia->ia_valid &= ~ATTR_FILE; -+ } -+ -+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt); -+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME)) -+ == (ATTR_MODE | ATTR_CTIME)) { -+ err = security_path_chmod(&a->h_path, ia->ia_mode); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID)) -+ && (ia->ia_valid & ATTR_CTIME)) { -+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid); -+ if (unlikely(err)) -+ goto out_unlock; -+ } -+ -+ if (ia->ia_valid & ATTR_SIZE) { -+ struct file *f; -+ -+ if (ia->ia_size < i_size_read(inode)) -+ /* unmap only */ -+ truncate_setsize(inode, ia->ia_size); -+ -+ f = NULL; -+ if (ia->ia_valid & ATTR_FILE) -+ f = ia->ia_file; -+ mutex_unlock(&a->h_inode->i_mutex); -+ err = vfsub_trunc(&a->h_path, 
ia->ia_size, ia->ia_valid, f); -+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD); -+ } else { -+ delegated = NULL; -+ while (1) { -+ err = vfsub_notify_change(&a->h_path, ia, &delegated); -+ if (delegated) { -+ err = break_deleg_wait(&delegated); -+ if (!err) -+ continue; -+ } -+ break; -+ } -+ } -+ if (!err) -+ au_cpup_attr_changeable(inode); -+ -+out_unlock: -+ mutex_unlock(&a->h_inode->i_mutex); -+ au_unpin(&a->pin); -+ if (unlikely(err)) -+ au_update_dbstart(dentry); -+out_dentry: -+ di_write_unlock(dentry); -+ if (file) { -+ fi_write_unlock(file); -+ ia->ia_file = file; -+ ia->ia_valid |= ATTR_FILE; -+ } -+out_si: -+ si_read_unlock(sb); -+out_kfree: -+ kfree(a); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static void au_refresh_iattr(struct inode *inode, struct kstat *st, -+ unsigned int nlink) -+{ -+ unsigned int n; -+ -+ inode->i_mode = st->mode; -+ /* don't i_[ug]id_write() here */ -+ inode->i_uid = st->uid; -+ inode->i_gid = st->gid; -+ inode->i_atime = st->atime; -+ inode->i_mtime = st->mtime; -+ inode->i_ctime = st->ctime; -+ -+ au_cpup_attr_nlink(inode, /*force*/0); -+ if (S_ISDIR(inode->i_mode)) { -+ n = inode->i_nlink; -+ n -= nlink; -+ n += st->nlink; -+ smp_mb(); /* for i_nlink */ -+ /* 0 can happen */ -+ set_nlink(inode, n); -+ } -+ -+ spin_lock(&inode->i_lock); -+ inode->i_blocks = st->blocks; -+ i_size_write(inode, st->size); -+ spin_unlock(&inode->i_lock); -+} -+ -+static int aufs_getattr(struct vfsmount *mnt __maybe_unused, -+ struct dentry *dentry, struct kstat *st) -+{ -+ int err; -+ unsigned int mnt_flags, sigen; -+ aufs_bindex_t bindex; -+ unsigned char udba_none, positive; -+ struct super_block *sb, *h_sb; -+ struct inode *inode; -+ struct path h_path; -+ -+ sb = dentry->d_sb; -+ inode = dentry->d_inode; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out; -+ mnt_flags = au_mntflags(sb); -+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE); -+ -+ /* support fstat(2) */ -+ if 
(!d_unlinked(dentry) && !udba_none) { -+ sigen = au_sigen(sb); -+ err = au_digen_test(dentry, sigen); -+ if (!err) { -+ di_read_lock_child(dentry, AuLock_IR); -+ err = au_dbrange_test(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else { -+ AuDebugOn(IS_ROOT(dentry)); -+ di_write_lock_child(dentry); -+ err = au_dbrange_test(dentry); -+ if (!err) -+ err = au_reval_for_attr(dentry, sigen); -+ di_downgrade_lock(dentry, AuLock_IR); -+ if (unlikely(err)) -+ goto out_unlock; -+ } -+ } else -+ di_read_lock_child(dentry, AuLock_IR); -+ -+ bindex = au_ibstart(inode); -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ h_sb = h_path.mnt->mnt_sb; -+ if (!au_test_fs_bad_iattr(h_sb) && udba_none) -+ goto out_fill; /* success */ -+ -+ h_path.dentry = NULL; -+ if (au_dbstart(dentry) == bindex) -+ h_path.dentry = dget(au_h_dptr(dentry, bindex)); -+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) { -+ h_path.dentry = au_plink_lkup(inode, bindex); -+ if (IS_ERR(h_path.dentry)) -+ goto out_fill; /* pretending success */ -+ } -+ /* illegally overlapped or something */ -+ if (unlikely(!h_path.dentry)) -+ goto out_fill; /* pretending success */ -+ -+ positive = !!h_path.dentry->d_inode; -+ if (positive) -+ err = vfs_getattr(&h_path, st); -+ dput(h_path.dentry); -+ if (!err) { -+ if (positive) -+ au_refresh_iattr(inode, st, -+ h_path.dentry->d_inode->i_nlink); -+ goto out_fill; /* success */ -+ } -+ AuTraceErr(err); -+ goto out_unlock; -+ -+out_fill: -+ generic_fillattr(inode, st); -+out_unlock: -+ di_read_unlock(dentry, AuLock_IR); -+ si_read_unlock(sb); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf, -+ int bufsiz) -+{ -+ int err; -+ struct super_block *sb; -+ struct dentry *h_dentry; -+ -+ err = -EINVAL; -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (unlikely(!h_dentry->d_inode->i_op->readlink)) -+ goto out; -+ 
-+ err = security_inode_readlink(h_dentry); -+ if (unlikely(err)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ if (!au_test_ro(sb, bindex, dentry->d_inode)) { -+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry); -+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode); -+ } -+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz); -+ -+out: -+ return err; -+} -+ -+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) -+{ -+ int err; -+ -+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); -+ if (unlikely(err)) -+ goto out; -+ err = au_d_hashed_positive(dentry); -+ if (!err) -+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz); -+ aufs_read_unlock(dentry, AuLock_IR); -+ -+out: -+ return err; -+} -+ -+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ int err; -+ mm_segment_t old_fs; -+ union { -+ char *k; -+ char __user *u; -+ } buf; -+ -+ err = -ENOMEM; -+ buf.k = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!buf.k)) -+ goto out; -+ -+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_name; -+ -+ err = au_d_hashed_positive(dentry); -+ if (!err) { -+ old_fs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX); -+ set_fs(old_fs); -+ } -+ aufs_read_unlock(dentry, AuLock_IR); -+ -+ if (err >= 0) { -+ buf.k[err] = 0; -+ /* will be freed by put_link */ -+ nd_set_link(nd, buf.k); -+ return NULL; /* success */ -+ } -+ -+out_name: -+ free_page((unsigned long)buf.k); -+out: -+ AuTraceErr(err); -+ return ERR_PTR(err); -+} -+ -+static void aufs_put_link(struct dentry *dentry __maybe_unused, -+ struct nameidata *nd, void *cookie __maybe_unused) -+{ -+ char *p; -+ -+ p = nd_get_link(nd); -+ if (!IS_ERR_OR_NULL(p)) -+ free_page((unsigned long)p); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_update_time(struct inode *inode, struct timespec *ts, int 
flags) -+{ -+ int err; -+ struct super_block *sb; -+ struct inode *h_inode; -+ -+ sb = inode->i_sb; -+ /* mmap_sem might be acquired already, cf. aufs_mmap() */ -+ lockdep_off(); -+ si_read_lock(sb, AuLock_FLUSH); -+ ii_write_lock_child(inode); -+ lockdep_on(); -+ h_inode = au_h_iptr(inode, au_ibstart(inode)); -+ err = vfsub_update_time(h_inode, ts, flags); -+ lockdep_off(); -+ if (!err) -+ au_cpup_attr_timesizes(inode); -+ ii_write_unlock(inode); -+ si_read_unlock(sb); -+ lockdep_on(); -+ -+ if (!err && (flags & S_VERSION)) -+ inode_inc_iversion(inode); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct inode_operations aufs_symlink_iop = { -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, -+ -+ .readlink = aufs_readlink, -+ .follow_link = aufs_follow_link, -+ .put_link = aufs_put_link, -+ -+ /* .update_time = aufs_update_time */ -+}; -+ -+struct inode_operations aufs_dir_iop = { -+ .create = aufs_create, -+ .lookup = aufs_lookup, -+ .link = aufs_link, -+ .unlink = aufs_unlink, -+ .symlink = aufs_symlink, -+ .mkdir = aufs_mkdir, -+ .rmdir = aufs_rmdir, -+ .mknod = aufs_mknod, -+ .rename = aufs_rename, -+ -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, -+ -+ .update_time = aufs_update_time, -+ /* no support for atomic_open() */ -+ -+ .tmpfile = aufs_tmpfile -+}; -+ -+struct inode_operations aufs_iop = { -+ .permission = aufs_permission, -+ .setattr = aufs_setattr, -+ .getattr = aufs_getattr, -+ -+ .update_time = aufs_update_time -+}; -diff --git fs/aufs/i_op_add.c fs/aufs/i_op_add.c -new file mode 100644 -index 0000000..4371d03 ---- /dev/null -+++ fs/aufs/i_op_add.c -@@ -0,0 +1,867 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * inode operations (add entry) -+ */ -+ -+#include "aufs.h" -+ -+/* -+ * final procedure of adding a new entry, except link(2). 
-+ * remove whiteout, instantiate, copyup the parent dir's times and size -+ * and update version. -+ * if it failed, re-create the removed whiteout. -+ */ -+static int epilog(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct dentry *dentry) -+{ -+ int err, rerr; -+ aufs_bindex_t bwh; -+ struct path h_path; -+ struct inode *inode, *h_dir; -+ struct dentry *wh; -+ -+ bwh = -1; -+ if (wh_dentry) { -+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(h_dir); -+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir); -+ bwh = au_dbwh(dentry); -+ h_path.dentry = wh_dentry; -+ h_path.mnt = au_sbr_mnt(dir->i_sb, bindex); -+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ inode = au_new_inode(dentry, /*must_new*/1); -+ if (!IS_ERR(inode)) { -+ d_instantiate(dentry, inode); -+ dir = dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(dir); -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ dir->i_version++; -+ return 0; /* success */ -+ } -+ -+ err = PTR_ERR(inode); -+ if (!wh_dentry) -+ goto out; -+ -+ /* revert */ -+ /* dir inode is locked */ -+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent); -+ rerr = PTR_ERR(wh); -+ if (IS_ERR(wh)) { -+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } else -+ dput(wh); -+ -+out: -+ return err; -+} -+ -+static int au_d_may_add(struct dentry *dentry) -+{ -+ int err; -+ -+ err = 0; -+ if (unlikely(d_unhashed(dentry))) -+ err = -ENOENT; -+ if (unlikely(dentry->d_inode)) -+ err = -EEXIST; -+ return err; -+} -+ -+/* -+ * simple tests for the adding inode operations. -+ * following the checks in vfs, plus the parent-child relationship. 
-+ */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir) -+{ -+ int err; -+ umode_t h_mode; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ -+ err = -ENAMETOOLONG; -+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ goto out; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ h_inode = h_dentry->d_inode; -+ if (!dentry->d_inode) { -+ err = -EEXIST; -+ if (unlikely(h_inode)) -+ goto out; -+ } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ -+ h_mode = h_inode->i_mode; -+ if (!isdir) { -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(h_mode))) -+ goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; -+ } -+ } -+ -+ err = 0; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ err = -EIO; -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * initial procedure of adding a new entry. -+ * prepare writable branch and the parent dir, lock it, -+ * and lookup whiteout for the new entry. 
-+ */ -+static struct dentry* -+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt, -+ struct dentry *src_dentry, struct au_pin *pin, -+ struct au_wr_dir_args *wr_dir_args) -+{ -+ struct dentry *wh_dentry, *h_parent; -+ struct super_block *sb; -+ struct au_branch *br; -+ int err; -+ unsigned int udba; -+ aufs_bindex_t bcpup; -+ -+ AuDbg("%pd\n", dentry); -+ -+ err = au_wr_dir(dentry, src_dentry, wr_dir_args); -+ bcpup = err; -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err < 0)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ udba = au_opt_udba(sb); -+ err = au_pin(pin, dentry, bcpup, udba, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ -+ h_parent = au_pinned_h_parent(pin); -+ if (udba != AuOpt_UDBA_NONE -+ && au_dbstart(dentry) == bcpup) -+ err = au_may_add(dentry, bcpup, h_parent, -+ au_ftest_wrdir(wr_dir_args->flags, ISDIR)); -+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ err = -ENAMETOOLONG; -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ br = au_sbr(sb, bcpup); -+ if (dt) { -+ struct path tmp = { -+ .dentry = h_parent, -+ .mnt = au_br_mnt(br) -+ }; -+ au_dtime_store(dt, au_pinned_parent(pin), &tmp); -+ } -+ -+ wh_dentry = NULL; -+ if (bcpup != au_dbwh(dentry)) -+ goto out; /* success */ -+ -+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br); -+ -+out_unpin: -+ if (IS_ERR(wh_dentry)) -+ au_unpin(pin); -+out: -+ return wh_dentry; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+enum { Mknod, Symlink, Creat }; -+struct simple_arg { -+ int type; -+ union { -+ struct { -+ umode_t mode; -+ bool want_excl; -+ } c; -+ struct { -+ const char *symname; -+ } s; -+ struct { -+ umode_t mode; -+ dev_t dev; -+ } m; -+ } u; -+}; -+ -+static int add_simple(struct inode *dir, struct dentry *dentry, -+ struct simple_arg *arg) -+{ -+ int err, rerr; -+ aufs_bindex_t bstart; -+ unsigned char created; -+ struct dentry *wh_dentry, 
*parent; -+ struct inode *h_dir; -+ /* to reuduce stack size */ -+ struct { -+ struct au_dtime dt; -+ struct au_pin pin; -+ struct path h_path; -+ struct au_wr_dir_args wr_dir_args; -+ } *a; -+ -+ AuDbg("%pd\n", dentry); -+ IMustLock(dir); -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ a->wr_dir_args.force_btgt = -1; -+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY; -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_free; -+ err = au_d_may_add(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, -+ &a->pin, &a->wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ bstart = au_dbstart(dentry); -+ a->h_path.dentry = au_h_dptr(dentry, bstart); -+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); -+ h_dir = au_pinned_h_dir(&a->pin); -+ switch (arg->type) { -+ case Creat: -+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode, -+ arg->u.c.want_excl); -+ break; -+ case Symlink: -+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname); -+ break; -+ case Mknod: -+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode, -+ arg->u.m.dev); -+ break; -+ default: -+ BUG(); -+ } -+ created = !err; -+ if (!err) -+ err = epilog(dir, bstart, wh_dentry, dentry); -+ -+ /* revert */ -+ if (unlikely(created && err && a->h_path.dentry->d_inode)) { -+ /* no delegation since it is just created */ -+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL, -+ /*force*/0); -+ if (rerr) { -+ AuIOErr("%pd revert failure(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&a->dt); -+ } -+ -+ au_unpin(&a->pin); -+ dput(wh_dentry); -+ -+out_parent: -+ di_write_unlock(parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } -+ aufs_read_unlock(dentry, 
AuLock_DW); -+out_free: -+ kfree(a); -+out: -+ return err; -+} -+ -+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+ dev_t dev) -+{ -+ struct simple_arg arg = { -+ .type = Mknod, -+ .u.m = { -+ .mode = mode, -+ .dev = dev -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) -+{ -+ struct simple_arg arg = { -+ .type = Symlink, -+ .u.s.symname = symname -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, -+ bool want_excl) -+{ -+ struct simple_arg arg = { -+ .type = Creat, -+ .u.c = { -+ .mode = mode, -+ .want_excl = want_excl -+ } -+ }; -+ return add_simple(dir, dentry, &arg); -+} -+ -+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct super_block *sb; -+ struct dentry *parent, *h_parent, *h_dentry; -+ struct inode *h_dir, *inode; -+ struct vfsmount *h_mnt; -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_TMPFILE -+ }; -+ -+ /* copy-up may happen */ -+ mutex_lock(&dir->i_mutex); -+ -+ sb = dir->i_sb; -+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_di_init(dentry); -+ if (unlikely(err)) -+ goto out_si; -+ -+ err = -EBUSY; -+ parent = d_find_any_alias(dir); -+ AuDebugOn(!parent); -+ di_write_lock_parent(parent); -+ if (unlikely(parent->d_inode != dir)) -+ goto out_parent; -+ -+ err = au_digen_test(parent, au_sigen(sb)); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ bindex = au_dbstart(parent); -+ au_set_dbstart(dentry, bindex); -+ au_set_dbend(dentry, bindex); -+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args); -+ bindex = err; -+ if (unlikely(err < 0)) -+ goto out_parent; -+ -+ err = -EOPNOTSUPP; -+ h_dir = au_h_iptr(dir, bindex); -+ if (unlikely(!h_dir->i_op->tmpfile)) -+ goto out_parent; -+ -+ h_mnt = 
au_sbr_mnt(sb, bindex); -+ err = vfsub_mnt_want_write(h_mnt); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ h_parent = au_h_dptr(parent, bindex); -+ err = inode_permission(h_parent->d_inode, MAY_WRITE | MAY_EXEC); -+ if (unlikely(err)) -+ goto out_mnt; -+ -+ err = -ENOMEM; -+ h_dentry = d_alloc(h_parent, &dentry->d_name); -+ if (unlikely(!h_dentry)) -+ goto out_mnt; -+ -+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode); -+ if (unlikely(err)) -+ goto out_dentry; -+ -+ au_set_dbstart(dentry, bindex); -+ au_set_dbend(dentry, bindex); -+ au_set_h_dptr(dentry, bindex, dget(h_dentry)); -+ inode = au_new_inode(dentry, /*must_new*/1); -+ if (IS_ERR(inode)) { -+ err = PTR_ERR(inode); -+ au_set_h_dptr(dentry, bindex, NULL); -+ au_set_dbstart(dentry, -1); -+ au_set_dbend(dentry, -1); -+ } else { -+ if (!inode->i_nlink) -+ set_nlink(inode, 1); -+ d_tmpfile(dentry, inode); -+ au_di(dentry)->di_tmpfile = 1; -+ -+ /* update without i_mutex */ -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ } -+ -+out_dentry: -+ dput(h_dentry); -+out_mnt: -+ vfsub_mnt_drop_write(h_mnt); -+out_parent: -+ di_write_unlock(parent); -+ dput(parent); -+ di_write_unlock(dentry); -+ if (!err) -+#if 0 -+ /* verbose coding for lock class name */ -+ au_rw_class(&au_di(dentry)->di_rwsem, -+ au_lc_key + AuLcNonDir_DIINFO); -+#else -+ ; -+#endif -+ else { -+ au_di_fin(dentry); -+ dentry->d_fsdata = NULL; -+ } -+out_si: -+ si_read_unlock(sb); -+out: -+ mutex_unlock(&dir->i_mutex); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_link_args { -+ aufs_bindex_t bdst, bsrc; -+ struct au_pin pin; -+ struct path h_path; -+ struct dentry *src_parent, *parent; -+}; -+ -+static int au_cpup_before_link(struct dentry *src_dentry, -+ struct au_link_args *a) -+{ -+ int err; -+ struct dentry *h_src_dentry; -+ struct au_cp_generic cpg = { -+ .dentry = src_dentry, -+ .bdst = a->bdst, -+ .bsrc = a->bsrc, -+ .len = -1, -+ .pin = 
&a->pin, -+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */ -+ }; -+ -+ di_read_lock_parent(a->src_parent, AuLock_IR); -+ err = au_test_and_cpup_dirs(src_dentry, a->bdst); -+ if (unlikely(err)) -+ goto out; -+ -+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); -+ err = au_pin(&a->pin, src_dentry, a->bdst, -+ au_opt_udba(src_dentry->d_sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_sio_cpup_simple(&cpg); -+ au_unpin(&a->pin); -+ -+out: -+ di_read_unlock(a->src_parent, AuLock_IR); -+ return err; -+} -+ -+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry, -+ struct au_link_args *a) -+{ -+ int err; -+ unsigned char plink; -+ aufs_bindex_t bend; -+ struct dentry *h_src_dentry; -+ struct inode *h_inode, *inode, *delegated; -+ struct super_block *sb; -+ struct file *h_file; -+ -+ plink = 0; -+ h_inode = NULL; -+ sb = src_dentry->d_sb; -+ inode = src_dentry->d_inode; -+ if (au_ibstart(inode) <= a->bdst) -+ h_inode = au_h_iptr(inode, a->bdst); -+ if (!h_inode || !h_inode->i_nlink) { -+ /* copyup src_dentry as the name of dentry. 
*/ -+ bend = au_dbend(dentry); -+ if (bend < a->bsrc) -+ au_set_dbend(dentry, a->bsrc); -+ au_set_h_dptr(dentry, a->bsrc, -+ dget(au_h_dptr(src_dentry, a->bsrc))); -+ dget(a->h_path.dentry); -+ au_set_h_dptr(dentry, a->bdst, NULL); -+ dentry->d_inode = src_dentry->d_inode; /* tmp */ -+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0); -+ if (IS_ERR(h_file)) -+ err = PTR_ERR(h_file); -+ else { -+ struct au_cp_generic cpg = { -+ .dentry = dentry, -+ .bdst = a->bdst, -+ .bsrc = -1, -+ .len = -1, -+ .pin = &a->pin, -+ .flags = AuCpup_KEEPLINO -+ }; -+ err = au_sio_cpup_simple(&cpg); -+ au_h_open_post(dentry, a->bsrc, h_file); -+ if (!err) { -+ dput(a->h_path.dentry); -+ a->h_path.dentry = au_h_dptr(dentry, a->bdst); -+ } else -+ au_set_h_dptr(dentry, a->bdst, -+ a->h_path.dentry); -+ } -+ dentry->d_inode = NULL; /* restore */ -+ au_set_h_dptr(dentry, a->bsrc, NULL); -+ au_set_dbend(dentry, bend); -+ } else { -+ /* the inode of src_dentry already exists on a.bdst branch */ -+ h_src_dentry = d_find_alias(h_inode); -+ if (!h_src_dentry && au_plink_test(inode)) { -+ plink = 1; -+ h_src_dentry = au_plink_lkup(inode, a->bdst); -+ err = PTR_ERR(h_src_dentry); -+ if (IS_ERR(h_src_dentry)) -+ goto out; -+ -+ if (unlikely(!h_src_dentry->d_inode)) { -+ dput(h_src_dentry); -+ h_src_dentry = NULL; -+ } -+ -+ } -+ if (h_src_dentry) { -+ delegated = NULL; -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ dput(h_src_dentry); -+ } else { -+ AuIOErr("no dentry found for hi%lu on b%d\n", -+ h_inode->i_ino, a->bdst); -+ err = -EIO; -+ } -+ } -+ -+ if (!err && !plink) -+ au_plink_append(inode, a->bdst, a->h_path.dentry); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry) -+{ -+ int err, rerr; -+ struct au_dtime dt; 
-+ struct au_link_args *a; -+ struct dentry *wh_dentry, *h_src_dentry; -+ struct inode *inode, *delegated; -+ struct super_block *sb; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; -+ -+ IMustLock(dir); -+ inode = src_dentry->d_inode; -+ IMustLock(inode); -+ -+ err = -ENOMEM; -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ a->parent = dentry->d_parent; /* dir inode is locked */ -+ err = aufs_read_and_write_lock2(dentry, src_dentry, -+ AuLock_NOPLM | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_kfree; -+ err = au_d_linkable(src_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ err = au_d_may_add(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ a->src_parent = dget_parent(src_dentry); -+ wr_dir_args.force_btgt = au_ibstart(inode); -+ -+ di_write_lock_parent(a->parent); -+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin, -+ &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ err = 0; -+ sb = dentry->d_sb; -+ a->bdst = au_dbstart(dentry); -+ a->h_path.dentry = au_h_dptr(dentry, a->bdst); -+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst); -+ a->bsrc = au_ibstart(inode); -+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); -+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile) -+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc)); -+ if (!h_src_dentry) { -+ a->bsrc = au_dbstart(src_dentry); -+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc); -+ AuDebugOn(!h_src_dentry); -+ } else if (IS_ERR(h_src_dentry)) { -+ err = PTR_ERR(h_src_dentry); -+ goto out_parent; -+ } -+ -+ if (au_opt_test(au_mntflags(sb), PLINK)) { -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) -+ err = au_cpup_or_link(src_dentry, dentry, a); -+ else { -+ delegated = NULL; -+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin), -+ &a->h_path, &delegated); 
-+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ } -+ dput(h_src_dentry); -+ } else { -+ /* -+ * copyup src_dentry to the branch we process, -+ * and then link(2) to it. -+ */ -+ dput(h_src_dentry); -+ if (a->bdst < a->bsrc -+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) { -+ au_unpin(&a->pin); -+ di_write_unlock(a->parent); -+ err = au_cpup_before_link(src_dentry, a); -+ di_write_lock_parent(a->parent); -+ if (!err) -+ err = au_pin(&a->pin, dentry, a->bdst, -+ au_opt_udba(sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (unlikely(err)) -+ goto out_wh; -+ } -+ if (!err) { -+ h_src_dentry = au_h_dptr(src_dentry, a->bdst); -+ err = -ENOENT; -+ if (h_src_dentry && h_src_dentry->d_inode) { -+ delegated = NULL; -+ err = vfsub_link(h_src_dentry, -+ au_pinned_h_dir(&a->pin), -+ &a->h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry" -+ " for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ } -+ } -+ } -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ if (wh_dentry) { -+ a->h_path.dentry = wh_dentry; -+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path, -+ dentry); -+ if (unlikely(err)) -+ goto out_revert; -+ } -+ -+ dir->i_version++; -+ if (au_ibstart(dir) == au_dbstart(dentry)) -+ au_cpup_attr_timesizes(dir); -+ inc_nlink(inode); -+ inode->i_ctime = dir->i_ctime; -+ d_instantiate(dentry, au_igrab(inode)); -+ if (d_unhashed(a->h_path.dentry)) -+ /* some filesystem calls d_drop() */ -+ d_drop(dentry); -+ goto out_unpin; /* success */ -+ -+out_revert: -+ /* no delegation since it is just created */ -+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path, -+ /*delegated*/NULL, /*force*/0); -+ if (unlikely(rerr)) { -+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&dt); -+out_unpin: -+ au_unpin(&a->pin); -+out_wh: -+ dput(wh_dentry); 
-+out_parent: -+ di_write_unlock(a->parent); -+ dput(a->src_parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } -+ aufs_read_and_write_unlock2(dentry, src_dentry); -+out_kfree: -+ kfree(a); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) -+{ -+ int err, rerr; -+ aufs_bindex_t bindex; -+ unsigned char diropq; -+ struct path h_path; -+ struct dentry *wh_dentry, *parent, *opq_dentry; -+ struct mutex *h_mtx; -+ struct super_block *sb; -+ struct { -+ struct au_pin pin; -+ struct au_dtime dt; -+ } *a; /* reduce the stack usage */ -+ struct au_wr_dir_args wr_dir_args = { -+ .force_btgt = -1, -+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR -+ }; -+ -+ IMustLock(dir); -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_free; -+ err = au_d_may_add(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL, -+ &a->pin, &wr_dir_args); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ sb = dentry->d_sb; -+ bindex = au_dbstart(dentry); -+ h_path.dentry = au_h_dptr(dentry, bindex); -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode); -+ if (unlikely(err)) -+ goto out_unpin; -+ -+ /* make the dir opaque */ -+ diropq = 0; -+ h_mtx = &h_path.dentry->d_inode->i_mutex; -+ if (wh_dentry -+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) { -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ opq_dentry = au_diropq_create(dentry, bindex); -+ mutex_unlock(h_mtx); -+ err = PTR_ERR(opq_dentry); -+ if (IS_ERR(opq_dentry)) -+ goto out_dir; -+ dput(opq_dentry); -+ diropq = 1; -+ } -+ -+ err = epilog(dir, bindex, 
wh_dentry, dentry); -+ if (!err) { -+ inc_nlink(dir); -+ goto out_unpin; /* success */ -+ } -+ -+ /* revert */ -+ if (diropq) { -+ AuLabel(revert opq); -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(dentry, bindex); -+ mutex_unlock(h_mtx); -+ if (rerr) { -+ AuIOErr("%pd reverting diropq failed(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } -+ } -+ -+out_dir: -+ AuLabel(revert dir); -+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path); -+ if (rerr) { -+ AuIOErr("%pd reverting dir failed(%d, %d)\n", -+ dentry, err, rerr); -+ err = -EIO; -+ } -+ au_dtime_revert(&a->dt); -+out_unpin: -+ au_unpin(&a->pin); -+ dput(wh_dentry); -+out_parent: -+ di_write_unlock(parent); -+out_unlock: -+ if (unlikely(err)) { -+ au_update_dbstart(dentry); -+ d_drop(dentry); -+ } -+ aufs_read_unlock(dentry, AuLock_DW); -+out_free: -+ kfree(a); -+out: -+ return err; -+} -diff --git fs/aufs/i_op_del.c fs/aufs/i_op_del.c -new file mode 100644 -index 0000000..4e1ee2e ---- /dev/null -+++ fs/aufs/i_op_del.c -@@ -0,0 +1,494 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * inode operations (del entry) -+ */ -+ -+#include "aufs.h" -+ -+/* -+ * decide if a new whiteout for @dentry is necessary or not. -+ * when it is necessary, prepare the parent dir for the upper branch whose -+ * branch index is @bcpup for creation. the actual creation of the whiteout will -+ * be done by caller. 
-+ * return value: -+ * 0: wh is unnecessary -+ * plus: wh is necessary -+ * minus: error -+ */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup) -+{ -+ int need_wh, err; -+ aufs_bindex_t bstart; -+ struct super_block *sb; -+ -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ if (*bcpup < 0) { -+ *bcpup = bstart; -+ if (au_test_ro(sb, bstart, dentry->d_inode)) { -+ err = AuWbrCopyup(au_sbi(sb), dentry); -+ *bcpup = err; -+ if (unlikely(err < 0)) -+ goto out; -+ } -+ } else -+ AuDebugOn(bstart < *bcpup -+ || au_test_ro(sb, *bcpup, dentry->d_inode)); -+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart); -+ -+ if (*bcpup != bstart) { -+ err = au_cpup_dirs(dentry, *bcpup); -+ if (unlikely(err)) -+ goto out; -+ need_wh = 1; -+ } else { -+ struct au_dinfo *dinfo, *tmp; -+ -+ need_wh = -ENOMEM; -+ dinfo = au_di(dentry); -+ tmp = au_di_alloc(sb, AuLsc_DI_TMP); -+ if (tmp) { -+ au_di_cp(tmp, dinfo); -+ au_di_swap(tmp, dinfo); -+ /* returns the number of positive dentries */ -+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0); -+ au_di_swap(tmp, dinfo); -+ au_rw_write_unlock(&tmp->di_rwsem); -+ au_di_free(tmp); -+ } -+ } -+ AuDbg("need_wh %d\n", need_wh); -+ err = need_wh; -+ -+out: -+ return err; -+} -+ -+/* -+ * simple tests for the del-entry operations. -+ * following the checks in vfs, plus the parent-child relationship. 
-+ */ -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir) -+{ -+ int err; -+ umode_t h_mode; -+ struct dentry *h_dentry, *h_latest; -+ struct inode *h_inode; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ h_inode = h_dentry->d_inode; -+ if (dentry->d_inode) { -+ err = -ENOENT; -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ -+ h_mode = h_inode->i_mode; -+ if (!isdir) { -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(h_mode))) -+ goto out; -+ } else if (unlikely(!S_ISDIR(h_mode))) { -+ err = -ENOTDIR; -+ goto out; -+ } -+ } else { -+ /* rename(2) case */ -+ err = -EIO; -+ if (unlikely(h_inode)) -+ goto out; -+ } -+ -+ err = -ENOENT; -+ /* expected parent dir is locked */ -+ if (unlikely(h_parent != h_dentry->d_parent)) -+ goto out; -+ err = 0; -+ -+ /* -+ * rmdir a dir may break the consistency on some filesystem. -+ * let's try heavy test. -+ */ -+ err = -EACCES; -+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1) -+ && au_test_h_perm(h_parent->d_inode, -+ MAY_EXEC | MAY_WRITE))) -+ goto out; -+ -+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent); -+ err = -EIO; -+ if (IS_ERR(h_latest)) -+ goto out; -+ if (h_latest == h_dentry) -+ err = 0; -+ dput(h_latest); -+ -+out: -+ return err; -+} -+ -+/* -+ * decide the branch where we operate for @dentry. the branch index will be set -+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent -+ * dir for reverting. -+ * when a new whiteout is necessary, create it. 
-+ */ -+static struct dentry* -+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup, -+ struct au_dtime *dt, struct au_pin *pin) -+{ -+ struct dentry *wh_dentry; -+ struct super_block *sb; -+ struct path h_path; -+ int err, need_wh; -+ unsigned int udba; -+ aufs_bindex_t bcpup; -+ -+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup); -+ wh_dentry = ERR_PTR(need_wh); -+ if (unlikely(need_wh < 0)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ udba = au_opt_udba(sb); -+ bcpup = *rbcpup; -+ err = au_pin(pin, dentry, bcpup, udba, -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ -+ h_path.dentry = au_pinned_h_parent(pin); -+ if (udba != AuOpt_UDBA_NONE -+ && au_dbstart(dentry) == bcpup) { -+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir); -+ wh_dentry = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out_unpin; -+ } -+ -+ h_path.mnt = au_sbr_mnt(sb, bcpup); -+ au_dtime_store(dt, au_pinned_parent(pin), &h_path); -+ wh_dentry = NULL; -+ if (!need_wh) -+ goto out; /* success, no need to create whiteout */ -+ -+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_unpin; -+ -+ /* returns with the parent is locked and wh_dentry is dget-ed */ -+ goto out; /* success */ -+ -+out_unpin: -+ au_unpin(pin); -+out: -+ return wh_dentry; -+} -+ -+/* -+ * when removing a dir, rename it to a unique temporary whiteout-ed name first -+ * in order to be revertible and save time for removing many child whiteouts -+ * under the dir. -+ * returns 1 when there are too many child whiteout and caller should remove -+ * them asynchronously. returns 0 when the number of children is enough small to -+ * remove now or the branch fs is a remote fs. -+ * otherwise return an error. 
-+ */ -+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, -+ struct au_nhash *whlist, struct inode *dir) -+{ -+ int rmdir_later, err, dirwh; -+ struct dentry *h_dentry; -+ struct super_block *sb; -+ -+ sb = dentry->d_sb; -+ SiMustAnyLock(sb); -+ h_dentry = au_h_dptr(dentry, bindex); -+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex)); -+ if (unlikely(err)) -+ goto out; -+ -+ /* stop monitoring */ -+ au_hn_free(au_hi(dentry->d_inode, bindex)); -+ -+ if (!au_test_fs_remote(h_dentry->d_sb)) { -+ dirwh = au_sbi(sb)->si_dirwh; -+ rmdir_later = (dirwh <= 1); -+ if (!rmdir_later) -+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex, -+ dirwh); -+ if (rmdir_later) -+ return rmdir_later; -+ } -+ -+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist); -+ if (unlikely(err)) { -+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n", -+ h_dentry, bindex, err); -+ err = 0; -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * final procedure for deleting a entry. -+ * maintain dentry and iattr. -+ */ -+static void epilog(struct inode *dir, struct dentry *dentry, -+ aufs_bindex_t bindex) -+{ -+ struct inode *inode; -+ -+ inode = dentry->d_inode; -+ d_drop(dentry); -+ inode->i_ctime = dir->i_ctime; -+ -+ if (au_ibstart(dir) == bindex) -+ au_cpup_attr_timesizes(dir); -+ dir->i_version++; -+} -+ -+/* -+ * when an error happened, remove the created whiteout and revert everything. 
-+ */ -+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex, -+ aufs_bindex_t bwh, struct dentry *wh_dentry, -+ struct dentry *dentry, struct au_dtime *dt) -+{ -+ int rerr; -+ struct path h_path = { -+ .dentry = wh_dentry, -+ .mnt = au_sbr_mnt(dir->i_sb, bindex) -+ }; -+ -+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry); -+ if (!rerr) { -+ au_set_dbwh(dentry, bwh); -+ au_dtime_revert(dt); -+ return 0; -+ } -+ -+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr); -+ return -EIO; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int aufs_unlink(struct inode *dir, struct dentry *dentry) -+{ -+ int err; -+ aufs_bindex_t bwh, bindex, bstart; -+ struct inode *inode, *h_dir, *delegated; -+ struct dentry *parent, *wh_dentry; -+ /* to reuduce stack size */ -+ struct { -+ struct au_dtime dt; -+ struct au_pin pin; -+ struct path h_path; -+ } *a; -+ -+ IMustLock(dir); -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_free; -+ err = au_d_hashed_positive(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(inode->i_mode))) -+ goto out_unlock; /* possible? 
*/ -+ -+ bstart = au_dbstart(dentry); -+ bwh = au_dbwh(dentry); -+ bindex = -1; -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt, -+ &a->pin); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart); -+ a->h_path.dentry = au_h_dptr(dentry, bstart); -+ dget(a->h_path.dentry); -+ if (bindex == bstart) { -+ h_dir = au_pinned_h_dir(&a->pin); -+ delegated = NULL; -+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ } else { -+ /* dir inode is locked */ -+ h_dir = wh_dentry->d_parent->d_inode; -+ IMustLock(h_dir); -+ err = 0; -+ } -+ -+ if (!err) { -+ vfsub_drop_nlink(inode); -+ epilog(dir, dentry, bindex); -+ -+ /* update target timestamps */ -+ if (bindex == bstart) { -+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); -+ /*ignore*/ -+ inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; -+ } else -+ /* todo: this timestamp may be reverted later */ -+ inode->i_ctime = h_dir->i_ctime; -+ goto out_unpin; /* success */ -+ } -+ -+ /* revert */ -+ if (wh_dentry) { -+ int rerr; -+ -+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, -+ &a->dt); -+ if (rerr) -+ err = rerr; -+ } -+ -+out_unpin: -+ au_unpin(&a->pin); -+ dput(wh_dentry); -+ dput(a->h_path.dentry); -+out_parent: -+ di_write_unlock(parent); -+out_unlock: -+ aufs_read_unlock(dentry, AuLock_DW); -+out_free: -+ kfree(a); -+out: -+ return err; -+} -+ -+int aufs_rmdir(struct inode *dir, struct dentry *dentry) -+{ -+ int err, rmdir_later; -+ aufs_bindex_t bwh, bindex, bstart; -+ struct inode *inode; -+ struct dentry *parent, *wh_dentry, *h_dentry; -+ struct au_whtmp_rmdir *args; -+ /* to reuduce stack size */ -+ struct { -+ struct au_dtime dt; -+ struct au_pin pin; -+ } *a; 
-+ -+ IMustLock(dir); -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN); -+ if (unlikely(err)) -+ goto out_free; -+ err = au_alive_dir(dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ inode = dentry->d_inode; -+ IMustLock(inode); -+ err = -ENOTDIR; -+ if (unlikely(!S_ISDIR(inode->i_mode))) -+ goto out_unlock; /* possible? */ -+ -+ err = -ENOMEM; -+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS); -+ if (unlikely(!args)) -+ goto out_unlock; -+ -+ parent = dentry->d_parent; /* dir inode is locked */ -+ di_write_lock_parent(parent); -+ err = au_test_empty(dentry, &args->whlist); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ bstart = au_dbstart(dentry); -+ bwh = au_dbwh(dentry); -+ bindex = -1; -+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt, -+ &a->pin); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out_parent; -+ -+ h_dentry = au_h_dptr(dentry, bstart); -+ dget(h_dentry); -+ rmdir_later = 0; -+ if (bindex == bstart) { -+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir); -+ if (err > 0) { -+ rmdir_later = err; -+ err = 0; -+ } -+ } else { -+ /* stop monitoring */ -+ au_hn_free(au_hi(inode, bstart)); -+ -+ /* dir inode is locked */ -+ IMustLock(wh_dentry->d_parent->d_inode); -+ err = 0; -+ } -+ -+ if (!err) { -+ vfsub_dead_dir(inode); -+ au_set_dbdiropq(dentry, -1); -+ epilog(dir, dentry, bindex); -+ -+ if (rmdir_later) { -+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args); -+ args = NULL; -+ } -+ -+ goto out_unpin; /* success */ -+ } -+ -+ /* revert */ -+ AuLabel(revert); -+ if (wh_dentry) { -+ int rerr; -+ -+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry, -+ &a->dt); -+ if (rerr) -+ err = rerr; -+ } -+ -+out_unpin: -+ au_unpin(&a->pin); -+ dput(wh_dentry); -+ dput(h_dentry); -+out_parent: -+ di_write_unlock(parent); -+ if (args) -+ au_whtmp_rmdir_free(args); -+out_unlock: -+ 
aufs_read_unlock(dentry, AuLock_DW); -+out_free: -+ kfree(a); -+out: -+ AuTraceErr(err); -+ return err; -+} -diff --git fs/aufs/i_op_ren.c fs/aufs/i_op_ren.c -new file mode 100644 -index 0000000..0f94247 ---- /dev/null -+++ fs/aufs/i_op_ren.c -@@ -0,0 +1,1019 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * inode operation (rename entry) -+ * todo: this is crazy monster -+ */ -+ -+#include "aufs.h" -+ -+enum { AuSRC, AuDST, AuSrcDst }; -+enum { AuPARENT, AuCHILD, AuParentChild }; -+ -+#define AuRen_ISDIR 1 -+#define AuRen_ISSAMEDIR (1 << 1) -+#define AuRen_WHSRC (1 << 2) -+#define AuRen_WHDST (1 << 3) -+#define AuRen_MNT_WRITE (1 << 4) -+#define AuRen_DT_DSTDIR (1 << 5) -+#define AuRen_DIROPQ (1 << 6) -+#define AuRen_CPUP (1 << 7) -+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name) -+#define au_fset_ren(flags, name) \ -+ do { (flags) |= AuRen_##name; } while (0) -+#define au_fclr_ren(flags, name) \ -+ do { (flags) &= ~AuRen_##name; } while (0) -+ -+struct au_ren_args { -+ struct { -+ struct dentry *dentry, *h_dentry, *parent, *h_parent, -+ *wh_dentry; -+ struct inode *dir, *inode; -+ struct au_hinode *hdir; -+ struct au_dtime dt[AuParentChild]; -+ aufs_bindex_t bstart; -+ } sd[AuSrcDst]; -+ -+#define src_dentry sd[AuSRC].dentry -+#define src_dir sd[AuSRC].dir -+#define src_inode sd[AuSRC].inode -+#define src_h_dentry sd[AuSRC].h_dentry -+#define src_parent sd[AuSRC].parent -+#define src_h_parent sd[AuSRC].h_parent -+#define src_wh_dentry sd[AuSRC].wh_dentry -+#define src_hdir sd[AuSRC].hdir -+#define src_h_dir sd[AuSRC].hdir->hi_inode -+#define src_dt sd[AuSRC].dt -+#define src_bstart sd[AuSRC].bstart -+ -+#define dst_dentry sd[AuDST].dentry -+#define dst_dir sd[AuDST].dir -+#define dst_inode sd[AuDST].inode -+#define dst_h_dentry sd[AuDST].h_dentry -+#define dst_parent sd[AuDST].parent -+#define dst_h_parent sd[AuDST].h_parent -+#define dst_wh_dentry sd[AuDST].wh_dentry -+#define dst_hdir sd[AuDST].hdir -+#define dst_h_dir 
sd[AuDST].hdir->hi_inode -+#define dst_dt sd[AuDST].dt -+#define dst_bstart sd[AuDST].bstart -+ -+ struct dentry *h_trap; -+ struct au_branch *br; -+ struct au_hinode *src_hinode; -+ struct path h_path; -+ struct au_nhash whlist; -+ aufs_bindex_t btgt, src_bwh, src_bdiropq; -+ -+ unsigned int flags; -+ -+ struct au_whtmp_rmdir *thargs; -+ struct dentry *h_dst; -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * functions for reverting. -+ * when an error happened in a single rename systemcall, we should revert -+ * everything as if nothing happend. -+ * we don't need to revert the copied-up/down the parent dir since they are -+ * harmless. -+ */ -+ -+#define RevertFailure(fmt, ...) do { \ -+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \ -+ ##__VA_ARGS__, err, rerr); \ -+ err = -EIO; \ -+} while (0) -+ -+static void au_ren_rev_diropq(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ -+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(a->src_dentry, a->btgt); -+ au_hn_imtx_unlock(a->src_hinode); -+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq); -+ if (rerr) -+ RevertFailure("remove diropq %pd", a->src_dentry); -+} -+ -+static void au_ren_rev_rename(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ struct inode *delegated; -+ -+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name, -+ a->src_h_parent); -+ rerr = PTR_ERR(a->h_path.dentry); -+ if (IS_ERR(a->h_path.dentry)) { -+ RevertFailure("lkup one %pd", a->src_dentry); -+ return; -+ } -+ -+ delegated = NULL; -+ rerr = vfsub_rename(a->dst_h_dir, -+ au_h_dptr(a->src_dentry, a->btgt), -+ a->src_h_dir, &a->h_path, &delegated); -+ if (unlikely(rerr == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal rename\n"); -+ iput(delegated); -+ } -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */ -+ if (rerr) -+ RevertFailure("rename %pd", 
a->src_dentry); -+} -+ -+static void au_ren_rev_cpup(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ -+ a->h_path.dentry = a->dst_h_dentry; -+ /* no delegation since it is just created */ -+ rerr = vfsub_unlink(a->dst_h_dir, &a->h_path, /*delegated*/NULL, -+ /*force*/0); -+ au_set_h_dptr(a->src_dentry, a->btgt, NULL); -+ au_set_dbstart(a->src_dentry, a->src_bstart); -+ if (rerr) -+ RevertFailure("unlink %pd", a->dst_h_dentry); -+} -+ -+static void au_ren_rev_whtmp(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ struct inode *delegated; -+ -+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name, -+ a->dst_h_parent); -+ rerr = PTR_ERR(a->h_path.dentry); -+ if (IS_ERR(a->h_path.dentry)) { -+ RevertFailure("lkup one %pd", a->dst_dentry); -+ return; -+ } -+ if (a->h_path.dentry->d_inode) { -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ return; -+ } -+ -+ delegated = NULL; -+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path, -+ &delegated); -+ if (unlikely(rerr == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal rename\n"); -+ iput(delegated); -+ } -+ d_drop(a->h_path.dentry); -+ dput(a->h_path.dentry); -+ if (!rerr) -+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); -+ else -+ RevertFailure("rename %pd", a->h_dst); -+} -+ -+static void au_ren_rev_whsrc(int err, struct au_ren_args *a) -+{ -+ int rerr; -+ -+ a->h_path.dentry = a->src_wh_dentry; -+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry); -+ au_set_dbwh(a->src_dentry, a->src_bwh); -+ if (rerr) -+ RevertFailure("unlink %pd", a->src_wh_dentry); -+} -+#undef RevertFailure -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * when we have to copyup the renaming entry, do it with the rename-target name -+ * in order to minimize the cost (the later actual rename is unnecessary). -+ * otherwise rename it on the target branch. 
-+ */ -+static int au_ren_or_cpup(struct au_ren_args *a) -+{ -+ int err; -+ struct dentry *d; -+ struct inode *delegated; -+ -+ d = a->src_dentry; -+ if (au_dbstart(d) == a->btgt) { -+ a->h_path.dentry = a->dst_h_dentry; -+ if (au_ftest_ren(a->flags, DIROPQ) -+ && au_dbdiropq(d) == a->btgt) -+ au_fclr_ren(a->flags, DIROPQ); -+ AuDebugOn(au_dbstart(d) != a->btgt); -+ delegated = NULL; -+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt), -+ a->dst_h_dir, &a->h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal rename\n"); -+ iput(delegated); -+ } -+ } else -+ BUG(); -+ -+ if (!err && a->h_dst) -+ /* it will be set to dinfo later */ -+ dget(a->h_dst); -+ -+ return err; -+} -+ -+/* cf. aufs_rmdir() */ -+static int au_ren_del_whtmp(struct au_ren_args *a) -+{ -+ int err; -+ struct inode *dir; -+ -+ dir = a->dst_dir; -+ SiMustAnyLock(dir->i_sb); -+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt, -+ au_sbi(dir->i_sb)->si_dirwh) -+ || au_test_fs_remote(a->h_dst->d_sb)) { -+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist); -+ if (unlikely(err)) -+ pr_warn("failed removing whtmp dir %pd (%d), " -+ "ignored.\n", a->h_dst, err); -+ } else { -+ au_nhash_wh_free(&a->thargs->whlist); -+ a->thargs->whlist = a->whlist; -+ a->whlist.nh_num = 0; -+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs); -+ dput(a->h_dst); -+ a->thargs = NULL; -+ } -+ -+ return 0; -+} -+ -+/* make it 'opaque' dir. 
*/ -+static int au_ren_diropq(struct au_ren_args *a) -+{ -+ int err; -+ struct dentry *diropq; -+ -+ err = 0; -+ a->src_bdiropq = au_dbdiropq(a->src_dentry); -+ a->src_hinode = au_hi(a->src_inode, a->btgt); -+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD); -+ diropq = au_diropq_create(a->src_dentry, a->btgt); -+ au_hn_imtx_unlock(a->src_hinode); -+ if (IS_ERR(diropq)) -+ err = PTR_ERR(diropq); -+ dput(diropq); -+ -+ return err; -+} -+ -+static int do_rename(struct au_ren_args *a) -+{ -+ int err; -+ struct dentry *d, *h_d; -+ -+ /* prepare workqueue args for asynchronous rmdir */ -+ h_d = a->dst_h_dentry; -+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) { -+ err = -ENOMEM; -+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS); -+ if (unlikely(!a->thargs)) -+ goto out; -+ a->h_dst = dget(h_d); -+ } -+ -+ /* create whiteout for src_dentry */ -+ if (au_ftest_ren(a->flags, WHSRC)) { -+ a->src_bwh = au_dbwh(a->src_dentry); -+ AuDebugOn(a->src_bwh >= 0); -+ a->src_wh_dentry -+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent); -+ err = PTR_ERR(a->src_wh_dentry); -+ if (IS_ERR(a->src_wh_dentry)) -+ goto out_thargs; -+ } -+ -+ /* lookup whiteout for dentry */ -+ if (au_ftest_ren(a->flags, WHDST)) { -+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name, -+ a->br); -+ err = PTR_ERR(h_d); -+ if (IS_ERR(h_d)) -+ goto out_whsrc; -+ if (!h_d->d_inode) -+ dput(h_d); -+ else -+ a->dst_wh_dentry = h_d; -+ } -+ -+ /* rename dentry to tmpwh */ -+ if (a->thargs) { -+ err = au_whtmp_ren(a->dst_h_dentry, a->br); -+ if (unlikely(err)) -+ goto out_whdst; -+ -+ d = a->dst_dentry; -+ au_set_h_dptr(d, a->btgt, NULL); -+ err = au_lkup_neg(d, a->btgt, /*wh*/0); -+ if (unlikely(err)) -+ goto out_whtmp; -+ a->dst_h_dentry = au_h_dptr(d, a->btgt); -+ } -+ -+ BUG_ON(a->dst_h_dentry->d_inode && a->src_bstart != a->btgt); -+ -+ /* rename by vfs_rename or cpup */ -+ d = a->dst_dentry; -+ if (au_ftest_ren(a->flags, ISDIR) -+ && (a->dst_wh_dentry -+ || au_dbdiropq(d) 
== a->btgt -+ /* hide the lower to keep xino */ -+ || a->btgt < au_dbend(d) -+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ))) -+ au_fset_ren(a->flags, DIROPQ); -+ err = au_ren_or_cpup(a); -+ if (unlikely(err)) -+ /* leave the copied-up one */ -+ goto out_whtmp; -+ -+ /* make dir opaque */ -+ if (au_ftest_ren(a->flags, DIROPQ)) { -+ err = au_ren_diropq(a); -+ if (unlikely(err)) -+ goto out_rename; -+ } -+ -+ /* update target timestamps */ -+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt); -+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt); -+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/ -+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime; -+ -+ /* remove whiteout for dentry */ -+ if (a->dst_wh_dentry) { -+ a->h_path.dentry = a->dst_wh_dentry; -+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path, -+ a->dst_dentry); -+ if (unlikely(err)) -+ goto out_diropq; -+ } -+ -+ /* remove whtmp */ -+ if (a->thargs) -+ au_ren_del_whtmp(a); /* ignore this error */ -+ -+ err = 0; -+ goto out_success; -+ -+out_diropq: -+ if (au_ftest_ren(a->flags, DIROPQ)) -+ au_ren_rev_diropq(err, a); -+out_rename: -+ if (!au_ftest_ren(a->flags, CPUP)) -+ au_ren_rev_rename(err, a); -+ else -+ au_ren_rev_cpup(err, a); -+ dput(a->h_dst); -+out_whtmp: -+ if (a->thargs) -+ au_ren_rev_whtmp(err, a); -+out_whdst: -+ dput(a->dst_wh_dentry); -+ a->dst_wh_dentry = NULL; -+out_whsrc: -+ if (a->src_wh_dentry) -+ au_ren_rev_whsrc(err, a); -+out_success: -+ dput(a->src_wh_dentry); -+ dput(a->dst_wh_dentry); -+out_thargs: -+ if (a->thargs) { -+ dput(a->h_dst); -+ au_whtmp_rmdir_free(a->thargs); -+ a->thargs = NULL; -+ } -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * test if @dentry dir can be rename destination or not. -+ * success means, it is a logically empty dir. 
-+ */ -+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist) -+{ -+ return au_test_empty(dentry, whlist); -+} -+ -+/* -+ * test if @dentry dir can be rename source or not. -+ * if it can, return 0 and @children is filled. -+ * success means, -+ * - it is a logically empty dir. -+ * - or, it exists on writable branch and has no children including whiteouts -+ * on the lower branch. -+ */ -+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) -+{ -+ int err; -+ unsigned int rdhash; -+ aufs_bindex_t bstart; -+ -+ bstart = au_dbstart(dentry); -+ if (bstart != btgt) { -+ struct au_nhash whlist; -+ -+ SiMustAnyLock(dentry->d_sb); -+ rdhash = au_sbi(dentry->d_sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, -+ dentry)); -+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_test_empty(dentry, &whlist); -+ au_nhash_wh_free(&whlist); -+ goto out; -+ } -+ -+ if (bstart == au_dbtaildir(dentry)) -+ return 0; /* success */ -+ -+ err = au_test_empty_lower(dentry); -+ -+out: -+ if (err == -ENOTEMPTY) { -+ AuWarn1("renaming dir who has child(ren) on multiple branches," -+ " is not supported\n"); -+ err = -EXDEV; -+ } -+ return err; -+} -+ -+/* side effect: sets whlist and h_dentry */ -+static int au_ren_may_dir(struct au_ren_args *a) -+{ -+ int err; -+ unsigned int rdhash; -+ struct dentry *d; -+ -+ d = a->dst_dentry; -+ SiMustAnyLock(d->d_sb); -+ -+ err = 0; -+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) { -+ rdhash = au_sbi(d->d_sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d)); -+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ -+ au_set_dbstart(d, a->dst_bstart); -+ err = may_rename_dstdir(d, &a->whlist); -+ au_set_dbstart(d, a->btgt); -+ } -+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d)); -+ if (unlikely(err)) -+ goto out; -+ -+ d = a->src_dentry; -+ a->src_h_dentry = 
au_h_dptr(d, au_dbstart(d)); -+ if (au_ftest_ren(a->flags, ISDIR)) { -+ err = may_rename_srcdir(d, a->btgt); -+ if (unlikely(err)) { -+ au_nhash_wh_free(&a->whlist); -+ a->whlist.nh_num = 0; -+ } -+ } -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * simple tests for rename. -+ * following the checks in vfs, plus the parent-child relationship. -+ */ -+static int au_may_ren(struct au_ren_args *a) -+{ -+ int err, isdir; -+ struct inode *h_inode; -+ -+ if (a->src_bstart == a->btgt) { -+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent, -+ au_ftest_ren(a->flags, ISDIR)); -+ if (unlikely(err)) -+ goto out; -+ err = -EINVAL; -+ if (unlikely(a->src_h_dentry == a->h_trap)) -+ goto out; -+ } -+ -+ err = 0; -+ if (a->dst_bstart != a->btgt) -+ goto out; -+ -+ err = -ENOTEMPTY; -+ if (unlikely(a->dst_h_dentry == a->h_trap)) -+ goto out; -+ -+ err = -EIO; -+ h_inode = a->dst_h_dentry->d_inode; -+ isdir = !!au_ftest_ren(a->flags, ISDIR); -+ if (!a->dst_dentry->d_inode) { -+ if (unlikely(h_inode)) -+ goto out; -+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent, -+ isdir); -+ } else { -+ if (unlikely(!h_inode || !h_inode->i_nlink)) -+ goto out; -+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent, -+ isdir); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+out: -+ if (unlikely(err == -ENOENT || err == -EEXIST)) -+ err = -EIO; -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * locking order -+ * (VFS) -+ * - src_dir and dir by lock_rename() -+ * - inode if exitsts -+ * (aufs) -+ * - lock all -+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls, -+ * + si_read_lock -+ * + di_write_lock2_child() -+ * + di_write_lock_child() -+ * + ii_write_lock_child() -+ * + di_write_lock_child2() -+ * + ii_write_lock_child2() -+ * + src_parent and parent -+ * + di_write_lock_parent() -+ * + 
ii_write_lock_parent() -+ * + di_write_lock_parent2() -+ * + ii_write_lock_parent2() -+ * + lower src_dir and dir by vfsub_lock_rename() -+ * + verify the every relationships between child and parent. if any -+ * of them failed, unlock all and return -EBUSY. -+ */ -+static void au_ren_unlock(struct au_ren_args *a) -+{ -+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir, -+ a->dst_h_parent, a->dst_hdir); -+ if (au_ftest_ren(a->flags, MNT_WRITE)) -+ vfsub_mnt_drop_write(au_br_mnt(a->br)); -+} -+ -+static int au_ren_lock(struct au_ren_args *a) -+{ -+ int err; -+ unsigned int udba; -+ -+ err = 0; -+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt); -+ a->src_hdir = au_hi(a->src_dir, a->btgt); -+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt); -+ a->dst_hdir = au_hi(a->dst_dir, a->btgt); -+ -+ err = vfsub_mnt_want_write(au_br_mnt(a->br)); -+ if (unlikely(err)) -+ goto out; -+ au_fset_ren(a->flags, MNT_WRITE); -+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir, -+ a->dst_h_parent, a->dst_hdir); -+ udba = au_opt_udba(a->src_dentry->d_sb); -+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode -+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode)) -+ err = au_busy_or_stale(); -+ if (!err && au_dbstart(a->src_dentry) == a->btgt) -+ err = au_h_verify(a->src_h_dentry, udba, -+ a->src_h_parent->d_inode, a->src_h_parent, -+ a->br); -+ if (!err && au_dbstart(a->dst_dentry) == a->btgt) -+ err = au_h_verify(a->dst_h_dentry, udba, -+ a->dst_h_parent->d_inode, a->dst_h_parent, -+ a->br); -+ if (!err) -+ goto out; /* success */ -+ -+ err = au_busy_or_stale(); -+ au_ren_unlock(a); -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_ren_refresh_dir(struct au_ren_args *a) -+{ -+ struct inode *dir; -+ -+ dir = a->dst_dir; -+ dir->i_version++; -+ if (au_ftest_ren(a->flags, ISDIR)) { -+ /* is this updating defined in POSIX? 
*/ -+ au_cpup_attr_timesizes(a->src_inode); -+ au_cpup_attr_nlink(dir, /*force*/1); -+ } -+ -+ if (au_ibstart(dir) == a->btgt) -+ au_cpup_attr_timesizes(dir); -+ -+ if (au_ftest_ren(a->flags, ISSAMEDIR)) -+ return; -+ -+ dir = a->src_dir; -+ dir->i_version++; -+ if (au_ftest_ren(a->flags, ISDIR)) -+ au_cpup_attr_nlink(dir, /*force*/1); -+ if (au_ibstart(dir) == a->btgt) -+ au_cpup_attr_timesizes(dir); -+} -+ -+static void au_ren_refresh(struct au_ren_args *a) -+{ -+ aufs_bindex_t bend, bindex; -+ struct dentry *d, *h_d; -+ struct inode *i, *h_i; -+ struct super_block *sb; -+ -+ d = a->dst_dentry; -+ d_drop(d); -+ if (a->h_dst) -+ /* already dget-ed by au_ren_or_cpup() */ -+ au_set_h_dptr(d, a->btgt, a->h_dst); -+ -+ i = a->dst_inode; -+ if (i) { -+ if (!au_ftest_ren(a->flags, ISDIR)) -+ vfsub_drop_nlink(i); -+ else { -+ vfsub_dead_dir(i); -+ au_cpup_attr_timesizes(i); -+ } -+ au_update_dbrange(d, /*do_put_zero*/1); -+ } else { -+ bend = a->btgt; -+ for (bindex = au_dbstart(d); bindex < bend; bindex++) -+ au_set_h_dptr(d, bindex, NULL); -+ bend = au_dbend(d); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) -+ au_set_h_dptr(d, bindex, NULL); -+ au_update_dbrange(d, /*do_put_zero*/0); -+ } -+ -+ d = a->src_dentry; -+ au_set_dbwh(d, -1); -+ bend = au_dbend(d); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { -+ h_d = au_h_dptr(d, bindex); -+ if (h_d) -+ au_set_h_dptr(d, bindex, NULL); -+ } -+ au_set_dbend(d, a->btgt); -+ -+ sb = d->d_sb; -+ i = a->src_inode; -+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i)) -+ return; /* success */ -+ -+ bend = au_ibend(i); -+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) { -+ h_i = au_h_iptr(i, bindex); -+ if (h_i) { -+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0); -+ /* ignore this error */ -+ au_set_h_iptr(i, bindex, NULL, 0); -+ } -+ } -+ au_set_ibend(i, a->btgt); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* mainly for link(2) and rename(2) */ 
-+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt) -+{ -+ aufs_bindex_t bdiropq, bwh; -+ struct dentry *parent; -+ struct au_branch *br; -+ -+ parent = dentry->d_parent; -+ IMustLock(parent->d_inode); /* dir is locked */ -+ -+ bdiropq = au_dbdiropq(parent); -+ bwh = au_dbwh(dentry); -+ br = au_sbr(dentry->d_sb, btgt); -+ if (au_br_rdonly(br) -+ || (0 <= bdiropq && bdiropq < btgt) -+ || (0 <= bwh && bwh < btgt)) -+ btgt = -1; -+ -+ AuDbg("btgt %d\n", btgt); -+ return btgt; -+} -+ -+/* sets src_bstart, dst_bstart and btgt */ -+static int au_ren_wbr(struct au_ren_args *a) -+{ -+ int err; -+ struct au_wr_dir_args wr_dir_args = { -+ /* .force_btgt = -1, */ -+ .flags = AuWrDir_ADD_ENTRY -+ }; -+ -+ a->src_bstart = au_dbstart(a->src_dentry); -+ a->dst_bstart = au_dbstart(a->dst_dentry); -+ if (au_ftest_ren(a->flags, ISDIR)) -+ au_fset_wrdir(wr_dir_args.flags, ISDIR); -+ wr_dir_args.force_btgt = a->src_bstart; -+ if (a->dst_inode && a->dst_bstart < a->src_bstart) -+ wr_dir_args.force_btgt = a->dst_bstart; -+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt); -+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args); -+ a->btgt = err; -+ -+ return err; -+} -+ -+static void au_ren_dt(struct au_ren_args *a) -+{ -+ a->h_path.dentry = a->src_h_parent; -+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path); -+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) { -+ a->h_path.dentry = a->dst_h_parent; -+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path); -+ } -+ -+ au_fclr_ren(a->flags, DT_DSTDIR); -+ if (!au_ftest_ren(a->flags, ISDIR)) -+ return; -+ -+ a->h_path.dentry = a->src_h_dentry; -+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path); -+ if (a->dst_h_dentry->d_inode) { -+ au_fset_ren(a->flags, DT_DSTDIR); -+ a->h_path.dentry = a->dst_h_dentry; -+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path); -+ } -+} -+ -+static void au_ren_rev_dt(int err, struct au_ren_args *a) -+{ -+ struct dentry *h_d; -+ 
struct mutex *h_mtx; -+ -+ au_dtime_revert(a->src_dt + AuPARENT); -+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) -+ au_dtime_revert(a->dst_dt + AuPARENT); -+ -+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) { -+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_dtime_revert(a->src_dt + AuCHILD); -+ mutex_unlock(h_mtx); -+ -+ if (au_ftest_ren(a->flags, DT_DSTDIR)) { -+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry; -+ h_mtx = &h_d->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD); -+ au_dtime_revert(a->dst_dt + AuCHILD); -+ mutex_unlock(h_mtx); -+ } -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry, -+ struct inode *_dst_dir, struct dentry *_dst_dentry) -+{ -+ int err, flags; -+ /* reduce stack space */ -+ struct au_ren_args *a; -+ -+ AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry); -+ IMustLock(_src_dir); -+ IMustLock(_dst_dir); -+ -+ err = -ENOMEM; -+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE); -+ a = kzalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ a->src_dir = _src_dir; -+ a->src_dentry = _src_dentry; -+ a->src_inode = a->src_dentry->d_inode; -+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */ -+ a->dst_dir = _dst_dir; -+ a->dst_dentry = _dst_dentry; -+ a->dst_inode = a->dst_dentry->d_inode; -+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */ -+ if (a->dst_inode) { -+ IMustLock(a->dst_inode); -+ au_igrab(a->dst_inode); -+ } -+ -+ err = -ENOTDIR; -+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN; -+ if (S_ISDIR(a->src_inode->i_mode)) { -+ au_fset_ren(a->flags, ISDIR); -+ if (unlikely(a->dst_inode && !S_ISDIR(a->dst_inode->i_mode))) -+ goto out_free; -+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, -+ AuLock_DIR | flags); -+ } else -+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, -+ 
flags); -+ if (unlikely(err)) -+ goto out_free; -+ -+ err = au_d_hashed_positive(a->src_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ err = -ENOENT; -+ if (a->dst_inode) { -+ /* -+ * If it is a dir, VFS unhash dst_dentry before this -+ * function. It means we cannot rely upon d_unhashed(). -+ */ -+ if (unlikely(!a->dst_inode->i_nlink)) -+ goto out_unlock; -+ if (!S_ISDIR(a->dst_inode->i_mode)) { -+ err = au_d_hashed_positive(a->dst_dentry); -+ if (unlikely(err)) -+ goto out_unlock; -+ } else if (unlikely(IS_DEADDIR(a->dst_inode))) -+ goto out_unlock; -+ } else if (unlikely(d_unhashed(a->dst_dentry))) -+ goto out_unlock; -+ -+ /* -+ * is it possible? -+ * yes, it happend (in linux-3.3-rcN) but I don't know why. -+ * there may exist a problem somewhere else. -+ */ -+ err = -EINVAL; -+ if (unlikely(a->dst_parent->d_inode == a->src_dentry->d_inode)) -+ goto out_unlock; -+ -+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */ -+ di_write_lock_parent(a->dst_parent); -+ -+ /* which branch we process */ -+ err = au_ren_wbr(a); -+ if (unlikely(err < 0)) -+ goto out_parent; -+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt); -+ a->h_path.mnt = au_br_mnt(a->br); -+ -+ /* are they available to be renamed */ -+ err = au_ren_may_dir(a); -+ if (unlikely(err)) -+ goto out_children; -+ -+ /* prepare the writable parent dir on the same branch */ -+ if (a->dst_bstart == a->btgt) { -+ au_fset_ren(a->flags, WHDST); -+ } else { -+ err = au_cpup_dirs(a->dst_dentry, a->btgt); -+ if (unlikely(err)) -+ goto out_children; -+ } -+ -+ if (a->src_dir != a->dst_dir) { -+ /* -+ * this temporary unlock is safe, -+ * because both dir->i_mutex are locked. 
-+ */ -+ di_write_unlock(a->dst_parent); -+ di_write_lock_parent(a->src_parent); -+ err = au_wr_dir_need_wh(a->src_dentry, -+ au_ftest_ren(a->flags, ISDIR), -+ &a->btgt); -+ di_write_unlock(a->src_parent); -+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1); -+ au_fclr_ren(a->flags, ISSAMEDIR); -+ } else -+ err = au_wr_dir_need_wh(a->src_dentry, -+ au_ftest_ren(a->flags, ISDIR), -+ &a->btgt); -+ if (unlikely(err < 0)) -+ goto out_children; -+ if (err) -+ au_fset_ren(a->flags, WHSRC); -+ -+ /* cpup src */ -+ if (a->src_bstart != a->btgt) { -+ struct au_pin pin; -+ -+ err = au_pin(&pin, a->src_dentry, a->btgt, -+ au_opt_udba(a->src_dentry->d_sb), -+ AuPin_DI_LOCKED | AuPin_MNT_WRITE); -+ if (!err) { -+ struct au_cp_generic cpg = { -+ .dentry = a->src_dentry, -+ .bdst = a->btgt, -+ .bsrc = a->src_bstart, -+ .len = -1, -+ .pin = &pin, -+ .flags = AuCpup_DTIME | AuCpup_HOPEN -+ }; -+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart); -+ err = au_sio_cpup_simple(&cpg); -+ au_unpin(&pin); -+ } -+ if (unlikely(err)) -+ goto out_children; -+ a->src_bstart = a->btgt; -+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt); -+ au_fset_ren(a->flags, WHSRC); -+ } -+ -+ /* lock them all */ -+ err = au_ren_lock(a); -+ if (unlikely(err)) -+ /* leave the copied-up one */ -+ goto out_children; -+ -+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE)) -+ err = au_may_ren(a); -+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN)) -+ err = -ENAMETOOLONG; -+ if (unlikely(err)) -+ goto out_hdir; -+ -+ /* store timestamps to be revertible */ -+ au_ren_dt(a); -+ -+ /* here we go */ -+ err = do_rename(a); -+ if (unlikely(err)) -+ goto out_dt; -+ -+ /* update dir attributes */ -+ au_ren_refresh_dir(a); -+ -+ /* dput/iput all lower dentries */ -+ au_ren_refresh(a); -+ -+ goto out_hdir; /* success */ -+ -+out_dt: -+ au_ren_rev_dt(err, a); -+out_hdir: -+ au_ren_unlock(a); -+out_children: -+ au_nhash_wh_free(&a->whlist); -+ if (err && a->dst_inode && 
a->dst_bstart != a->btgt) { -+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt); -+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL); -+ au_set_dbstart(a->dst_dentry, a->dst_bstart); -+ } -+out_parent: -+ if (!err) -+ d_move(a->src_dentry, a->dst_dentry); -+ else { -+ au_update_dbstart(a->dst_dentry); -+ if (!a->dst_inode) -+ d_drop(a->dst_dentry); -+ } -+ if (au_ftest_ren(a->flags, ISSAMEDIR)) -+ di_write_unlock(a->dst_parent); -+ else -+ di_write_unlock2(a->src_parent, a->dst_parent); -+out_unlock: -+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry); -+out_free: -+ iput(a->dst_inode); -+ if (a->thargs) -+ au_whtmp_rmdir_free(a->thargs); -+ kfree(a); -+out: -+ AuTraceErr(err); -+ return err; -+} -diff --git fs/aufs/iinfo.c fs/aufs/iinfo.c -new file mode 100644 -index 0000000..da9ba0b ---- /dev/null -+++ fs/aufs/iinfo.c -@@ -0,0 +1,264 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * inode private data -+ */ -+ -+#include "aufs.h" -+ -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex) -+{ -+ struct inode *h_inode; -+ -+ IiMustAnyLock(inode); -+ -+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ return h_inode; -+} -+ -+/* todo: hard/soft set? 
*/ -+void au_hiput(struct au_hinode *hinode) -+{ -+ au_hn_free(hinode); -+ dput(hinode->hi_whdentry); -+ iput(hinode->hi_inode); -+} -+ -+unsigned int au_hi_flags(struct inode *inode, int isdir) -+{ -+ unsigned int flags; -+ const unsigned int mnt_flags = au_mntflags(inode->i_sb); -+ -+ flags = 0; -+ if (au_opt_test(mnt_flags, XINO)) -+ au_fset_hi(flags, XINO); -+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY)) -+ au_fset_hi(flags, HNOTIFY); -+ return flags; -+} -+ -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags) -+{ -+ struct au_hinode *hinode; -+ struct inode *hi; -+ struct au_iinfo *iinfo = au_ii(inode); -+ -+ IiMustWriteLock(inode); -+ -+ hinode = iinfo->ii_hinode + bindex; -+ hi = hinode->hi_inode; -+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0); -+ -+ if (hi) -+ au_hiput(hinode); -+ hinode->hi_inode = h_inode; -+ if (h_inode) { -+ int err; -+ struct super_block *sb = inode->i_sb; -+ struct au_branch *br; -+ -+ AuDebugOn(inode->i_mode -+ && (h_inode->i_mode & S_IFMT) -+ != (inode->i_mode & S_IFMT)); -+ if (bindex == iinfo->ii_bstart) -+ au_cpup_igen(inode, h_inode); -+ br = au_sbr(sb, bindex); -+ hinode->hi_id = br->br_id; -+ if (au_ftest_hi(flags, XINO)) { -+ err = au_xino_write(sb, bindex, h_inode->i_ino, -+ inode->i_ino); -+ if (unlikely(err)) -+ AuIOErr1("failed au_xino_write() %d\n", err); -+ } -+ -+ if (au_ftest_hi(flags, HNOTIFY) -+ && au_br_hnotifyable(br->br_perm)) { -+ err = au_hn_alloc(hinode, inode); -+ if (unlikely(err)) -+ AuIOErr1("au_hn_alloc() %d\n", err); -+ } -+ } -+} -+ -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh) -+{ -+ struct au_hinode *hinode; -+ -+ IiMustWriteLock(inode); -+ -+ hinode = au_ii(inode)->ii_hinode + bindex; -+ AuDebugOn(hinode->hi_whdentry); -+ hinode->hi_whdentry = h_wh; -+} -+ -+void au_update_iigen(struct inode *inode, int half) -+{ -+ struct au_iinfo *iinfo; -+ struct au_iigen *iigen; -+ unsigned int 
sigen; -+ -+ sigen = au_sigen(inode->i_sb); -+ iinfo = au_ii(inode); -+ iigen = &iinfo->ii_generation; -+ spin_lock(&iinfo->ii_genspin); -+ iigen->ig_generation = sigen; -+ if (half) -+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED); -+ else -+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED); -+ spin_unlock(&iinfo->ii_genspin); -+} -+ -+/* it may be called at remount time, too */ -+void au_update_ibrange(struct inode *inode, int do_put_zero) -+{ -+ struct au_iinfo *iinfo; -+ aufs_bindex_t bindex, bend; -+ -+ iinfo = au_ii(inode); -+ if (!iinfo) -+ return; -+ -+ IiMustWriteLock(inode); -+ -+ if (do_put_zero && iinfo->ii_bstart >= 0) { -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; -+ bindex++) { -+ struct inode *h_i; -+ -+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; -+ if (h_i -+ && !h_i->i_nlink -+ && !(h_i->i_state & I_LINKABLE)) -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ } -+ } -+ -+ iinfo->ii_bstart = -1; -+ iinfo->ii_bend = -1; -+ bend = au_sbend(inode->i_sb); -+ for (bindex = 0; bindex <= bend; bindex++) -+ if (iinfo->ii_hinode[0 + bindex].hi_inode) { -+ iinfo->ii_bstart = bindex; -+ break; -+ } -+ if (iinfo->ii_bstart >= 0) -+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--) -+ if (iinfo->ii_hinode[0 + bindex].hi_inode) { -+ iinfo->ii_bend = bindex; -+ break; -+ } -+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_icntnr_init_once(void *_c) -+{ -+ struct au_icntnr *c = _c; -+ struct au_iinfo *iinfo = &c->iinfo; -+ static struct lock_class_key aufs_ii; -+ -+ spin_lock_init(&iinfo->ii_genspin); -+ au_rw_init(&iinfo->ii_rwsem); -+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii); -+ inode_init_once(&c->vfs_inode); -+} -+ -+int au_iinfo_init(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ struct super_block *sb; -+ int nbr, i; -+ -+ sb = inode->i_sb; -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ nbr = au_sbend(sb) + 1; -+ 
if (unlikely(nbr <= 0)) -+ nbr = 1; -+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS); -+ if (iinfo->ii_hinode) { -+ au_ninodes_inc(sb); -+ for (i = 0; i < nbr; i++) -+ iinfo->ii_hinode[i].hi_id = -1; -+ -+ iinfo->ii_generation.ig_generation = au_sigen(sb); -+ iinfo->ii_bstart = -1; -+ iinfo->ii_bend = -1; -+ iinfo->ii_vdir = NULL; -+ return 0; -+ } -+ return -ENOMEM; -+} -+ -+int au_ii_realloc(struct au_iinfo *iinfo, int nbr) -+{ -+ int err, sz; -+ struct au_hinode *hip; -+ -+ AuRwMustWriteLock(&iinfo->ii_rwsem); -+ -+ err = -ENOMEM; -+ sz = sizeof(*hip) * (iinfo->ii_bend + 1); -+ if (!sz) -+ sz = sizeof(*hip); -+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS); -+ if (hip) { -+ iinfo->ii_hinode = hip; -+ err = 0; -+ } -+ -+ return err; -+} -+ -+void au_iinfo_fin(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ struct au_hinode *hi; -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend; -+ const unsigned char unlinked = !inode->i_nlink; -+ -+ iinfo = au_ii(inode); -+ /* bad_inode case */ -+ if (!iinfo) -+ return; -+ -+ sb = inode->i_sb; -+ au_ninodes_dec(sb); -+ if (si_pid_test(sb)) -+ au_xino_delete_inode(inode, unlinked); -+ else { -+ /* -+ * it is safe to hide the dependency between sbinfo and -+ * sb->s_umount. -+ */ -+ lockdep_off(); -+ si_noflush_read_lock(sb); -+ au_xino_delete_inode(inode, unlinked); -+ si_read_unlock(sb); -+ lockdep_on(); -+ } -+ -+ if (iinfo->ii_vdir) -+ au_vdir_free(iinfo->ii_vdir); -+ -+ bindex = iinfo->ii_bstart; -+ if (bindex >= 0) { -+ hi = iinfo->ii_hinode + bindex; -+ bend = iinfo->ii_bend; -+ while (bindex++ <= bend) { -+ if (hi->hi_inode) -+ au_hiput(hi); -+ hi++; -+ } -+ } -+ kfree(iinfo->ii_hinode); -+ iinfo->ii_hinode = NULL; -+ AuRwDestroy(&iinfo->ii_rwsem); -+} -diff --git fs/aufs/inode.c fs/aufs/inode.c -new file mode 100644 -index 0000000..d28d020 ---- /dev/null -+++ fs/aufs/inode.c -@@ -0,0 +1,479 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * inode functions -+ */ -+ -+#include "aufs.h" -+ -+struct inode *au_igrab(struct inode *inode) -+{ -+ if (inode) { -+ AuDebugOn(!atomic_read(&inode->i_count)); -+ ihold(inode); -+ } -+ return inode; -+} -+ -+static void au_refresh_hinode_attr(struct inode *inode, int do_version) -+{ -+ au_cpup_attr_all(inode, /*force*/0); -+ au_update_iigen(inode, /*half*/1); -+ if (do_version) -+ inode->i_version++; -+} -+ -+static int au_ii_refresh(struct inode *inode, int *update) -+{ -+ int err, e; -+ umode_t type; -+ aufs_bindex_t bindex, new_bindex; -+ struct super_block *sb; -+ struct au_iinfo *iinfo; -+ struct au_hinode *p, *q, tmp; -+ -+ IiMustWriteLock(inode); -+ -+ *update = 0; -+ sb = inode->i_sb; -+ type = inode->i_mode & S_IFMT; -+ iinfo = au_ii(inode); -+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1); -+ if (unlikely(err)) -+ goto out; -+ -+ AuDebugOn(iinfo->ii_bstart < 0); -+ p = iinfo->ii_hinode + iinfo->ii_bstart; -+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; -+ bindex++, p++) { -+ if (!p->hi_inode) -+ continue; -+ -+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT)); -+ new_bindex = au_br_index(sb, p->hi_id); -+ if (new_bindex == bindex) -+ continue; -+ -+ if (new_bindex < 0) { -+ *update = 1; -+ au_hiput(p); -+ p->hi_inode = NULL; -+ continue; -+ } -+ -+ if (new_bindex < iinfo->ii_bstart) -+ iinfo->ii_bstart = new_bindex; -+ if (iinfo->ii_bend < new_bindex) -+ iinfo->ii_bend = new_bindex; -+ /* swap two lower inode, and loop again */ -+ q = iinfo->ii_hinode + new_bindex; -+ tmp = *q; -+ *q = *p; -+ *p = tmp; -+ if (tmp.hi_inode) { -+ bindex--; -+ p--; -+ } -+ } -+ au_update_ibrange(inode, /*do_put_zero*/0); -+ e = au_dy_irefresh(inode); -+ if (unlikely(e && !err)) -+ err = e; -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_refresh_hinode_self(struct inode *inode) -+{ -+ int err, update; -+ -+ err = au_ii_refresh(inode, &update); -+ if (!err) -+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode)); 
-+ -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry) -+{ -+ int err, e, update; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, bend; -+ unsigned char isdir; -+ struct au_hinode *p; -+ struct au_iinfo *iinfo; -+ -+ err = au_ii_refresh(inode, &update); -+ if (unlikely(err)) -+ goto out; -+ -+ update = 0; -+ iinfo = au_ii(inode); -+ p = iinfo->ii_hinode + iinfo->ii_bstart; -+ mode = (inode->i_mode & S_IFMT); -+ isdir = S_ISDIR(mode); -+ flags = au_hi_flags(inode, isdir); -+ bend = au_dbend(dentry); -+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) { -+ struct inode *h_i; -+ struct dentry *h_d; -+ -+ h_d = au_h_dptr(dentry, bindex); -+ if (!h_d || !h_d->d_inode) -+ continue; -+ -+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT)); -+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { -+ h_i = au_h_iptr(inode, bindex); -+ if (h_i) { -+ if (h_i == h_d->d_inode) -+ continue; -+ err = -EIO; -+ break; -+ } -+ } -+ if (bindex < iinfo->ii_bstart) -+ iinfo->ii_bstart = bindex; -+ if (iinfo->ii_bend < bindex) -+ iinfo->ii_bend = bindex; -+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags); -+ update = 1; -+ } -+ au_update_ibrange(inode, /*do_put_zero*/0); -+ e = au_dy_irefresh(inode); -+ if (unlikely(e && !err)) -+ err = e; -+ if (!err) -+ au_refresh_hinode_attr(inode, update && isdir); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int set_inode(struct inode *inode, struct dentry *dentry) -+{ -+ int err; -+ unsigned int flags; -+ umode_t mode; -+ aufs_bindex_t bindex, bstart, btail; -+ unsigned char isdir; -+ struct dentry *h_dentry; -+ struct inode *h_inode; -+ struct au_iinfo *iinfo; -+ -+ IiMustWriteLock(inode); -+ -+ err = 0; -+ isdir = 0; -+ bstart = au_dbstart(dentry); -+ h_inode = au_h_dptr(dentry, bstart)->d_inode; -+ mode = h_inode->i_mode; -+ switch (mode & S_IFMT) { -+ case S_IFREG: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_iop; 
-+ inode->i_fop = &aufs_file_fop; -+ err = au_dy_iaop(inode, bstart, h_inode); -+ if (unlikely(err)) -+ goto out; -+ break; -+ case S_IFDIR: -+ isdir = 1; -+ btail = au_dbtaildir(dentry); -+ inode->i_op = &aufs_dir_iop; -+ inode->i_fop = &aufs_dir_fop; -+ break; -+ case S_IFLNK: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_symlink_iop; -+ break; -+ case S_IFBLK: -+ case S_IFCHR: -+ case S_IFIFO: -+ case S_IFSOCK: -+ btail = au_dbtail(dentry); -+ inode->i_op = &aufs_iop; -+ init_special_inode(inode, mode, h_inode->i_rdev); -+ break; -+ default: -+ AuIOErr("Unknown file type 0%o\n", mode); -+ err = -EIO; -+ goto out; -+ } -+ -+ /* do not set hnotify for whiteouted dirs (SHWH mode) */ -+ flags = au_hi_flags(inode, isdir); -+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH) -+ && au_ftest_hi(flags, HNOTIFY) -+ && dentry->d_name.len > AUFS_WH_PFX_LEN -+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) -+ au_fclr_hi(flags, HNOTIFY); -+ iinfo = au_ii(inode); -+ iinfo->ii_bstart = bstart; -+ iinfo->ii_bend = btail; -+ for (bindex = bstart; bindex <= btail; bindex++) { -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (h_dentry) -+ au_set_h_iptr(inode, bindex, -+ au_igrab(h_dentry->d_inode), flags); -+ } -+ au_cpup_attr_all(inode, /*force*/1); -+ -+out: -+ return err; -+} -+ -+/* -+ * successful returns with iinfo write_locked -+ * minus: errno -+ * zero: success, matched -+ * plus: no error, but unmatched -+ */ -+static int reval_inode(struct inode *inode, struct dentry *dentry) -+{ -+ int err; -+ unsigned int gen; -+ struct au_iigen iigen; -+ aufs_bindex_t bindex, bend; -+ struct inode *h_inode, *h_dinode; -+ -+ /* -+ * before this function, if aufs got any iinfo lock, it must be only -+ * one, the parent dir. -+ * it can happen by UDBA and the obsoleted inode number. 
-+ */ -+ err = -EIO; -+ if (unlikely(inode->i_ino == parent_ino(dentry))) -+ goto out; -+ -+ err = 1; -+ ii_write_lock_new_child(inode); -+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode; -+ bend = au_ibend(inode); -+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) { -+ h_inode = au_h_iptr(inode, bindex); -+ if (!h_inode || h_inode != h_dinode) -+ continue; -+ -+ err = 0; -+ gen = au_iigen(inode, &iigen); -+ if (gen == au_digen(dentry) -+ && !au_ig_ftest(iigen.ig_flags, HALF_REFRESHED)) -+ break; -+ -+ /* fully refresh inode using dentry */ -+ err = au_refresh_hinode(inode, dentry); -+ if (!err) -+ au_update_iigen(inode, /*half*/0); -+ break; -+ } -+ -+ if (unlikely(err)) -+ ii_write_unlock(inode); -+out: -+ return err; -+} -+ -+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ unsigned int d_type, ino_t *ino) -+{ -+ int err; -+ struct mutex *mtx; -+ -+ /* prevent hardlinked inode number from race condition */ -+ mtx = NULL; -+ if (d_type != DT_DIR) { -+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx; -+ mutex_lock(mtx); -+ } -+ err = au_xino_read(sb, bindex, h_ino, ino); -+ if (unlikely(err)) -+ goto out; -+ -+ if (!*ino) { -+ err = -EIO; -+ *ino = au_xino_new_ino(sb); -+ if (unlikely(!*ino)) -+ goto out; -+ err = au_xino_write(sb, bindex, h_ino, *ino); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+out: -+ if (mtx) -+ mutex_unlock(mtx); -+ return err; -+} -+ -+/* successful returns with iinfo write_locked */ -+/* todo: return with unlocked? */ -+struct inode *au_new_inode(struct dentry *dentry, int must_new) -+{ -+ struct inode *inode, *h_inode; -+ struct dentry *h_dentry; -+ struct super_block *sb; -+ struct mutex *mtx; -+ ino_t h_ino, ino; -+ int err; -+ aufs_bindex_t bstart; -+ -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ h_dentry = au_h_dptr(dentry, bstart); -+ h_inode = h_dentry->d_inode; -+ h_ino = h_inode->i_ino; -+ -+ /* -+ * stop 'race'-ing between hardlinks under different -+ * parents. 
-+ */ -+ mtx = NULL; -+ if (!S_ISDIR(h_inode->i_mode)) -+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx; -+ -+new_ino: -+ if (mtx) -+ mutex_lock(mtx); -+ err = au_xino_read(sb, bstart, h_ino, &ino); -+ inode = ERR_PTR(err); -+ if (unlikely(err)) -+ goto out; -+ -+ if (!ino) { -+ ino = au_xino_new_ino(sb); -+ if (unlikely(!ino)) { -+ inode = ERR_PTR(-EIO); -+ goto out; -+ } -+ } -+ -+ AuDbg("i%lu\n", (unsigned long)ino); -+ inode = au_iget_locked(sb, ino); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out; -+ -+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); -+ if (inode->i_state & I_NEW) { -+ /* verbose coding for lock class name */ -+ if (unlikely(S_ISLNK(h_inode->i_mode))) -+ au_rw_class(&au_ii(inode)->ii_rwsem, -+ au_lc_key + AuLcSymlink_IIINFO); -+ else if (unlikely(S_ISDIR(h_inode->i_mode))) -+ au_rw_class(&au_ii(inode)->ii_rwsem, -+ au_lc_key + AuLcDir_IIINFO); -+ else /* likely */ -+ au_rw_class(&au_ii(inode)->ii_rwsem, -+ au_lc_key + AuLcNonDir_IIINFO); -+ -+ ii_write_lock_new_child(inode); -+ err = set_inode(inode, dentry); -+ if (!err) { -+ unlock_new_inode(inode); -+ goto out; /* success */ -+ } -+ -+ /* -+ * iget_failed() calls iput(), but we need to call -+ * ii_write_unlock() after iget_failed(). so dirty hack for -+ * i_count. -+ */ -+ atomic_inc(&inode->i_count); -+ iget_failed(inode); -+ ii_write_unlock(inode); -+ au_xino_write(sb, bstart, h_ino, /*ino*/0); -+ /* ignore this error */ -+ goto out_iput; -+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) { -+ /* -+ * horrible race condition between lookup, readdir and copyup -+ * (or something). 
-+ */ -+ if (mtx) -+ mutex_unlock(mtx); -+ err = reval_inode(inode, dentry); -+ if (unlikely(err < 0)) { -+ mtx = NULL; -+ goto out_iput; -+ } -+ -+ if (!err) { -+ mtx = NULL; -+ goto out; /* success */ -+ } else if (mtx) -+ mutex_lock(mtx); -+ } -+ -+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode))) -+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir," -+ " b%d, %s, %pd, hi%lu, i%lu.\n", -+ bstart, au_sbtype(h_dentry->d_sb), dentry, -+ (unsigned long)h_ino, (unsigned long)ino); -+ ino = 0; -+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0); -+ if (!err) { -+ iput(inode); -+ if (mtx) -+ mutex_unlock(mtx); -+ goto new_ino; -+ } -+ -+out_iput: -+ iput(inode); -+ inode = ERR_PTR(err); -+out: -+ if (mtx) -+ mutex_unlock(mtx); -+ return inode; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode) -+{ -+ int err; -+ struct inode *hi; -+ -+ err = au_br_rdonly(au_sbr(sb, bindex)); -+ -+ /* pseudo-link after flushed may happen out of bounds */ -+ if (!err -+ && inode -+ && au_ibstart(inode) <= bindex -+ && bindex <= au_ibend(inode)) { -+ /* -+ * permission check is unnecessary since vfsub routine -+ * will be called later -+ */ -+ hi = au_h_iptr(inode, bindex); -+ if (hi) -+ err = IS_IMMUTABLE(hi) ? 
-EROFS : 0; -+ } -+ -+ return err; -+} -+ -+int au_test_h_perm(struct inode *h_inode, int mask) -+{ -+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) -+ return 0; -+ return inode_permission(h_inode, mask); -+} -+ -+int au_test_h_perm_sio(struct inode *h_inode, int mask) -+{ -+ if (au_test_nfs(h_inode->i_sb) -+ && (mask & MAY_WRITE) -+ && S_ISDIR(h_inode->i_mode)) -+ mask |= MAY_READ; /* force permission check */ -+ return au_test_h_perm(h_inode, mask); -+} -diff --git fs/aufs/inode.h fs/aufs/inode.h -new file mode 100644 -index 0000000..d35be16 ---- /dev/null -+++ fs/aufs/inode.h -@@ -0,0 +1,588 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * inode operations -+ */ -+ -+#ifndef __AUFS_INODE_H__ -+#define __AUFS_INODE_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "rwsem.h" -+ -+struct vfsmount; -+ -+struct au_hnotify { -+#ifdef CONFIG_AUFS_HNOTIFY -+#ifdef CONFIG_AUFS_HFSNOTIFY -+ /* never use fsnotify_add_vfsmount_mark() */ -+ struct fsnotify_mark hn_mark; -+#endif -+ struct inode *hn_aufs_inode; /* no get/put */ -+#endif -+} ____cacheline_aligned_in_smp; -+ -+struct au_hinode { -+ struct inode *hi_inode; -+ aufs_bindex_t hi_id; -+#ifdef CONFIG_AUFS_HNOTIFY -+ struct au_hnotify *hi_notify; -+#endif -+ -+ /* reference to the copied-up whiteout with get/put */ -+ struct dentry *hi_whdentry; -+}; -+ -+/* ig_flags */ -+#define AuIG_HALF_REFRESHED 1 -+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name) -+#define au_ig_fset(flags, name) \ -+ do { (flags) |= AuIG_##name; } while (0) -+#define au_ig_fclr(flags, name) \ -+ do { (flags) &= ~AuIG_##name; } while (0) -+ -+struct au_iigen { -+ __u32 ig_generation, ig_flags; -+}; -+ -+struct au_vdir; -+struct au_iinfo { -+ spinlock_t ii_genspin; -+ struct au_iigen ii_generation; -+ struct super_block *ii_hsb1; /* no get/put */ -+ -+ struct au_rwsem ii_rwsem; -+ aufs_bindex_t ii_bstart, ii_bend; -+ __u32 ii_higen; -+ struct au_hinode *ii_hinode; -+ struct au_vdir *ii_vdir; -+}; -+ -+struct 
au_icntnr { -+ struct au_iinfo iinfo; -+ struct inode vfs_inode; -+} ____cacheline_aligned_in_smp; -+ -+/* au_pin flags */ -+#define AuPin_DI_LOCKED 1 -+#define AuPin_MNT_WRITE (1 << 1) -+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name) -+#define au_fset_pin(flags, name) \ -+ do { (flags) |= AuPin_##name; } while (0) -+#define au_fclr_pin(flags, name) \ -+ do { (flags) &= ~AuPin_##name; } while (0) -+ -+struct au_pin { -+ /* input */ -+ struct dentry *dentry; -+ unsigned int udba; -+ unsigned char lsc_di, lsc_hi, flags; -+ aufs_bindex_t bindex; -+ -+ /* output */ -+ struct dentry *parent; -+ struct au_hinode *hdir; -+ struct vfsmount *h_mnt; -+ -+ /* temporary unlock/relock for copyup */ -+ struct dentry *h_dentry, *h_parent; -+ struct au_branch *br; -+ struct task_struct *task; -+}; -+ -+void au_pin_hdir_unlock(struct au_pin *p); -+int au_pin_hdir_relock(struct au_pin *p); -+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task); -+void au_pin_hdir_acquire_nest(struct au_pin *p); -+void au_pin_hdir_release(struct au_pin *p); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct au_iinfo *au_ii(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ -+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo); -+ if (iinfo->ii_hinode) -+ return iinfo; -+ return NULL; /* debugging bad_inode case */ -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* inode.c */ -+struct inode *au_igrab(struct inode *inode); -+int au_refresh_hinode_self(struct inode *inode); -+int au_refresh_hinode(struct inode *inode, struct dentry *dentry); -+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ unsigned int d_type, ino_t *ino); -+struct inode *au_new_inode(struct dentry *dentry, int must_new); -+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex, -+ struct inode *inode); -+int au_test_h_perm(struct inode *h_inode, int mask); 
-+int au_test_h_perm_sio(struct inode *h_inode, int mask); -+ -+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex, -+ ino_t h_ino, unsigned int d_type, ino_t *ino) -+{ -+#ifdef CONFIG_AUFS_SHWH -+ return au_ino(sb, bindex, h_ino, d_type, ino); -+#else -+ return 0; -+#endif -+} -+ -+/* i_op.c */ -+extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; -+ -+/* au_wr_dir flags */ -+#define AuWrDir_ADD_ENTRY 1 -+#define AuWrDir_TMP_WHENTRY (1 << 1) -+#define AuWrDir_ISDIR (1 << 2) -+#define AuWrDir_TMPFILE (1 << 3) -+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name) -+#define au_fset_wrdir(flags, name) \ -+ do { (flags) |= AuWrDir_##name; } while (0) -+#define au_fclr_wrdir(flags, name) \ -+ do { (flags) &= ~AuWrDir_##name; } while (0) -+ -+struct au_wr_dir_args { -+ aufs_bindex_t force_btgt; -+ unsigned char flags; -+}; -+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry, -+ struct au_wr_dir_args *args); -+ -+struct dentry *au_pinned_h_parent(struct au_pin *pin); -+void au_pin_init(struct au_pin *pin, struct dentry *dentry, -+ aufs_bindex_t bindex, int lsc_di, int lsc_hi, -+ unsigned int udba, unsigned char flags); -+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int udba, unsigned char flags) __must_check; -+int au_do_pin(struct au_pin *pin) __must_check; -+void au_unpin(struct au_pin *pin); -+ -+/* i_op_add.c */ -+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, -+ dev_t dev); -+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); -+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode, -+ bool want_excl); -+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode); -+int aufs_link(struct dentry *src_dentry, struct inode *dir, -+ struct dentry *dentry); -+int aufs_mkdir(struct 
inode *dir, struct dentry *dentry, umode_t mode); -+ -+/* i_op_del.c */ -+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup); -+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent, int isdir); -+int aufs_unlink(struct inode *dir, struct dentry *dentry); -+int aufs_rmdir(struct inode *dir, struct dentry *dentry); -+ -+/* i_op_ren.c */ -+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt); -+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, -+ struct inode *dir, struct dentry *dentry); -+ -+/* iinfo.c */ -+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex); -+void au_hiput(struct au_hinode *hinode); -+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_wh); -+unsigned int au_hi_flags(struct inode *inode, int isdir); -+ -+/* hinode flags */ -+#define AuHi_XINO 1 -+#define AuHi_HNOTIFY (1 << 1) -+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name) -+#define au_fset_hi(flags, name) \ -+ do { (flags) |= AuHi_##name; } while (0) -+#define au_fclr_hi(flags, name) \ -+ do { (flags) &= ~AuHi_##name; } while (0) -+ -+#ifndef CONFIG_AUFS_HNOTIFY -+#undef AuHi_HNOTIFY -+#define AuHi_HNOTIFY 0 -+#endif -+ -+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex, -+ struct inode *h_inode, unsigned int flags); -+ -+void au_update_iigen(struct inode *inode, int half); -+void au_update_ibrange(struct inode *inode, int do_put_zero); -+ -+void au_icntnr_init_once(void *_c); -+int au_iinfo_init(struct inode *inode); -+void au_iinfo_fin(struct inode *inode); -+int au_ii_realloc(struct au_iinfo *iinfo, int nbr); -+ -+#ifdef CONFIG_PROC_FS -+/* plink.c */ -+int au_plink_maint(struct super_block *sb, int flags); -+void au_plink_maint_leave(struct au_sbinfo *sbinfo); -+int au_plink_maint_enter(struct super_block *sb); -+#ifdef CONFIG_AUFS_DEBUG -+void au_plink_list(struct super_block *sb); -+#else -+AuStubVoid(au_plink_list, struct super_block *sb) -+#endif 
-+int au_plink_test(struct inode *inode); -+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex); -+void au_plink_append(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+void au_plink_put(struct super_block *sb, int verbose); -+void au_plink_clean(struct super_block *sb, int verbose); -+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id); -+#else -+AuStubInt0(au_plink_maint, struct super_block *sb, int flags); -+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo); -+AuStubInt0(au_plink_maint_enter, struct super_block *sb); -+AuStubVoid(au_plink_list, struct super_block *sb); -+AuStubInt0(au_plink_test, struct inode *inode); -+AuStub(struct dentry *, au_plink_lkup, return NULL, -+ struct inode *inode, aufs_bindex_t bindex); -+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry); -+AuStubVoid(au_plink_put, struct super_block *sb, int verbose); -+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose); -+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id); -+#endif /* CONFIG_PROC_FS */ -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* lock subclass for iinfo */ -+enum { -+ AuLsc_II_CHILD, /* child first */ -+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */ -+ AuLsc_II_CHILD3, /* copyup dirs */ -+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */ -+ AuLsc_II_PARENT2, -+ AuLsc_II_PARENT3, /* copyup dirs */ -+ AuLsc_II_NEW_CHILD -+}; -+ -+/* -+ * ii_read_lock_child, ii_write_lock_child, -+ * ii_read_lock_child2, ii_write_lock_child2, -+ * ii_read_lock_child3, ii_write_lock_child3, -+ * ii_read_lock_parent, ii_write_lock_parent, -+ * ii_read_lock_parent2, ii_write_lock_parent2, -+ * ii_read_lock_parent3, ii_write_lock_parent3, -+ * ii_read_lock_new_child, ii_write_lock_new_child, -+ */ -+#define AuReadLockFunc(name, lsc) \ -+static inline void 
ii_read_lock_##name(struct inode *i) \ -+{ \ -+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} -+ -+#define AuWriteLockFunc(name, lsc) \ -+static inline void ii_write_lock_##name(struct inode *i) \ -+{ \ -+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \ -+} -+ -+#define AuRWLockFuncs(name, lsc) \ -+ AuReadLockFunc(name, lsc) \ -+ AuWriteLockFunc(name, lsc) -+ -+AuRWLockFuncs(child, CHILD); -+AuRWLockFuncs(child2, CHILD2); -+AuRWLockFuncs(child3, CHILD3); -+AuRWLockFuncs(parent, PARENT); -+AuRWLockFuncs(parent2, PARENT2); -+AuRWLockFuncs(parent3, PARENT3); -+AuRWLockFuncs(new_child, NEW_CHILD); -+ -+#undef AuReadLockFunc -+#undef AuWriteLockFunc -+#undef AuRWLockFuncs -+ -+/* -+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock -+ */ -+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem); -+ -+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem) -+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem) -+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem) -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline void au_icntnr_init(struct au_icntnr *c) -+{ -+#ifdef CONFIG_AUFS_DEBUG -+ c->vfs_inode.i_mode = 0; -+#endif -+} -+ -+static inline unsigned int au_iigen(struct inode *inode, struct au_iigen *iigen) -+{ -+ unsigned int gen; -+ struct au_iinfo *iinfo; -+ -+ iinfo = au_ii(inode); -+ spin_lock(&iinfo->ii_genspin); -+ if (iigen) -+ *iigen = iinfo->ii_generation; -+ gen = iinfo->ii_generation.ig_generation; -+ spin_unlock(&iinfo->ii_genspin); -+ -+ return gen; -+} -+ -+/* tiny test for inode number */ -+/* tmpfs generation is too rough */ -+static inline int au_test_higen(struct inode *inode, struct inode *h_inode) -+{ -+ struct au_iinfo *iinfo; -+ -+ iinfo = au_ii(inode); -+ AuRwMustAnyLock(&iinfo->ii_rwsem); -+ return !(iinfo->ii_hsb1 == h_inode->i_sb -+ && iinfo->ii_higen == h_inode->i_generation); -+} -+ -+static inline void 
au_iigen_dec(struct inode *inode) -+{ -+ struct au_iinfo *iinfo; -+ -+ iinfo = au_ii(inode); -+ spin_lock(&iinfo->ii_genspin); -+ iinfo->ii_generation.ig_generation--; -+ spin_unlock(&iinfo->ii_genspin); -+} -+ -+static inline int au_iigen_test(struct inode *inode, unsigned int sigen) -+{ -+ int err; -+ -+ err = 0; -+ if (unlikely(inode && au_iigen(inode, NULL) != sigen)) -+ err = -EIO; -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline aufs_bindex_t au_ii_br_id(struct inode *inode, -+ aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id; -+} -+ -+static inline aufs_bindex_t au_ibstart(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_bstart; -+} -+ -+static inline aufs_bindex_t au_ibend(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_bend; -+} -+ -+static inline struct au_vdir *au_ivdir(struct inode *inode) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_vdir; -+} -+ -+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry; -+} -+ -+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_bstart = bindex; -+} -+ -+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_bend = bindex; -+} -+ -+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir) -+{ -+ IiMustWriteLock(inode); -+ au_ii(inode)->ii_vdir = vdir; -+} -+ -+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex) -+{ -+ IiMustAnyLock(inode); -+ return au_ii(inode)->ii_hinode + bindex; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct dentry *au_pinned_parent(struct 
au_pin *pin) -+{ -+ if (pin) -+ return pin->parent; -+ return NULL; -+} -+ -+static inline struct inode *au_pinned_h_dir(struct au_pin *pin) -+{ -+ if (pin && pin->hdir) -+ return pin->hdir->hi_inode; -+ return NULL; -+} -+ -+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin) -+{ -+ if (pin) -+ return pin->hdir; -+ return NULL; -+} -+ -+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry) -+{ -+ if (pin) -+ pin->dentry = dentry; -+} -+ -+static inline void au_pin_set_parent_lflag(struct au_pin *pin, -+ unsigned char lflag) -+{ -+ if (pin) { -+ if (lflag) -+ au_fset_pin(pin->flags, DI_LOCKED); -+ else -+ au_fclr_pin(pin->flags, DI_LOCKED); -+ } -+} -+ -+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent) -+{ -+ if (pin) { -+ dput(pin->parent); -+ pin->parent = dget(parent); -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_branch; -+#ifdef CONFIG_AUFS_HNOTIFY -+struct au_hnotify_op { -+ void (*ctl)(struct au_hinode *hinode, int do_set); -+ int (*alloc)(struct au_hinode *hinode); -+ -+ /* -+ * if it returns true, the the caller should free hinode->hi_notify, -+ * otherwise ->free() frees it. 
-+ */ -+ int (*free)(struct au_hinode *hinode, -+ struct au_hnotify *hn) __must_check; -+ -+ void (*fin)(void); -+ int (*init)(void); -+ -+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm); -+ void (*fin_br)(struct au_branch *br); -+ int (*init_br)(struct au_branch *br, int perm); -+}; -+ -+/* hnotify.c */ -+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode); -+void au_hn_free(struct au_hinode *hinode); -+void au_hn_ctl(struct au_hinode *hinode, int do_set); -+void au_hn_reset(struct inode *inode, unsigned int flags); -+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask, -+ struct qstr *h_child_qstr, struct inode *h_child_inode); -+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm); -+int au_hnotify_init_br(struct au_branch *br, int perm); -+void au_hnotify_fin_br(struct au_branch *br); -+int __init au_hnotify_init(void); -+void au_hnotify_fin(void); -+ -+/* hfsnotify.c */ -+extern const struct au_hnotify_op au_hnotify_op; -+ -+static inline -+void au_hn_init(struct au_hinode *hinode) -+{ -+ hinode->hi_notify = NULL; -+} -+ -+static inline struct au_hnotify *au_hn(struct au_hinode *hinode) -+{ -+ return hinode->hi_notify; -+} -+ -+#else -+static inline -+int au_hn_alloc(struct au_hinode *hinode __maybe_unused, -+ struct inode *inode __maybe_unused) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline struct au_hnotify *au_hn(struct au_hinode *hinode) -+{ -+ return NULL; -+} -+ -+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused) -+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused, -+ int do_set __maybe_unused) -+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused, -+ unsigned int flags __maybe_unused) -+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused, -+ struct au_branch *br __maybe_unused, -+ int perm __maybe_unused) -+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused, -+ int perm __maybe_unused) -+AuStubVoid(au_hnotify_fin_br, 
struct au_branch *br __maybe_unused) -+AuStubInt0(__init au_hnotify_init, void) -+AuStubVoid(au_hnotify_fin, void) -+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused) -+#endif /* CONFIG_AUFS_HNOTIFY */ -+ -+static inline void au_hn_suspend(struct au_hinode *hdir) -+{ -+ au_hn_ctl(hdir, /*do_set*/0); -+} -+ -+static inline void au_hn_resume(struct au_hinode *hdir) -+{ -+ au_hn_ctl(hdir, /*do_set*/1); -+} -+ -+static inline void au_hn_imtx_lock(struct au_hinode *hdir) -+{ -+ mutex_lock(&hdir->hi_inode->i_mutex); -+ au_hn_suspend(hdir); -+} -+ -+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir, -+ unsigned int sc __maybe_unused) -+{ -+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc); -+ au_hn_suspend(hdir); -+} -+ -+static inline void au_hn_imtx_unlock(struct au_hinode *hdir) -+{ -+ au_hn_resume(hdir); -+ mutex_unlock(&hdir->hi_inode->i_mutex); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_INODE_H__ */ -diff --git fs/aufs/ioctl.c fs/aufs/ioctl.c -new file mode 100644 -index 0000000..f95ea07 ---- /dev/null -+++ fs/aufs/ioctl.c -@@ -0,0 +1,188 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * ioctl -+ * plink-management and readdir in userspace. -+ * assist the pathconf(3) wrapper library. 
-+ * move-down -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg) -+{ -+ int err, fd; -+ aufs_bindex_t wbi, bindex, bend; -+ struct file *h_file; -+ struct super_block *sb; -+ struct dentry *root; -+ struct au_branch *br; -+ struct aufs_wbr_fd wbrfd = { -+ .oflags = au_dir_roflags, -+ .brid = -1 -+ }; -+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY -+ | O_NOATIME | O_CLOEXEC; -+ -+ AuDebugOn(wbrfd.oflags & ~valid); -+ -+ if (arg) { -+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ goto out; -+ } -+ -+ err = -EINVAL; -+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid); -+ wbrfd.oflags |= au_dir_roflags; -+ AuDbg("0%o\n", wbrfd.oflags); -+ if (unlikely(wbrfd.oflags & ~valid)) -+ goto out; -+ } -+ -+ fd = get_unused_fd(); -+ err = fd; -+ if (unlikely(fd < 0)) -+ goto out; -+ -+ h_file = ERR_PTR(-EINVAL); -+ wbi = 0; -+ br = NULL; -+ sb = path->dentry->d_sb; -+ root = sb->s_root; -+ aufs_read_lock(root, AuLock_IR); -+ bend = au_sbend(sb); -+ if (wbrfd.brid >= 0) { -+ wbi = au_br_index(sb, wbrfd.brid); -+ if (unlikely(wbi < 0 || wbi > bend)) -+ goto out_unlock; -+ } -+ -+ h_file = ERR_PTR(-ENOENT); -+ br = au_sbr(sb, wbi); -+ if (!au_br_writable(br->br_perm)) { -+ if (arg) -+ goto out_unlock; -+ -+ bindex = wbi + 1; -+ wbi = -1; -+ for (; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (au_br_writable(br->br_perm)) { -+ wbi = bindex; -+ br = au_sbr(sb, wbi); -+ break; -+ } -+ } -+ } -+ AuDbg("wbi %d\n", wbi); -+ if (wbi >= 0) -+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL, -+ /*force_wr*/0); -+ -+out_unlock: -+ aufs_read_unlock(root, AuLock_IR); -+ err = PTR_ERR(h_file); -+ if (IS_ERR(h_file)) -+ goto out_fd; -+ -+ atomic_dec(&br->br_count); /* cf. 
au_h_open() */ -+ fd_install(fd, h_file); -+ err = fd; -+ goto out; /* success */ -+ -+out_fd: -+ put_unused_fd(fd); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_ioctl(file, cmd, arg); -+ break; -+ -+ case AUFS_CTL_WBR_FD: -+ err = au_wbr_fd(&file->f_path, (void __user *)arg); -+ break; -+ -+ case AUFS_CTL_IBUSY: -+ err = au_ibusy_ioctl(file, arg); -+ break; -+ -+ default: -+ /* do not call the lower */ -+ AuDbg("0x%x\n", cmd); -+ err = -ENOTTY; -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_MVDOWN: -+ err = au_mvdown(file->f_dentry, (void __user *)arg); -+ break; -+ -+ case AUFS_CTL_WBR_FD: -+ err = au_wbr_fd(&file->f_path, (void __user *)arg); -+ break; -+ -+ default: -+ /* do not call the lower */ -+ AuDbg("0x%x\n", cmd); -+ err = -ENOTTY; -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+#ifdef CONFIG_COMPAT -+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ long err; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_compat_ioctl(file, cmd, arg); -+ break; -+ -+ case AUFS_CTL_IBUSY: -+ err = au_ibusy_compat_ioctl(file, arg); -+ break; -+ -+ default: -+ err = aufs_ioctl_dir(file, cmd, arg); -+ } -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg)); -+} -+#endif -diff --git fs/aufs/loop.c fs/aufs/loop.c -new file mode 100644 -index 0000000..3b03b52 ---- /dev/null -+++ fs/aufs/loop.c -@@ -0,0 +1,132 @@ -+/* -+ * Copyright (C) 
2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * support for loopback block device as a branch -+ */ -+ -+#include "aufs.h" -+ -+/* added into drivers/block/loop.c */ -+static struct file *(*backing_file_func)(struct super_block *sb); -+ -+/* -+ * test if two lower dentries have overlapping branches. -+ */ -+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding) -+{ -+ struct super_block *h_sb; -+ struct file *backing_file; -+ -+ if (unlikely(!backing_file_func)) { -+ /* don't load "loop" module here */ -+ backing_file_func = symbol_get(loop_backing_file); -+ if (unlikely(!backing_file_func)) -+ /* "loop" module is not loaded */ -+ return 0; -+ } -+ -+ h_sb = h_adding->d_sb; -+ backing_file = backing_file_func(h_sb); -+ if (!backing_file) -+ return 0; -+ -+ h_adding = backing_file->f_dentry; -+ /* -+ * h_adding can be local NFS. -+ * in this case aufs cannot detect the loop. -+ */ -+ if (unlikely(h_adding->d_sb == sb)) -+ return 1; -+ return !!au_test_subdir(h_adding, sb->s_root); -+} -+ -+/* true if a kernel thread named 'loop[0-9].*' accesses a file */ -+int au_test_loopback_kthread(void) -+{ -+ int ret; -+ struct task_struct *tsk = current; -+ char c, comm[sizeof(tsk->comm)]; -+ -+ ret = 0; -+ if (tsk->flags & PF_KTHREAD) { -+ get_task_comm(comm, tsk); -+ c = comm[4]; -+ ret = ('0' <= c && c <= '9' -+ && !strncmp(comm, "loop", 4)); -+ } -+ -+ return ret; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define au_warn_loopback_step 16 -+static int au_warn_loopback_nelem = au_warn_loopback_step; -+static unsigned long *au_warn_loopback_array; -+ -+void au_warn_loopback(struct super_block *h_sb) -+{ -+ int i, new_nelem; -+ unsigned long *a, magic; -+ static DEFINE_SPINLOCK(spin); -+ -+ magic = h_sb->s_magic; -+ spin_lock(&spin); -+ a = au_warn_loopback_array; -+ for (i = 0; i < au_warn_loopback_nelem && *a; i++) -+ if (a[i] == magic) { -+ spin_unlock(&spin); -+ return; -+ } -+ -+ /* h_sb is new to us, 
print it */ -+ if (i < au_warn_loopback_nelem) { -+ a[i] = magic; -+ goto pr; -+ } -+ -+ /* expand the array */ -+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step; -+ a = au_kzrealloc(au_warn_loopback_array, -+ au_warn_loopback_nelem * sizeof(unsigned long), -+ new_nelem * sizeof(unsigned long), GFP_ATOMIC); -+ if (a) { -+ au_warn_loopback_nelem = new_nelem; -+ au_warn_loopback_array = a; -+ a[i] = magic; -+ goto pr; -+ } -+ -+ spin_unlock(&spin); -+ AuWarn1("realloc failed, ignored\n"); -+ return; -+ -+pr: -+ spin_unlock(&spin); -+ pr_warn("you may want to try another patch for loopback file " -+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic); -+} -+ -+int au_loopback_init(void) -+{ -+ int err; -+ struct super_block *sb __maybe_unused; -+ -+ AuDebugOn(sizeof(sb->s_magic) != sizeof(unsigned long)); -+ -+ err = 0; -+ au_warn_loopback_array = kcalloc(au_warn_loopback_step, -+ sizeof(unsigned long), GFP_NOFS); -+ if (unlikely(!au_warn_loopback_array)) -+ err = -ENOMEM; -+ -+ return err; -+} -+ -+void au_loopback_fin(void) -+{ -+ symbol_put(loop_backing_file); -+ kfree(au_warn_loopback_array); -+} -diff --git fs/aufs/loop.h fs/aufs/loop.h -new file mode 100644 -index 0000000..da8b756 ---- /dev/null -+++ fs/aufs/loop.h -@@ -0,0 +1,39 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * support for loopback mount as a branch -+ */ -+ -+#ifndef __AUFS_LOOP_H__ -+#define __AUFS_LOOP_H__ -+ -+#ifdef __KERNEL__ -+ -+struct dentry; -+struct super_block; -+ -+#ifdef CONFIG_AUFS_BDEV_LOOP -+/* drivers/block/loop.c */ -+struct file *loop_backing_file(struct super_block *sb); -+ -+/* loop.c */ -+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding); -+int au_test_loopback_kthread(void); -+void au_warn_loopback(struct super_block *h_sb); -+ -+int au_loopback_init(void); -+void au_loopback_fin(void); -+#else -+AuStubInt0(au_test_loopback_overlap, struct super_block *sb, -+ struct dentry *h_adding) -+AuStubInt0(au_test_loopback_kthread, void) -+AuStubVoid(au_warn_loopback, struct super_block *h_sb) -+ -+AuStubInt0(au_loopback_init, void) -+AuStubVoid(au_loopback_fin, void) -+#endif /* BLK_DEV_LOOP */ -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_LOOP_H__ */ -diff --git fs/aufs/magic.mk fs/aufs/magic.mk -new file mode 100644 -index 0000000..3e6387b ---- /dev/null -+++ fs/aufs/magic.mk -@@ -0,0 +1,54 @@ -+ -+# defined in ${srctree}/fs/fuse/inode.c -+# tristate -+ifdef CONFIG_FUSE_FS -+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546 -+endif -+ -+# defined in ${srctree}/fs/ocfs2/ocfs2_fs.h -+# tristate -+ifdef CONFIG_OCFS2_FS -+ccflags-y += -DOCFS2_SUPER_MAGIC=0x7461636f -+endif -+ -+# defined in ${srctree}/fs/ocfs2/dlm/userdlm.h -+# tristate -+ifdef CONFIG_OCFS2_FS_O2CB -+ccflags-y += -DDLMFS_MAGIC=0x76a9f425 -+endif -+ -+# defined in ${srctree}/fs/cifs/cifsfs.c -+# tristate -+ifdef CONFIG_CIFS_FS -+ccflags-y += -DCIFS_MAGIC_NUMBER=0xFF534D42 -+endif -+ -+# defined in ${srctree}/fs/xfs/xfs_sb.h -+# tristate -+ifdef CONFIG_XFS_FS -+ccflags-y += -DXFS_SB_MAGIC=0x58465342 -+endif -+ -+# defined in ${srctree}/fs/configfs/mount.c -+# tristate -+ifdef CONFIG_CONFIGFS_FS -+ccflags-y += -DCONFIGFS_MAGIC=0x62656570 -+endif -+ -+# defined in ${srctree}/fs/9p/v9fs.h -+# tristate -+ifdef CONFIG_9P_FS -+ccflags-y += 
-DV9FS_MAGIC=0x01021997 -+endif -+ -+# defined in ${srctree}/fs/ubifs/ubifs.h -+# tristate -+ifdef CONFIG_UBIFS_FS -+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905 -+endif -+ -+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h -+# tristate -+ifdef CONFIG_HFSPLUS_FS -+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b -+endif -diff --git fs/aufs/module.c fs/aufs/module.c -new file mode 100644 -index 0000000..08d4902 ---- /dev/null -+++ fs/aufs/module.c -@@ -0,0 +1,196 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * module global variables and operations -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp) -+{ -+ if (new_sz <= nused) -+ return p; -+ -+ p = krealloc(p, new_sz, gfp); -+ if (p) -+ memset(p + nused, 0, new_sz - nused); -+ return p; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * aufs caches -+ */ -+struct kmem_cache *au_cachep[AuCache_Last]; -+static int __init au_cache_init(void) -+{ -+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once); -+ if (au_cachep[AuCache_DINFO]) -+ /* SLAB_DESTROY_BY_RCU */ -+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr, -+ au_icntnr_init_once); -+ if (au_cachep[AuCache_ICNTNR]) -+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo, -+ au_fi_init_once); -+ if (au_cachep[AuCache_FINFO]) -+ au_cachep[AuCache_VDIR] = AuCache(au_vdir); -+ if (au_cachep[AuCache_VDIR]) -+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr); -+ if (au_cachep[AuCache_DEHSTR]) -+ return 0; -+ -+ return -ENOMEM; -+} -+ -+static void au_cache_fin(void) -+{ -+ int i; -+ -+ /* -+ * Make sure all delayed rcu free inodes are flushed before we -+ * destroy cache. 
-+ */ -+ rcu_barrier(); -+ -+ /* excluding AuCache_HNOTIFY */ -+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last); -+ for (i = 0; i < AuCache_HNOTIFY; i++) -+ if (au_cachep[i]) { -+ kmem_cache_destroy(au_cachep[i]); -+ au_cachep[i] = NULL; -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_dir_roflags; -+ -+#ifdef CONFIG_AUFS_SBILIST -+/* -+ * iterate_supers_type() doesn't protect us from -+ * remounting (branch management) -+ */ -+struct au_splhead au_sbilist; -+#endif -+ -+struct lock_class_key au_lc_key[AuLcKey_Last]; -+ -+/* -+ * functions for module interface. -+ */ -+MODULE_LICENSE("GPL"); -+/* MODULE_LICENSE("GPL v2"); */ -+MODULE_AUTHOR("Junjiro R. Okajima "); -+MODULE_DESCRIPTION(AUFS_NAME -+ " -- Advanced multi layered unification filesystem"); -+MODULE_VERSION(AUFS_VERSION); -+ -+/* this module parameter has no meaning when SYSFS is disabled */ -+int sysaufs_brs = 1; -+MODULE_PARM_DESC(brs, "use /fs/aufs/si_*/brN"); -+module_param_named(brs, sysaufs_brs, int, S_IRUGO); -+ -+/* this module parameter has no meaning when USER_NS is disabled */ -+static bool au_userns; -+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns"); -+module_param_named(allow_userns, au_userns, bool, S_IRUGO); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */ -+ -+int au_seq_path(struct seq_file *seq, struct path *path) -+{ -+ return seq_path(seq, path, au_esc_chars); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int __init aufs_init(void) -+{ -+ int err, i; -+ char *p; -+ -+ p = au_esc_chars; -+ for (i = 1; i <= ' '; i++) -+ *p++ = i; -+ *p++ = '\\'; -+ *p++ = '\x7f'; -+ *p = 0; -+ -+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE); -+ -+ au_sbilist_init(); -+ sysaufs_brs_init(); -+ au_debug_init(); -+ au_dy_init(); -+ err = 
sysaufs_init(); -+ if (unlikely(err)) -+ goto out; -+ err = au_procfs_init(); -+ if (unlikely(err)) -+ goto out_sysaufs; -+ err = au_wkq_init(); -+ if (unlikely(err)) -+ goto out_procfs; -+ err = au_loopback_init(); -+ if (unlikely(err)) -+ goto out_wkq; -+ err = au_hnotify_init(); -+ if (unlikely(err)) -+ goto out_loopback; -+ err = au_sysrq_init(); -+ if (unlikely(err)) -+ goto out_hin; -+ err = au_cache_init(); -+ if (unlikely(err)) -+ goto out_sysrq; -+ -+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0; -+ err = register_filesystem(&aufs_fs_type); -+ if (unlikely(err)) -+ goto out_cache; -+ -+ /* since we define pr_fmt, call printk directly */ -+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n"); -+ goto out; /* success */ -+ -+out_cache: -+ au_cache_fin(); -+out_sysrq: -+ au_sysrq_fin(); -+out_hin: -+ au_hnotify_fin(); -+out_loopback: -+ au_loopback_fin(); -+out_wkq: -+ au_wkq_fin(); -+out_procfs: -+ au_procfs_fin(); -+out_sysaufs: -+ sysaufs_fin(); -+ au_dy_fin(); -+out: -+ return err; -+} -+ -+static void __exit aufs_exit(void) -+{ -+ unregister_filesystem(&aufs_fs_type); -+ au_cache_fin(); -+ au_sysrq_fin(); -+ au_hnotify_fin(); -+ au_loopback_fin(); -+ au_wkq_fin(); -+ au_procfs_fin(); -+ sysaufs_fin(); -+ au_dy_fin(); -+} -+ -+module_init(aufs_init); -+module_exit(aufs_exit); -diff --git fs/aufs/module.h fs/aufs/module.h -new file mode 100644 -index 0000000..eab25b0 ---- /dev/null -+++ fs/aufs/module.h -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * module initialization and module-global -+ */ -+ -+#ifndef __AUFS_MODULE_H__ -+#define __AUFS_MODULE_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+ -+struct path; -+struct seq_file; -+ -+/* module parameters */ -+extern int sysaufs_brs; -+ -+/* ---------------------------------------------------------------------- */ -+ -+extern int au_dir_roflags; -+ -+enum { -+ AuLcNonDir_FIINFO, -+ AuLcNonDir_DIINFO, -+ AuLcNonDir_IIINFO, -+ -+ AuLcDir_FIINFO, -+ AuLcDir_DIINFO, -+ AuLcDir_IIINFO, -+ -+ AuLcSymlink_DIINFO, -+ AuLcSymlink_IIINFO, -+ -+ AuLcKey_Last -+}; -+extern struct lock_class_key au_lc_key[AuLcKey_Last]; -+ -+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp); -+int au_seq_path(struct seq_file *seq, struct path *path); -+ -+#ifdef CONFIG_PROC_FS -+/* procfs.c */ -+int __init au_procfs_init(void); -+void au_procfs_fin(void); -+#else -+AuStubInt0(au_procfs_init, void); -+AuStubVoid(au_procfs_fin, void); -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* kmem cache */ -+enum { -+ AuCache_DINFO, -+ AuCache_ICNTNR, -+ AuCache_FINFO, -+ AuCache_VDIR, -+ AuCache_DEHSTR, -+ AuCache_HNOTIFY, /* must be last */ -+ AuCache_Last -+}; -+ -+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD) -+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags) -+#define AuCacheCtor(type, ctor) \ -+ kmem_cache_create(#type, sizeof(struct type), \ -+ __alignof__(struct type), AuCacheFlags, ctor) -+ -+extern struct kmem_cache *au_cachep[]; -+ -+#define AuCacheFuncs(name, index) \ -+static inline struct au_##name *au_cache_alloc_##name(void) \ -+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \ -+static inline void au_cache_free_##name(struct au_##name *p) \ -+{ kmem_cache_free(au_cachep[AuCache_##index], p); } -+ -+AuCacheFuncs(dinfo, DINFO); -+AuCacheFuncs(icntnr, ICNTNR); -+AuCacheFuncs(finfo, FINFO); -+AuCacheFuncs(vdir, VDIR); -+AuCacheFuncs(vdir_dehstr, 
DEHSTR); -+#ifdef CONFIG_AUFS_HNOTIFY -+AuCacheFuncs(hnotify, HNOTIFY); -+#endif -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_MODULE_H__ */ -diff --git fs/aufs/mvdown.c fs/aufs/mvdown.c -new file mode 100644 -index 0000000..88cbbb7 ---- /dev/null -+++ fs/aufs/mvdown.c -@@ -0,0 +1,614 @@ -+/* -+ * Copyright (C) 2011-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * move-down, opposite of copy-up -+ */ -+ -+#include "aufs.h" -+ -+struct au_mvd_args { -+ struct { -+ struct super_block *h_sb; -+ struct dentry *h_parent; -+ struct au_hinode *hdir; -+ struct inode *h_dir, *h_inode; -+ } info[AUFS_MVDOWN_NARRAY]; -+ -+ struct aufs_mvdown mvdown; -+ struct dentry *dentry, *parent; -+ struct inode *inode, *dir; -+ struct super_block *sb; -+ aufs_bindex_t bopq, bwh, bfound; -+ unsigned char rename_lock; -+ struct au_pin pin; -+}; -+ -+#define mvd_errno mvdown.au_errno -+#define mvd_bsrc mvdown.a[AUFS_MVDOWN_UPPER].bindex -+#define mvd_src_brid mvdown.a[AUFS_MVDOWN_UPPER].brid -+#define mvd_bdst mvdown.a[AUFS_MVDOWN_LOWER].bindex -+#define mvd_dst_brid mvdown.a[AUFS_MVDOWN_LOWER].brid -+ -+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb -+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent -+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir -+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir -+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode -+ -+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb -+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent -+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir -+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir -+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode -+ -+#define AU_MVD_PR(flag, ...) 
do { \ -+ if (flag) \ -+ pr_err(__VA_ARGS__); \ -+ } while (0) -+ -+/* make the parent dir on bdst */ -+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ -+ err = 0; -+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc); -+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst); -+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc); -+ a->mvd_h_dst_parent = NULL; -+ if (au_dbend(a->parent) >= a->mvd_bdst) -+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst); -+ if (!a->mvd_h_dst_parent) { -+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst); -+ if (unlikely(err)) { -+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n"); -+ goto out; -+ } -+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst); -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* lock them all */ -+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ struct dentry *h_trap; -+ -+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc); -+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst); -+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) { -+ a->rename_lock = 0; -+ err = au_pin(&a->pin, a->dentry, a->mvd_bdst, -+ au_opt_udba(a->sb), -+ AuPin_MNT_WRITE | AuPin_DI_LOCKED); -+ if (!err) { -+ a->mvd_h_src_dir = a->mvd_h_src_parent->d_inode; -+ mutex_lock_nested(&a->mvd_h_src_dir->i_mutex, -+ AuLsc_I_PARENT3); -+ } else -+ AU_MVD_PR(dmsg, "pin failed\n"); -+ goto out; -+ } -+ -+ err = 0; -+ a->rename_lock = 1; -+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src, -+ a->mvd_h_dst_parent, a->mvd_hdir_dst); -+ if (h_trap) { -+ err = (h_trap != a->mvd_h_src_parent); -+ if (err) -+ err = (h_trap != a->mvd_h_dst_parent); -+ } -+ BUG_ON(err); /* it should never happen */ -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ if (!a->rename_lock) { -+ mutex_unlock(&a->mvd_h_src_dir->i_mutex); -+ au_unpin(&a->pin); -+ } else -+ vfsub_unlock_rename(a->mvd_h_src_parent, 
a->mvd_hdir_src, -+ a->mvd_h_dst_parent, a->mvd_hdir_dst); -+} -+ -+/* copy-down the file */ -+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ struct au_cp_generic cpg = { -+ .dentry = a->dentry, -+ .bdst = a->mvd_bdst, -+ .bsrc = a->mvd_bsrc, -+ .len = -1, -+ .pin = &a->pin, -+ .flags = AuCpup_DTIME | AuCpup_HOPEN -+ }; -+ -+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst); -+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER) -+ au_fset_cpup(cpg.flags, OVERWRITE); -+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER) -+ au_fset_cpup(cpg.flags, RWDST); -+ err = au_sio_cpdown_simple(&cpg); -+ if (unlikely(err)) -+ AU_MVD_PR(dmsg, "cpdown failed\n"); -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * unlink the whiteout on bdst if exist which may be created by UDBA while we -+ * were sleeping -+ */ -+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ struct path h_path; -+ struct au_branch *br; -+ struct inode *delegated; -+ -+ br = au_sbr(a->sb, a->mvd_bdst); -+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br); -+ err = PTR_ERR(h_path.dentry); -+ if (IS_ERR(h_path.dentry)) { -+ AU_MVD_PR(dmsg, "wh_lkup failed\n"); -+ goto out; -+ } -+ -+ err = 0; -+ if (h_path.dentry->d_inode) { -+ h_path.mnt = au_br_mnt(br); -+ delegated = NULL; -+ err = vfsub_unlink(a->mvd_h_dst_parent->d_inode, &h_path, -+ &delegated, /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ if (unlikely(err)) -+ AU_MVD_PR(dmsg, "wh_unlink failed\n"); -+ } -+ dput(h_path.dentry); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * unlink the topmost h_dentry -+ * Note: the target file MAY be modified by UDBA between this mutex_unlock() and -+ * mutex_lock() in vfs_unlink(). in this case, such changes may be lost. 
-+ */ -+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ struct path h_path; -+ struct inode *delegated; -+ -+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc); -+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc); -+ delegated = NULL; -+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ if (unlikely(err)) -+ AU_MVD_PR(dmsg, "unlink failed\n"); -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+/* -+ * copy-down the file and unlink the bsrc file. -+ * - unlink the bdst whout if exist -+ * - copy-down the file (with whtmp name and rename) -+ * - unlink the bsrc file -+ */ -+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ -+ err = au_do_mkdir(dmsg, a); -+ if (!err) -+ err = au_do_lock(dmsg, a); -+ if (unlikely(err)) -+ goto out; -+ -+ /* -+ * do not revert the activities we made on bdst since they should be -+ * harmless in aufs. 
-+ */ -+ -+ err = au_do_cpdown(dmsg, a); -+ if (!err) -+ err = au_do_unlink_wh(dmsg, a); -+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) -+ err = au_do_unlink(dmsg, a); -+ if (unlikely(err)) -+ goto out_unlock; -+ -+ /* maintain internal array */ -+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) { -+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL); -+ au_set_dbstart(a->dentry, a->mvd_bdst); -+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0); -+ au_set_ibstart(a->inode, a->mvd_bdst); -+ } -+ if (au_dbend(a->dentry) < a->mvd_bdst) -+ au_set_dbend(a->dentry, a->mvd_bdst); -+ if (au_ibend(a->inode) < a->mvd_bdst) -+ au_set_ibend(a->inode, a->mvd_bdst); -+ -+out_unlock: -+ au_do_unlock(dmsg, a); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int find_lower_writable(struct au_mvd_args *a) -+{ -+ struct super_block *sb; -+ aufs_bindex_t bindex, bend; -+ struct au_branch *br; -+ -+ sb = a->sb; -+ bindex = a->mvd_bsrc; -+ bend = au_sbend(sb); -+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER)) { -+ for (bindex++; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (!au_br_rdonly(br)) -+ return bindex; -+ } -+ } else { -+ for (bindex++; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) { -+ if (au_br_rdonly(br)) -+ a->mvdown.flags -+ |= AUFS_MVDOWN_ROLOWER_R; -+ return bindex; -+ } -+ } -+ } -+ -+ return -1; -+} -+ -+/* make sure the file is idle */ -+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err, plinked; -+ -+ err = 0; -+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK); -+ if (au_dbstart(a->dentry) == a->mvd_bsrc -+ && d_count(a->dentry) == 1 -+ && atomic_read(&a->inode->i_count) == 1 -+ /* && a->mvd_h_src_inode->i_nlink == 1 */ -+ && (!plinked || !au_plink_test(a->inode)) -+ && a->inode->i_nlink == 1) -+ goto out; -+ -+ err = -EBUSY; -+ AU_MVD_PR(dmsg, -+ "b%d, 
d{b%d, c%u?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n", -+ a->mvd_bsrc, au_dbstart(a->dentry), d_count(a->dentry), -+ atomic_read(&a->inode->i_count), a->inode->i_nlink, -+ a->mvd_h_src_inode->i_nlink, -+ plinked, plinked ? au_plink_test(a->inode) : 0); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* make sure the parent dir is fine */ -+static int au_mvd_args_parent(const unsigned char dmsg, -+ struct au_mvd_args *a) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ -+ err = 0; -+ if (unlikely(au_alive_dir(a->parent))) { -+ err = -ENOENT; -+ AU_MVD_PR(dmsg, "parent dir is dead\n"); -+ goto out; -+ } -+ -+ a->bopq = au_dbdiropq(a->parent); -+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst); -+ AuDbg("b%d\n", bindex); -+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst) -+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) { -+ err = -EINVAL; -+ a->mvd_errno = EAU_MVDOWN_OPAQUE; -+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n", -+ a->bopq, a->mvd_bdst); -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_mvd_args_intermediate(const unsigned char dmsg, -+ struct au_mvd_args *a) -+{ -+ int err; -+ struct au_dinfo *dinfo, *tmp; -+ -+ /* lookup the next lower positive entry */ -+ err = -ENOMEM; -+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP); -+ if (unlikely(!tmp)) -+ goto out; -+ -+ a->bfound = -1; -+ a->bwh = -1; -+ dinfo = au_di(a->dentry); -+ au_di_cp(tmp, dinfo); -+ au_di_swap(tmp, dinfo); -+ -+ /* returns the number of positive dentries */ -+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1, /*type*/0); -+ if (!err) -+ a->bwh = au_dbwh(a->dentry); -+ else if (err > 0) -+ a->bfound = au_dbstart(a->dentry); -+ -+ au_di_swap(tmp, dinfo); -+ au_rw_write_unlock(&tmp->di_rwsem); -+ au_di_free(tmp); -+ if (unlikely(err < 0)) -+ AU_MVD_PR(dmsg, "failed look-up lower\n"); -+ -+ /* -+ * here, we have these cases. -+ * bfound == -1 -+ * no positive dentry under bsrc. there are more sub-cases. -+ * bwh < 0 -+ * there no whiteout, we can safely move-down. 
-+ * bwh <= bsrc -+ * impossible -+ * bsrc < bwh && bwh < bdst -+ * there is a whiteout on RO branch. cannot proceed. -+ * bwh == bdst -+ * there is a whiteout on the RW target branch. it should -+ * be removed. -+ * bdst < bwh -+ * there is a whiteout somewhere unrelated branch. -+ * -1 < bfound && bfound <= bsrc -+ * impossible. -+ * bfound < bdst -+ * found, but it is on RO branch between bsrc and bdst. cannot -+ * proceed. -+ * bfound == bdst -+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return -+ * error. -+ * bdst < bfound -+ * found, after we create the file on bdst, it will be hidden. -+ */ -+ -+ AuDebugOn(a->bfound == -1 -+ && a->bwh != -1 -+ && a->bwh <= a->mvd_bsrc); -+ AuDebugOn(-1 < a->bfound -+ && a->bfound <= a->mvd_bsrc); -+ -+ err = -EINVAL; -+ if (a->bfound == -1 -+ && a->mvd_bsrc < a->bwh -+ && a->bwh != -1 -+ && a->bwh < a->mvd_bdst) { -+ a->mvd_errno = EAU_MVDOWN_WHITEOUT; -+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n", -+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh); -+ goto out; -+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) { -+ a->mvd_errno = EAU_MVDOWN_UPPER; -+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n", -+ a->mvd_bdst, a->bfound); -+ goto out; -+ } -+ -+ err = 0; /* success */ -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ -+ err = 0; -+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER) -+ && a->bfound == a->mvd_bdst) -+ err = -EEXIST; -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a) -+{ -+ int err; -+ struct au_branch *br; -+ -+ err = -EISDIR; -+ if (unlikely(S_ISDIR(a->inode->i_mode))) -+ goto out; -+ -+ err = -EINVAL; -+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER)) -+ a->mvd_bsrc = au_ibstart(a->inode); -+ else { -+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid); -+ if (unlikely(a->mvd_bsrc < 0 -+ || (a->mvd_bsrc < 
au_dbstart(a->dentry) -+ || au_dbend(a->dentry) < a->mvd_bsrc -+ || !au_h_dptr(a->dentry, a->mvd_bsrc)) -+ || (a->mvd_bsrc < au_ibstart(a->inode) -+ || au_ibend(a->inode) < a->mvd_bsrc -+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) { -+ a->mvd_errno = EAU_MVDOWN_NOUPPER; -+ AU_MVD_PR(dmsg, "no upper\n"); -+ goto out; -+ } -+ } -+ if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) { -+ a->mvd_errno = EAU_MVDOWN_BOTTOM; -+ AU_MVD_PR(dmsg, "on the bottom\n"); -+ goto out; -+ } -+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc); -+ br = au_sbr(a->sb, a->mvd_bsrc); -+ err = au_br_rdonly(br); -+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) { -+ if (unlikely(err)) -+ goto out; -+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode) -+ || IS_APPEND(a->mvd_h_src_inode))) { -+ if (err) -+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R; -+ /* go on */ -+ } else -+ goto out; -+ -+ err = -EINVAL; -+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) { -+ a->mvd_bdst = find_lower_writable(a); -+ if (unlikely(a->mvd_bdst < 0)) { -+ a->mvd_errno = EAU_MVDOWN_BOTTOM; -+ AU_MVD_PR(dmsg, "no writable lower branch\n"); -+ goto out; -+ } -+ } else { -+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid); -+ if (unlikely(a->mvd_bdst < 0 -+ || au_sbend(a->sb) < a->mvd_bdst)) { -+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR; -+ AU_MVD_PR(dmsg, "no lower brid\n"); -+ goto out; -+ } -+ } -+ -+ err = au_mvd_args_busy(dmsg, a); -+ if (!err) -+ err = au_mvd_args_parent(dmsg, a); -+ if (!err) -+ err = au_mvd_args_intermediate(dmsg, a); -+ if (!err) -+ err = au_mvd_args_exist(dmsg, a); -+ if (!err) -+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg) -+{ -+ int err, e; -+ unsigned char dmsg; -+ struct au_mvd_args *args; -+ -+ err = -EPERM; -+ if (unlikely(!capable(CAP_SYS_ADMIN))) -+ goto out; -+ -+ WARN_ONCE(1, "move-down is still testing...\n"); -+ -+ err = -ENOMEM; -+ args = kmalloc(sizeof(*args), 
GFP_NOFS); -+ if (unlikely(!args)) -+ goto out; -+ -+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown)); -+ if (!err) -+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ goto out_free; -+ } -+ AuDbg("flags 0x%x\n", args->mvdown.flags); -+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R); -+ args->mvdown.au_errno = 0; -+ args->dentry = dentry; -+ args->inode = dentry->d_inode; -+ args->sb = dentry->d_sb; -+ -+ err = -ENOENT; -+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG); -+ args->parent = dget_parent(dentry); -+ args->dir = args->parent->d_inode; -+ mutex_lock_nested(&args->dir->i_mutex, I_MUTEX_PARENT); -+ dput(args->parent); -+ if (unlikely(args->parent != dentry->d_parent)) { -+ AU_MVD_PR(dmsg, "parent dir is moved\n"); -+ goto out_dir; -+ } -+ -+ mutex_lock_nested(&args->inode->i_mutex, I_MUTEX_CHILD); -+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH); -+ if (unlikely(err)) -+ goto out_inode; -+ -+ di_write_lock_parent(args->parent); -+ err = au_mvd_args(dmsg, args); -+ if (unlikely(err)) -+ goto out_parent; -+ -+ AuDbgDentry(dentry); -+ AuDbgInode(args->inode); -+ err = au_do_mvdown(dmsg, args); -+ if (unlikely(err)) -+ goto out_parent; -+ AuDbgDentry(dentry); -+ AuDbgInode(args->inode); -+ -+ au_cpup_attr_timesizes(args->dir); -+ au_cpup_attr_timesizes(args->inode); -+ au_cpup_igen(args->inode, au_h_iptr(args->inode, args->mvd_bdst)); -+ /* au_digen_dec(dentry); */ -+ -+out_parent: -+ di_write_unlock(args->parent); -+ aufs_read_unlock(dentry, AuLock_DW); -+out_inode: -+ mutex_unlock(&args->inode->i_mutex); -+out_dir: -+ mutex_unlock(&args->dir->i_mutex); -+out_free: -+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown)); -+ if (unlikely(e)) -+ err = -EFAULT; -+ kfree(args); -+out: -+ AuTraceErr(err); -+ return err; -+} -diff --git fs/aufs/opts.c fs/aufs/opts.c -new file mode 100644 -index 0000000..ce6a7d5 ---- /dev/null -+++ 
fs/aufs/opts.c -@@ -0,0 +1,1737 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * mount options/flags -+ */ -+ -+#include -+#include /* a distribution requires */ -+#include -+#include "aufs.h" -+ -+/* ---------------------------------------------------------------------- */ -+ -+enum { -+ Opt_br, -+ Opt_add, Opt_del, Opt_mod, Opt_reorder, Opt_append, Opt_prepend, -+ Opt_idel, Opt_imod, Opt_ireorder, -+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash, Opt_rendir, -+ Opt_rdblk_def, Opt_rdhash_def, -+ Opt_xino, Opt_zxino, Opt_noxino, -+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino, -+ Opt_trunc_xino_path, Opt_itrunc_xino, -+ Opt_trunc_xib, Opt_notrunc_xib, -+ Opt_shwh, Opt_noshwh, -+ Opt_plink, Opt_noplink, Opt_list_plink, -+ Opt_udba, -+ Opt_dio, Opt_nodio, -+ /* Opt_lock, Opt_unlock, */ -+ Opt_cmd, Opt_cmd_args, -+ Opt_diropq_a, Opt_diropq_w, -+ Opt_warn_perm, Opt_nowarn_perm, -+ Opt_wbr_copyup, Opt_wbr_create, -+ Opt_refrof, Opt_norefrof, -+ Opt_verbose, Opt_noverbose, -+ Opt_sum, Opt_nosum, Opt_wsum, -+ Opt_dirperm1, Opt_nodirperm1, -+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err -+}; -+ -+static match_table_t options = { -+ {Opt_br, "br=%s"}, -+ {Opt_br, "br:%s"}, -+ -+ {Opt_add, "add=%d:%s"}, -+ {Opt_add, "add:%d:%s"}, -+ {Opt_add, "ins=%d:%s"}, -+ {Opt_add, "ins:%d:%s"}, -+ {Opt_append, "append=%s"}, -+ {Opt_append, "append:%s"}, -+ {Opt_prepend, "prepend=%s"}, -+ {Opt_prepend, "prepend:%s"}, -+ -+ {Opt_del, "del=%s"}, -+ {Opt_del, "del:%s"}, -+ /* {Opt_idel, "idel:%d"}, */ -+ {Opt_mod, "mod=%s"}, -+ {Opt_mod, "mod:%s"}, -+ /* {Opt_imod, "imod:%d:%s"}, */ -+ -+ {Opt_dirwh, "dirwh=%d"}, -+ -+ {Opt_xino, "xino=%s"}, -+ {Opt_noxino, "noxino"}, -+ {Opt_trunc_xino, "trunc_xino"}, -+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"}, -+ {Opt_notrunc_xino, "notrunc_xino"}, -+ {Opt_trunc_xino_path, "trunc_xino=%s"}, -+ {Opt_itrunc_xino, "itrunc_xino=%d"}, -+ /* {Opt_zxino, "zxino=%s"}, */ -+ {Opt_trunc_xib, "trunc_xib"}, -+ {Opt_notrunc_xib, 
"notrunc_xib"}, -+ -+#ifdef CONFIG_PROC_FS -+ {Opt_plink, "plink"}, -+#else -+ {Opt_ignore_silent, "plink"}, -+#endif -+ -+ {Opt_noplink, "noplink"}, -+ -+#ifdef CONFIG_AUFS_DEBUG -+ {Opt_list_plink, "list_plink"}, -+#endif -+ -+ {Opt_udba, "udba=%s"}, -+ -+ {Opt_dio, "dio"}, -+ {Opt_nodio, "nodio"}, -+ -+ {Opt_diropq_a, "diropq=always"}, -+ {Opt_diropq_a, "diropq=a"}, -+ {Opt_diropq_w, "diropq=whiteouted"}, -+ {Opt_diropq_w, "diropq=w"}, -+ -+ {Opt_warn_perm, "warn_perm"}, -+ {Opt_nowarn_perm, "nowarn_perm"}, -+ -+ /* keep them temporary */ -+ {Opt_ignore_silent, "nodlgt"}, -+ {Opt_ignore_silent, "clean_plink"}, -+ -+#ifdef CONFIG_AUFS_SHWH -+ {Opt_shwh, "shwh"}, -+#endif -+ {Opt_noshwh, "noshwh"}, -+ -+ {Opt_dirperm1, "dirperm1"}, -+ {Opt_nodirperm1, "nodirperm1"}, -+ -+ {Opt_rendir, "rendir=%d"}, -+ -+ {Opt_refrof, "refrof"}, -+ {Opt_norefrof, "norefrof"}, -+ -+ {Opt_verbose, "verbose"}, -+ {Opt_verbose, "v"}, -+ {Opt_noverbose, "noverbose"}, -+ {Opt_noverbose, "quiet"}, -+ {Opt_noverbose, "q"}, -+ {Opt_noverbose, "silent"}, -+ -+ {Opt_sum, "sum"}, -+ {Opt_nosum, "nosum"}, -+ {Opt_wsum, "wsum"}, -+ -+ {Opt_rdcache, "rdcache=%d"}, -+ {Opt_rdblk, "rdblk=%d"}, -+ {Opt_rdblk_def, "rdblk=def"}, -+ {Opt_rdhash, "rdhash=%d"}, -+ {Opt_rdhash_def, "rdhash=def"}, -+ -+ {Opt_wbr_create, "create=%s"}, -+ {Opt_wbr_create, "create_policy=%s"}, -+ {Opt_wbr_copyup, "cpup=%s"}, -+ {Opt_wbr_copyup, "copyup=%s"}, -+ {Opt_wbr_copyup, "copyup_policy=%s"}, -+ -+ /* internal use for the scripts */ -+ {Opt_ignore_silent, "si=%s"}, -+ -+ {Opt_br, "dirs=%s"}, -+ {Opt_ignore, "debug=%d"}, -+ {Opt_ignore, "delete=whiteout"}, -+ {Opt_ignore, "delete=all"}, -+ {Opt_ignore, "imap=%s"}, -+ -+ /* temporary workaround, due to old mount(8)? 
*/ -+ {Opt_ignore_silent, "relatime"}, -+ -+ {Opt_err, NULL} -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static const char *au_parser_pattern(int val, match_table_t tbl) -+{ -+ struct match_token *p; -+ -+ p = tbl; -+ while (p->pattern) { -+ if (p->token == val) -+ return p->pattern; -+ p++; -+ } -+ BUG(); -+ return "??"; -+} -+ -+static const char *au_optstr(int *val, match_table_t tbl) -+{ -+ struct match_token *p; -+ int v; -+ -+ v = *val; -+ p = tbl; -+ while (p->token) { -+ if ((v & p->token) == p->token) { -+ *val &= ~p->token; -+ return p->pattern; -+ } -+ p++; -+ } -+ return NULL; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static match_table_t brperm = { -+ {AuBrPerm_RO, AUFS_BRPERM_RO}, -+ {AuBrPerm_RR, AUFS_BRPERM_RR}, -+ {AuBrPerm_RW, AUFS_BRPERM_RW}, -+ {0, NULL} -+}; -+ -+static match_table_t brattr = { -+ /* general */ -+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG}, -+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL}, -+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN}, -+ -+ /* ro/rr branch */ -+ {AuBrRAttr_WH, AUFS_BRRATTR_WH}, -+ -+ /* rw branch */ -+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO}, -+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH}, -+ -+ {0, NULL} -+}; -+ -+static int br_attr_val(char *str, match_table_t table, substring_t args[]) -+{ -+ int attr, v; -+ char *p; -+ -+ attr = 0; -+ do { -+ p = strchr(str, '+'); -+ if (p) -+ *p = 0; -+ v = match_token(str, table, args); -+ if (v) { -+ if (v & AuBrAttr_CMOO_Mask) -+ attr &= ~AuBrAttr_CMOO_Mask; -+ attr |= v; -+ } else { -+ if (p) -+ *p = '+'; -+ pr_warn("ignored branch attribute %s\n", str); -+ break; -+ } -+ if (p) -+ str = p + 1; -+ } while (p); -+ -+ return attr; -+} -+ -+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm) -+{ -+ int sz; -+ const char *p; -+ char *q; -+ -+ sz = 0; -+ q = str->a; -+ *q = 0; -+ p = au_optstr(&perm, brattr); -+ if (p) { -+ sz = strlen(p); -+ memcpy(q, p, sz + 1); -+ q += sz; -+ } else -+ 
goto out; -+ -+ do { -+ p = au_optstr(&perm, brattr); -+ if (p) { -+ *q++ = '+'; -+ sz = strlen(p); -+ memcpy(q, p, sz + 1); -+ q += sz; -+ } -+ } while (p); -+ -+out: -+ return sz; -+} -+ -+static int noinline_for_stack br_perm_val(char *perm) -+{ -+ int val, bad, sz; -+ char *p; -+ substring_t args[MAX_OPT_ARGS]; -+ au_br_perm_str_t attr; -+ -+ p = strchr(perm, '+'); -+ if (p) -+ *p = 0; -+ val = match_token(perm, brperm, args); -+ if (!val) { -+ if (p) -+ *p = '+'; -+ pr_warn("ignored branch permission %s\n", perm); -+ val = AuBrPerm_RO; -+ goto out; -+ } -+ if (!p) -+ goto out; -+ -+ val |= br_attr_val(p + 1, brattr, args); -+ -+ bad = 0; -+ switch (val & AuBrPerm_Mask) { -+ case AuBrPerm_RO: -+ case AuBrPerm_RR: -+ bad = val & AuBrWAttr_Mask; -+ val &= ~AuBrWAttr_Mask; -+ break; -+ case AuBrPerm_RW: -+ bad = val & AuBrRAttr_Mask; -+ val &= ~AuBrRAttr_Mask; -+ break; -+ } -+ if (unlikely(bad)) { -+ sz = au_do_optstr_br_attr(&attr, bad); -+ AuDebugOn(!sz); -+ pr_warn("ignored branch attribute %s\n", attr.a); -+ } -+ -+out: -+ return val; -+} -+ -+void au_optstr_br_perm(au_br_perm_str_t *str, int perm) -+{ -+ au_br_perm_str_t attr; -+ const char *p; -+ char *q; -+ int sz; -+ -+ q = str->a; -+ p = au_optstr(&perm, brperm); -+ AuDebugOn(!p || !*p); -+ sz = strlen(p); -+ memcpy(q, p, sz + 1); -+ q += sz; -+ -+ sz = au_do_optstr_br_attr(&attr, perm); -+ if (sz) { -+ *q++ = '+'; -+ memcpy(q, attr.a, sz + 1); -+ } -+ -+ AuDebugOn(strlen(str->a) >= sizeof(str->a)); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static match_table_t udbalevel = { -+ {AuOpt_UDBA_REVAL, "reval"}, -+ {AuOpt_UDBA_NONE, "none"}, -+#ifdef CONFIG_AUFS_HNOTIFY -+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */ -+#ifdef CONFIG_AUFS_HFSNOTIFY -+ {AuOpt_UDBA_HNOTIFY, "fsnotify"}, -+#endif -+#endif -+ {-1, NULL} -+}; -+ -+static int noinline_for_stack udba_val(char *str) -+{ -+ substring_t args[MAX_OPT_ARGS]; -+ -+ return match_token(str, udbalevel, 
args); -+} -+ -+const char *au_optstr_udba(int udba) -+{ -+ return au_parser_pattern(udba, udbalevel); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static match_table_t au_wbr_create_policy = { -+ {AuWbrCreate_TDP, "tdp"}, -+ {AuWbrCreate_TDP, "top-down-parent"}, -+ {AuWbrCreate_RR, "rr"}, -+ {AuWbrCreate_RR, "round-robin"}, -+ {AuWbrCreate_MFS, "mfs"}, -+ {AuWbrCreate_MFS, "most-free-space"}, -+ {AuWbrCreate_MFSV, "mfs:%d"}, -+ {AuWbrCreate_MFSV, "most-free-space:%d"}, -+ -+ {AuWbrCreate_MFSRR, "mfsrr:%d"}, -+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"}, -+ {AuWbrCreate_PMFS, "pmfs"}, -+ {AuWbrCreate_PMFSV, "pmfs:%d"}, -+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"}, -+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"}, -+ -+ {-1, NULL} -+}; -+ -+/* -+ * cf. linux/lib/parser.c and cmdline.c -+ * gave up calling memparse() since it uses simple_strtoull() instead of -+ * kstrto...(). -+ */ -+static int noinline_for_stack -+au_match_ull(substring_t *s, unsigned long long *result) -+{ -+ int err; -+ unsigned int len; -+ char a[32]; -+ -+ err = -ERANGE; -+ len = s->to - s->from; -+ if (len + 1 <= sizeof(a)) { -+ memcpy(a, s->from, len); -+ a[len] = '\0'; -+ err = kstrtoull(a, 0, result); -+ } -+ return err; -+} -+ -+static int au_wbr_mfs_wmark(substring_t *arg, char *str, -+ struct au_opt_wbr_create *create) -+{ -+ int err; -+ unsigned long long ull; -+ -+ err = 0; -+ if (!au_match_ull(arg, &ull)) -+ create->mfsrr_watermark = ull; -+ else { -+ pr_err("bad integer in %s\n", str); -+ err = -EINVAL; -+ } -+ -+ return err; -+} -+ -+static int au_wbr_mfs_sec(substring_t *arg, char *str, -+ struct au_opt_wbr_create *create) -+{ -+ int n, err; -+ -+ err = 0; -+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC) -+ create->mfs_second = n; -+ else { -+ pr_err("bad integer in %s\n", str); -+ err = -EINVAL; -+ } -+ -+ return err; -+} -+ -+static int noinline_for_stack -+au_wbr_create_val(char *str, struct au_opt_wbr_create *create) -+{ -+ int err, e; -+ 
substring_t args[MAX_OPT_ARGS]; -+ -+ err = match_token(str, au_wbr_create_policy, args); -+ create->wbr_create = err; -+ switch (err) { -+ case AuWbrCreate_MFSRRV: -+ case AuWbrCreate_PMFSRRV: -+ e = au_wbr_mfs_wmark(&args[0], str, create); -+ if (!e) -+ e = au_wbr_mfs_sec(&args[1], str, create); -+ if (unlikely(e)) -+ err = e; -+ break; -+ case AuWbrCreate_MFSRR: -+ case AuWbrCreate_PMFSRR: -+ e = au_wbr_mfs_wmark(&args[0], str, create); -+ if (unlikely(e)) { -+ err = e; -+ break; -+ } -+ /*FALLTHROUGH*/ -+ case AuWbrCreate_MFS: -+ case AuWbrCreate_PMFS: -+ create->mfs_second = AUFS_MFS_DEF_SEC; -+ break; -+ case AuWbrCreate_MFSV: -+ case AuWbrCreate_PMFSV: -+ e = au_wbr_mfs_sec(&args[0], str, create); -+ if (unlikely(e)) -+ err = e; -+ break; -+ } -+ -+ return err; -+} -+ -+const char *au_optstr_wbr_create(int wbr_create) -+{ -+ return au_parser_pattern(wbr_create, au_wbr_create_policy); -+} -+ -+static match_table_t au_wbr_copyup_policy = { -+ {AuWbrCopyup_TDP, "tdp"}, -+ {AuWbrCopyup_TDP, "top-down-parent"}, -+ {AuWbrCopyup_BUP, "bup"}, -+ {AuWbrCopyup_BUP, "bottom-up-parent"}, -+ {AuWbrCopyup_BU, "bu"}, -+ {AuWbrCopyup_BU, "bottom-up"}, -+ {-1, NULL} -+}; -+ -+static int noinline_for_stack au_wbr_copyup_val(char *str) -+{ -+ substring_t args[MAX_OPT_ARGS]; -+ -+ return match_token(str, au_wbr_copyup_policy, args); -+} -+ -+const char *au_optstr_wbr_copyup(int wbr_copyup) -+{ -+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; -+ -+static void dump_opts(struct au_opts *opts) -+{ -+#ifdef CONFIG_AUFS_DEBUG -+ /* reduce stack space */ -+ union { -+ struct au_opt_add *add; -+ struct au_opt_del *del; -+ struct au_opt_mod *mod; -+ struct au_opt_xino *xino; -+ struct au_opt_xino_itrunc *xino_itrunc; -+ struct au_opt_wbr_create *create; -+ } u; -+ struct au_opt *opt; -+ -+ opt = opts->opt; -+ while 
(opt->type != Opt_tail) { -+ switch (opt->type) { -+ case Opt_add: -+ u.add = &opt->add; -+ AuDbg("add {b%d, %s, 0x%x, %p}\n", -+ u.add->bindex, u.add->pathname, u.add->perm, -+ u.add->path.dentry); -+ break; -+ case Opt_del: -+ case Opt_idel: -+ u.del = &opt->del; -+ AuDbg("del {%s, %p}\n", -+ u.del->pathname, u.del->h_path.dentry); -+ break; -+ case Opt_mod: -+ case Opt_imod: -+ u.mod = &opt->mod; -+ AuDbg("mod {%s, 0x%x, %p}\n", -+ u.mod->path, u.mod->perm, u.mod->h_root); -+ break; -+ case Opt_append: -+ u.add = &opt->add; -+ AuDbg("append {b%d, %s, 0x%x, %p}\n", -+ u.add->bindex, u.add->pathname, u.add->perm, -+ u.add->path.dentry); -+ break; -+ case Opt_prepend: -+ u.add = &opt->add; -+ AuDbg("prepend {b%d, %s, 0x%x, %p}\n", -+ u.add->bindex, u.add->pathname, u.add->perm, -+ u.add->path.dentry); -+ break; -+ case Opt_dirwh: -+ AuDbg("dirwh %d\n", opt->dirwh); -+ break; -+ case Opt_rdcache: -+ AuDbg("rdcache %d\n", opt->rdcache); -+ break; -+ case Opt_rdblk: -+ AuDbg("rdblk %u\n", opt->rdblk); -+ break; -+ case Opt_rdblk_def: -+ AuDbg("rdblk_def\n"); -+ break; -+ case Opt_rdhash: -+ AuDbg("rdhash %u\n", opt->rdhash); -+ break; -+ case Opt_rdhash_def: -+ AuDbg("rdhash_def\n"); -+ break; -+ case Opt_xino: -+ u.xino = &opt->xino; -+ AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file); -+ break; -+ case Opt_trunc_xino: -+ AuLabel(trunc_xino); -+ break; -+ case Opt_notrunc_xino: -+ AuLabel(notrunc_xino); -+ break; -+ case Opt_trunc_xino_path: -+ case Opt_itrunc_xino: -+ u.xino_itrunc = &opt->xino_itrunc; -+ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex); -+ break; -+ -+ case Opt_noxino: -+ AuLabel(noxino); -+ break; -+ case Opt_trunc_xib: -+ AuLabel(trunc_xib); -+ break; -+ case Opt_notrunc_xib: -+ AuLabel(notrunc_xib); -+ break; -+ case Opt_shwh: -+ AuLabel(shwh); -+ break; -+ case Opt_noshwh: -+ AuLabel(noshwh); -+ break; -+ case Opt_dirperm1: -+ AuLabel(dirperm1); -+ break; -+ case Opt_nodirperm1: -+ AuLabel(nodirperm1); -+ break; -+ case Opt_plink: -+ 
AuLabel(plink); -+ break; -+ case Opt_noplink: -+ AuLabel(noplink); -+ break; -+ case Opt_list_plink: -+ AuLabel(list_plink); -+ break; -+ case Opt_udba: -+ AuDbg("udba %d, %s\n", -+ opt->udba, au_optstr_udba(opt->udba)); -+ break; -+ case Opt_dio: -+ AuLabel(dio); -+ break; -+ case Opt_nodio: -+ AuLabel(nodio); -+ break; -+ case Opt_diropq_a: -+ AuLabel(diropq_a); -+ break; -+ case Opt_diropq_w: -+ AuLabel(diropq_w); -+ break; -+ case Opt_warn_perm: -+ AuLabel(warn_perm); -+ break; -+ case Opt_nowarn_perm: -+ AuLabel(nowarn_perm); -+ break; -+ case Opt_refrof: -+ AuLabel(refrof); -+ break; -+ case Opt_norefrof: -+ AuLabel(norefrof); -+ break; -+ case Opt_verbose: -+ AuLabel(verbose); -+ break; -+ case Opt_noverbose: -+ AuLabel(noverbose); -+ break; -+ case Opt_sum: -+ AuLabel(sum); -+ break; -+ case Opt_nosum: -+ AuLabel(nosum); -+ break; -+ case Opt_wsum: -+ AuLabel(wsum); -+ break; -+ case Opt_wbr_create: -+ u.create = &opt->wbr_create; -+ AuDbg("create %d, %s\n", u.create->wbr_create, -+ au_optstr_wbr_create(u.create->wbr_create)); -+ switch (u.create->wbr_create) { -+ case AuWbrCreate_MFSV: -+ case AuWbrCreate_PMFSV: -+ AuDbg("%d sec\n", u.create->mfs_second); -+ break; -+ case AuWbrCreate_MFSRR: -+ AuDbg("%llu watermark\n", -+ u.create->mfsrr_watermark); -+ break; -+ case AuWbrCreate_MFSRRV: -+ case AuWbrCreate_PMFSRRV: -+ AuDbg("%llu watermark, %d sec\n", -+ u.create->mfsrr_watermark, -+ u.create->mfs_second); -+ break; -+ } -+ break; -+ case Opt_wbr_copyup: -+ AuDbg("copyup %d, %s\n", opt->wbr_copyup, -+ au_optstr_wbr_copyup(opt->wbr_copyup)); -+ break; -+ default: -+ BUG(); -+ } -+ opt++; -+ } -+#endif -+} -+ -+void au_opts_free(struct au_opts *opts) -+{ -+ struct au_opt *opt; -+ -+ opt = opts->opt; -+ while (opt->type != Opt_tail) { -+ switch (opt->type) { -+ case Opt_add: -+ case Opt_append: -+ case Opt_prepend: -+ path_put(&opt->add.path); -+ break; -+ case Opt_del: -+ case Opt_idel: -+ path_put(&opt->del.h_path); -+ break; -+ case Opt_mod: -+ case 
Opt_imod: -+ dput(opt->mod.h_root); -+ break; -+ case Opt_xino: -+ fput(opt->xino.file); -+ break; -+ } -+ opt++; -+ } -+} -+ -+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags, -+ aufs_bindex_t bindex) -+{ -+ int err; -+ struct au_opt_add *add = &opt->add; -+ char *p; -+ -+ add->bindex = bindex; -+ add->perm = AuBrPerm_RO; -+ add->pathname = opt_str; -+ p = strchr(opt_str, '='); -+ if (p) { -+ *p++ = 0; -+ if (*p) -+ add->perm = br_perm_val(p); -+ } -+ -+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path); -+ if (!err) { -+ if (!p) { -+ add->perm = AuBrPerm_RO; -+ if (au_test_fs_rr(add->path.dentry->d_sb)) -+ add->perm = AuBrPerm_RR; -+ else if (!bindex && !(sb_flags & MS_RDONLY)) -+ add->perm = AuBrPerm_RW; -+ } -+ opt->type = Opt_add; -+ goto out; -+ } -+ pr_err("lookup failed %s (%d)\n", add->pathname, err); -+ err = -EINVAL; -+ -+out: -+ return err; -+} -+ -+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[]) -+{ -+ int err; -+ -+ del->pathname = args[0].from; -+ AuDbg("del path %s\n", del->pathname); -+ -+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path); -+ if (unlikely(err)) -+ pr_err("lookup failed %s (%d)\n", del->pathname, err); -+ -+ return err; -+} -+ -+#if 0 /* reserved for future use */ -+static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex, -+ struct au_opt_del *del, substring_t args[]) -+{ -+ int err; -+ struct dentry *root; -+ -+ err = -EINVAL; -+ root = sb->s_root; -+ aufs_read_lock(root, AuLock_FLUSH); -+ if (bindex < 0 || au_sbend(sb) < bindex) { -+ pr_err("out of bounds, %d\n", bindex); -+ goto out; -+ } -+ -+ err = 0; -+ del->h_path.dentry = dget(au_h_dptr(root, bindex)); -+ del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex)); -+ -+out: -+ aufs_read_unlock(root, !AuLock_IR); -+ return err; -+} -+#endif -+ -+static int noinline_for_stack -+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[]) -+{ -+ int err; -+ struct path path; -+ char 
*p; -+ -+ err = -EINVAL; -+ mod->path = args[0].from; -+ p = strchr(mod->path, '='); -+ if (unlikely(!p)) { -+ pr_err("no permssion %s\n", args[0].from); -+ goto out; -+ } -+ -+ *p++ = 0; -+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path); -+ if (unlikely(err)) { -+ pr_err("lookup failed %s (%d)\n", mod->path, err); -+ goto out; -+ } -+ -+ mod->perm = br_perm_val(p); -+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p); -+ mod->h_root = dget(path.dentry); -+ path_put(&path); -+ -+out: -+ return err; -+} -+ -+#if 0 /* reserved for future use */ -+static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex, -+ struct au_opt_mod *mod, substring_t args[]) -+{ -+ int err; -+ struct dentry *root; -+ -+ err = -EINVAL; -+ root = sb->s_root; -+ aufs_read_lock(root, AuLock_FLUSH); -+ if (bindex < 0 || au_sbend(sb) < bindex) { -+ pr_err("out of bounds, %d\n", bindex); -+ goto out; -+ } -+ -+ err = 0; -+ mod->perm = br_perm_val(args[1].from); -+ AuDbg("mod path %s, perm 0x%x, %s\n", -+ mod->path, mod->perm, args[1].from); -+ mod->h_root = dget(au_h_dptr(root, bindex)); -+ -+out: -+ aufs_read_unlock(root, !AuLock_IR); -+ return err; -+} -+#endif -+ -+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino, -+ substring_t args[]) -+{ -+ int err; -+ struct file *file; -+ -+ file = au_xino_create(sb, args[0].from, /*silent*/0); -+ err = PTR_ERR(file); -+ if (IS_ERR(file)) -+ goto out; -+ -+ err = -EINVAL; -+ if (unlikely(file->f_dentry->d_sb == sb)) { -+ fput(file); -+ pr_err("%s must be outside\n", args[0].from); -+ goto out; -+ } -+ -+ err = 0; -+ xino->file = file; -+ xino->path = args[0].from; -+ -+out: -+ return err; -+} -+ -+static int noinline_for_stack -+au_opts_parse_xino_itrunc_path(struct super_block *sb, -+ struct au_opt_xino_itrunc *xino_itrunc, -+ substring_t args[]) -+{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct path path; -+ struct dentry *root; -+ -+ err = vfsub_kern_path(args[0].from, 
lkup_dirflags, &path); -+ if (unlikely(err)) { -+ pr_err("lookup failed %s (%d)\n", args[0].from, err); -+ goto out; -+ } -+ -+ xino_itrunc->bindex = -1; -+ root = sb->s_root; -+ aufs_read_lock(root, AuLock_FLUSH); -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ if (au_h_dptr(root, bindex) == path.dentry) { -+ xino_itrunc->bindex = bindex; -+ break; -+ } -+ } -+ aufs_read_unlock(root, !AuLock_IR); -+ path_put(&path); -+ -+ if (unlikely(xino_itrunc->bindex < 0)) { -+ pr_err("no such branch %s\n", args[0].from); -+ err = -EINVAL; -+ } -+ -+out: -+ return err; -+} -+ -+/* called without aufs lock */ -+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts) -+{ -+ int err, n, token; -+ aufs_bindex_t bindex; -+ unsigned char skipped; -+ struct dentry *root; -+ struct au_opt *opt, *opt_tail; -+ char *opt_str; -+ /* reduce the stack space */ -+ union { -+ struct au_opt_xino_itrunc *xino_itrunc; -+ struct au_opt_wbr_create *create; -+ } u; -+ struct { -+ substring_t args[MAX_OPT_ARGS]; -+ } *a; -+ -+ err = -ENOMEM; -+ a = kmalloc(sizeof(*a), GFP_NOFS); -+ if (unlikely(!a)) -+ goto out; -+ -+ root = sb->s_root; -+ err = 0; -+ bindex = 0; -+ opt = opts->opt; -+ opt_tail = opt + opts->max_opt - 1; -+ opt->type = Opt_tail; -+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) { -+ err = -EINVAL; -+ skipped = 0; -+ token = match_token(opt_str, options, a->args); -+ switch (token) { -+ case Opt_br: -+ err = 0; -+ while (!err && (opt_str = strsep(&a->args[0].from, ":")) -+ && *opt_str) { -+ err = opt_add(opt, opt_str, opts->sb_flags, -+ bindex++); -+ if (unlikely(!err && ++opt > opt_tail)) { -+ err = -E2BIG; -+ break; -+ } -+ opt->type = Opt_tail; -+ skipped = 1; -+ } -+ break; -+ case Opt_add: -+ if (unlikely(match_int(&a->args[0], &n))) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ bindex = n; -+ err = opt_add(opt, a->args[1].from, opts->sb_flags, -+ bindex); -+ if (!err) -+ opt->type = token; -+ break; -+ 
case Opt_append: -+ err = opt_add(opt, a->args[0].from, opts->sb_flags, -+ /*dummy bindex*/1); -+ if (!err) -+ opt->type = token; -+ break; -+ case Opt_prepend: -+ err = opt_add(opt, a->args[0].from, opts->sb_flags, -+ /*bindex*/0); -+ if (!err) -+ opt->type = token; -+ break; -+ case Opt_del: -+ err = au_opts_parse_del(&opt->del, a->args); -+ if (!err) -+ opt->type = token; -+ break; -+#if 0 /* reserved for future use */ -+ case Opt_idel: -+ del->pathname = "(indexed)"; -+ if (unlikely(match_int(&args[0], &n))) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ err = au_opts_parse_idel(sb, n, &opt->del, a->args); -+ if (!err) -+ opt->type = token; -+ break; -+#endif -+ case Opt_mod: -+ err = au_opts_parse_mod(&opt->mod, a->args); -+ if (!err) -+ opt->type = token; -+ break; -+#ifdef IMOD /* reserved for future use */ -+ case Opt_imod: -+ u.mod->path = "(indexed)"; -+ if (unlikely(match_int(&a->args[0], &n))) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args); -+ if (!err) -+ opt->type = token; -+ break; -+#endif -+ case Opt_xino: -+ err = au_opts_parse_xino(sb, &opt->xino, a->args); -+ if (!err) -+ opt->type = token; -+ break; -+ -+ case Opt_trunc_xino_path: -+ err = au_opts_parse_xino_itrunc_path -+ (sb, &opt->xino_itrunc, a->args); -+ if (!err) -+ opt->type = token; -+ break; -+ -+ case Opt_itrunc_xino: -+ u.xino_itrunc = &opt->xino_itrunc; -+ if (unlikely(match_int(&a->args[0], &n))) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ u.xino_itrunc->bindex = n; -+ aufs_read_lock(root, AuLock_FLUSH); -+ if (n < 0 || au_sbend(sb) < n) { -+ pr_err("out of bounds, %d\n", n); -+ aufs_read_unlock(root, !AuLock_IR); -+ break; -+ } -+ aufs_read_unlock(root, !AuLock_IR); -+ err = 0; -+ opt->type = token; -+ break; -+ -+ case Opt_dirwh: -+ if (unlikely(match_int(&a->args[0], &opt->dirwh))) -+ break; -+ err = 0; -+ opt->type = token; -+ break; -+ -+ case Opt_rdcache: -+ if 
(unlikely(match_int(&a->args[0], &n))) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ if (unlikely(n > AUFS_RDCACHE_MAX)) { -+ pr_err("rdcache must be smaller than %d\n", -+ AUFS_RDCACHE_MAX); -+ break; -+ } -+ opt->rdcache = n; -+ err = 0; -+ opt->type = token; -+ break; -+ case Opt_rdblk: -+ if (unlikely(match_int(&a->args[0], &n) -+ || n < 0 -+ || n > KMALLOC_MAX_SIZE)) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ if (unlikely(n && n < NAME_MAX)) { -+ pr_err("rdblk must be larger than %d\n", -+ NAME_MAX); -+ break; -+ } -+ opt->rdblk = n; -+ err = 0; -+ opt->type = token; -+ break; -+ case Opt_rdhash: -+ if (unlikely(match_int(&a->args[0], &n) -+ || n < 0 -+ || n * sizeof(struct hlist_head) -+ > KMALLOC_MAX_SIZE)) { -+ pr_err("bad integer in %s\n", opt_str); -+ break; -+ } -+ opt->rdhash = n; -+ err = 0; -+ opt->type = token; -+ break; -+ -+ case Opt_trunc_xino: -+ case Opt_notrunc_xino: -+ case Opt_noxino: -+ case Opt_trunc_xib: -+ case Opt_notrunc_xib: -+ case Opt_shwh: -+ case Opt_noshwh: -+ case Opt_dirperm1: -+ case Opt_nodirperm1: -+ case Opt_plink: -+ case Opt_noplink: -+ case Opt_list_plink: -+ case Opt_dio: -+ case Opt_nodio: -+ case Opt_diropq_a: -+ case Opt_diropq_w: -+ case Opt_warn_perm: -+ case Opt_nowarn_perm: -+ case Opt_refrof: -+ case Opt_norefrof: -+ case Opt_verbose: -+ case Opt_noverbose: -+ case Opt_sum: -+ case Opt_nosum: -+ case Opt_wsum: -+ case Opt_rdblk_def: -+ case Opt_rdhash_def: -+ err = 0; -+ opt->type = token; -+ break; -+ -+ case Opt_udba: -+ opt->udba = udba_val(a->args[0].from); -+ if (opt->udba >= 0) { -+ err = 0; -+ opt->type = token; -+ } else -+ pr_err("wrong value, %s\n", opt_str); -+ break; -+ -+ case Opt_wbr_create: -+ u.create = &opt->wbr_create; -+ u.create->wbr_create -+ = au_wbr_create_val(a->args[0].from, u.create); -+ if (u.create->wbr_create >= 0) { -+ err = 0; -+ opt->type = token; -+ } else -+ pr_err("wrong value, %s\n", opt_str); -+ break; -+ case Opt_wbr_copyup: -+ 
opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from); -+ if (opt->wbr_copyup >= 0) { -+ err = 0; -+ opt->type = token; -+ } else -+ pr_err("wrong value, %s\n", opt_str); -+ break; -+ -+ case Opt_ignore: -+ pr_warn("ignored %s\n", opt_str); -+ /*FALLTHROUGH*/ -+ case Opt_ignore_silent: -+ skipped = 1; -+ err = 0; -+ break; -+ case Opt_err: -+ pr_err("unknown option %s\n", opt_str); -+ break; -+ } -+ -+ if (!err && !skipped) { -+ if (unlikely(++opt > opt_tail)) { -+ err = -E2BIG; -+ opt--; -+ opt->type = Opt_tail; -+ break; -+ } -+ opt->type = Opt_tail; -+ } -+ } -+ -+ kfree(a); -+ dump_opts(opts); -+ if (unlikely(err)) -+ au_opts_free(opts); -+ -+out: -+ return err; -+} -+ -+static int au_opt_wbr_create(struct super_block *sb, -+ struct au_opt_wbr_create *create) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ err = 1; /* handled */ -+ sbinfo = au_sbi(sb); -+ if (sbinfo->si_wbr_create_ops->fin) { -+ err = sbinfo->si_wbr_create_ops->fin(sb); -+ if (!err) -+ err = 1; -+ } -+ -+ sbinfo->si_wbr_create = create->wbr_create; -+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create; -+ switch (create->wbr_create) { -+ case AuWbrCreate_MFSRRV: -+ case AuWbrCreate_MFSRR: -+ case AuWbrCreate_PMFSRR: -+ case AuWbrCreate_PMFSRRV: -+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark; -+ /*FALLTHROUGH*/ -+ case AuWbrCreate_MFS: -+ case AuWbrCreate_MFSV: -+ case AuWbrCreate_PMFS: -+ case AuWbrCreate_PMFSV: -+ sbinfo->si_wbr_mfs.mfs_expire -+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC); -+ break; -+ } -+ -+ if (sbinfo->si_wbr_create_ops->init) -+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */ -+ -+ return err; -+} -+ -+/* -+ * returns, -+ * plus: processed without an error -+ * zero: unprocessed -+ */ -+static int au_opt_simple(struct super_block *sb, struct au_opt *opt, -+ struct au_opts *opts) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ err = 1; /* handled */ -+ sbinfo = 
au_sbi(sb); -+ switch (opt->type) { -+ case Opt_udba: -+ sbinfo->si_mntflags &= ~AuOptMask_UDBA; -+ sbinfo->si_mntflags |= opt->udba; -+ opts->given_udba |= opt->udba; -+ break; -+ -+ case Opt_plink: -+ au_opt_set(sbinfo->si_mntflags, PLINK); -+ break; -+ case Opt_noplink: -+ if (au_opt_test(sbinfo->si_mntflags, PLINK)) -+ au_plink_put(sb, /*verbose*/1); -+ au_opt_clr(sbinfo->si_mntflags, PLINK); -+ break; -+ case Opt_list_plink: -+ if (au_opt_test(sbinfo->si_mntflags, PLINK)) -+ au_plink_list(sb); -+ break; -+ -+ case Opt_dio: -+ au_opt_set(sbinfo->si_mntflags, DIO); -+ au_fset_opts(opts->flags, REFRESH_DYAOP); -+ break; -+ case Opt_nodio: -+ au_opt_clr(sbinfo->si_mntflags, DIO); -+ au_fset_opts(opts->flags, REFRESH_DYAOP); -+ break; -+ -+ case Opt_diropq_a: -+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ); -+ break; -+ case Opt_diropq_w: -+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ); -+ break; -+ -+ case Opt_warn_perm: -+ au_opt_set(sbinfo->si_mntflags, WARN_PERM); -+ break; -+ case Opt_nowarn_perm: -+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM); -+ break; -+ -+ case Opt_refrof: -+ au_opt_set(sbinfo->si_mntflags, REFROF); -+ break; -+ case Opt_norefrof: -+ au_opt_clr(sbinfo->si_mntflags, REFROF); -+ break; -+ -+ case Opt_verbose: -+ au_opt_set(sbinfo->si_mntflags, VERBOSE); -+ break; -+ case Opt_noverbose: -+ au_opt_clr(sbinfo->si_mntflags, VERBOSE); -+ break; -+ -+ case Opt_sum: -+ au_opt_set(sbinfo->si_mntflags, SUM); -+ break; -+ case Opt_wsum: -+ au_opt_clr(sbinfo->si_mntflags, SUM); -+ au_opt_set(sbinfo->si_mntflags, SUM_W); -+ case Opt_nosum: -+ au_opt_clr(sbinfo->si_mntflags, SUM); -+ au_opt_clr(sbinfo->si_mntflags, SUM_W); -+ break; -+ -+ case Opt_wbr_create: -+ err = au_opt_wbr_create(sb, &opt->wbr_create); -+ break; -+ case Opt_wbr_copyup: -+ sbinfo->si_wbr_copyup = opt->wbr_copyup; -+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup; -+ break; -+ -+ case Opt_dirwh: -+ sbinfo->si_dirwh = opt->dirwh; -+ break; -+ -+ case Opt_rdcache: 
-+ sbinfo->si_rdcache -+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC); -+ break; -+ case Opt_rdblk: -+ sbinfo->si_rdblk = opt->rdblk; -+ break; -+ case Opt_rdblk_def: -+ sbinfo->si_rdblk = AUFS_RDBLK_DEF; -+ break; -+ case Opt_rdhash: -+ sbinfo->si_rdhash = opt->rdhash; -+ break; -+ case Opt_rdhash_def: -+ sbinfo->si_rdhash = AUFS_RDHASH_DEF; -+ break; -+ -+ case Opt_shwh: -+ au_opt_set(sbinfo->si_mntflags, SHWH); -+ break; -+ case Opt_noshwh: -+ au_opt_clr(sbinfo->si_mntflags, SHWH); -+ break; -+ -+ case Opt_dirperm1: -+ au_opt_set(sbinfo->si_mntflags, DIRPERM1); -+ break; -+ case Opt_nodirperm1: -+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1); -+ break; -+ -+ case Opt_trunc_xino: -+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO); -+ break; -+ case Opt_notrunc_xino: -+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO); -+ break; -+ -+ case Opt_trunc_xino_path: -+ case Opt_itrunc_xino: -+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex); -+ if (!err) -+ err = 1; -+ break; -+ -+ case Opt_trunc_xib: -+ au_fset_opts(opts->flags, TRUNC_XIB); -+ break; -+ case Opt_notrunc_xib: -+ au_fclr_opts(opts->flags, TRUNC_XIB); -+ break; -+ -+ default: -+ err = 0; -+ break; -+ } -+ -+ return err; -+} -+ -+/* -+ * returns tri-state. 
-+ * plus: processed without an error -+ * zero: unprocessed -+ * minus: error -+ */ -+static int au_opt_br(struct super_block *sb, struct au_opt *opt, -+ struct au_opts *opts) -+{ -+ int err, do_refresh; -+ -+ err = 0; -+ switch (opt->type) { -+ case Opt_append: -+ opt->add.bindex = au_sbend(sb) + 1; -+ if (opt->add.bindex < 0) -+ opt->add.bindex = 0; -+ goto add; -+ case Opt_prepend: -+ opt->add.bindex = 0; -+ add: /* indented label */ -+ case Opt_add: -+ err = au_br_add(sb, &opt->add, -+ au_ftest_opts(opts->flags, REMOUNT)); -+ if (!err) { -+ err = 1; -+ au_fset_opts(opts->flags, REFRESH); -+ } -+ break; -+ -+ case Opt_del: -+ case Opt_idel: -+ err = au_br_del(sb, &opt->del, -+ au_ftest_opts(opts->flags, REMOUNT)); -+ if (!err) { -+ err = 1; -+ au_fset_opts(opts->flags, TRUNC_XIB); -+ au_fset_opts(opts->flags, REFRESH); -+ } -+ break; -+ -+ case Opt_mod: -+ case Opt_imod: -+ err = au_br_mod(sb, &opt->mod, -+ au_ftest_opts(opts->flags, REMOUNT), -+ &do_refresh); -+ if (!err) { -+ err = 1; -+ if (do_refresh) -+ au_fset_opts(opts->flags, REFRESH); -+ } -+ break; -+ } -+ -+ return err; -+} -+ -+static int au_opt_xino(struct super_block *sb, struct au_opt *opt, -+ struct au_opt_xino **opt_xino, -+ struct au_opts *opts) -+{ -+ int err; -+ aufs_bindex_t bend, bindex; -+ struct dentry *root, *parent, *h_root; -+ -+ err = 0; -+ switch (opt->type) { -+ case Opt_xino: -+ err = au_xino_set(sb, &opt->xino, -+ !!au_ftest_opts(opts->flags, REMOUNT)); -+ if (unlikely(err)) -+ break; -+ -+ *opt_xino = &opt->xino; -+ au_xino_brid_set(sb, -1); -+ -+ /* safe d_parent access */ -+ parent = opt->xino.file->f_dentry->d_parent; -+ root = sb->s_root; -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ h_root = au_h_dptr(root, bindex); -+ if (h_root == parent) { -+ au_xino_brid_set(sb, au_sbr_id(sb, bindex)); -+ break; -+ } -+ } -+ break; -+ -+ case Opt_noxino: -+ au_xino_clr(sb); -+ au_xino_brid_set(sb, -1); -+ *opt_xino = (void *)-1; -+ break; -+ } -+ -+ return 
err; -+} -+ -+int au_opts_verify(struct super_block *sb, unsigned long sb_flags, -+ unsigned int pending) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ unsigned char do_plink, skip, do_free; -+ struct au_branch *br; -+ struct au_wbr *wbr; -+ struct dentry *root; -+ struct inode *dir, *h_dir; -+ struct au_sbinfo *sbinfo; -+ struct au_hinode *hdir; -+ -+ SiMustAnyLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA)); -+ -+ if (!(sb_flags & MS_RDONLY)) { -+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0)))) -+ pr_warn("first branch should be rw\n"); -+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH))) -+ pr_warn("shwh should be used with ro\n"); -+ } -+ -+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY) -+ && !au_opt_test(sbinfo->si_mntflags, XINO)) -+ pr_warn("udba=*notify requires xino\n"); -+ -+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1)) -+ pr_warn("dirperm1 breaks the protection" -+ " by the permission bits on the lower branch\n"); -+ -+ err = 0; -+ root = sb->s_root; -+ dir = root->d_inode; -+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK); -+ bend = au_sbend(sb); -+ for (bindex = 0; !err && bindex <= bend; bindex++) { -+ skip = 0; -+ h_dir = au_h_iptr(dir, bindex); -+ br = au_sbr(sb, bindex); -+ do_free = 0; -+ -+ wbr = br->br_wbr; -+ if (wbr) -+ wbr_wh_read_lock(wbr); -+ -+ if (!au_br_writable(br->br_perm)) { -+ do_free = !!wbr; -+ skip = (!wbr -+ || (!wbr->wbr_whbase -+ && !wbr->wbr_plink -+ && !wbr->wbr_orph)); -+ } else if (!au_br_wh_linkable(br->br_perm)) { -+ /* skip = (!br->br_whbase && !br->br_orph); */ -+ skip = (!wbr || !wbr->wbr_whbase); -+ if (skip && wbr) { -+ if (do_plink) -+ skip = !!wbr->wbr_plink; -+ else -+ skip = !wbr->wbr_plink; -+ } -+ } else { -+ /* skip = (br->br_whbase && br->br_ohph); */ -+ skip = (wbr && wbr->wbr_whbase); -+ if (skip) { -+ if (do_plink) -+ skip = !!wbr->wbr_plink; -+ else -+ skip = !wbr->wbr_plink; -+ } -+ } -+ if (wbr) -+ wbr_wh_read_unlock(wbr); -+ 
-+ if (skip) -+ continue; -+ -+ hdir = au_hi(dir, bindex); -+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); -+ if (wbr) -+ wbr_wh_write_lock(wbr); -+ err = au_wh_init(br, sb); -+ if (wbr) -+ wbr_wh_write_unlock(wbr); -+ au_hn_imtx_unlock(hdir); -+ -+ if (!err && do_free) { -+ kfree(wbr); -+ br->br_wbr = NULL; -+ } -+ } -+ -+ return err; -+} -+ -+int au_opts_mount(struct super_block *sb, struct au_opts *opts) -+{ -+ int err; -+ unsigned int tmp; -+ aufs_bindex_t bindex, bend; -+ struct au_opt *opt; -+ struct au_opt_xino *opt_xino, xino; -+ struct au_sbinfo *sbinfo; -+ struct au_branch *br; -+ struct inode *dir; -+ -+ SiMustWriteLock(sb); -+ -+ err = 0; -+ opt_xino = NULL; -+ opt = opts->opt; -+ while (err >= 0 && opt->type != Opt_tail) -+ err = au_opt_simple(sb, opt++, opts); -+ if (err > 0) -+ err = 0; -+ else if (unlikely(err < 0)) -+ goto out; -+ -+ /* disable xino and udba temporary */ -+ sbinfo = au_sbi(sb); -+ tmp = sbinfo->si_mntflags; -+ au_opt_clr(sbinfo->si_mntflags, XINO); -+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL); -+ -+ opt = opts->opt; -+ while (err >= 0 && opt->type != Opt_tail) -+ err = au_opt_br(sb, opt++, opts); -+ if (err > 0) -+ err = 0; -+ else if (unlikely(err < 0)) -+ goto out; -+ -+ bend = au_sbend(sb); -+ if (unlikely(bend < 0)) { -+ err = -EINVAL; -+ pr_err("no branches\n"); -+ goto out; -+ } -+ -+ if (au_opt_test(tmp, XINO)) -+ au_opt_set(sbinfo->si_mntflags, XINO); -+ opt = opts->opt; -+ while (!err && opt->type != Opt_tail) -+ err = au_opt_xino(sb, opt++, &opt_xino, opts); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_opts_verify(sb, sb->s_flags, tmp); -+ if (unlikely(err)) -+ goto out; -+ -+ /* restore xino */ -+ if (au_opt_test(tmp, XINO) && !opt_xino) { -+ xino.file = au_xino_def(sb); -+ err = PTR_ERR(xino.file); -+ if (IS_ERR(xino.file)) -+ goto out; -+ -+ err = au_xino_set(sb, &xino, /*remount*/0); -+ fput(xino.file); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ /* restore udba */ -+ tmp &= AuOptMask_UDBA; -+ 
sbinfo->si_mntflags &= ~AuOptMask_UDBA; -+ sbinfo->si_mntflags |= tmp; -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ err = au_hnotify_reset_br(tmp, br, br->br_perm); -+ if (unlikely(err)) -+ AuIOErr("hnotify failed on br %d, %d, ignored\n", -+ bindex, err); -+ /* go on even if err */ -+ } -+ if (au_opt_test(tmp, UDBA_HNOTIFY)) { -+ dir = sb->s_root->d_inode; -+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO); -+ } -+ -+out: -+ return err; -+} -+ -+int au_opts_remount(struct super_block *sb, struct au_opts *opts) -+{ -+ int err, rerr; -+ struct inode *dir; -+ struct au_opt_xino *opt_xino; -+ struct au_opt *opt; -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ dir = sb->s_root->d_inode; -+ sbinfo = au_sbi(sb); -+ err = 0; -+ opt_xino = NULL; -+ opt = opts->opt; -+ while (err >= 0 && opt->type != Opt_tail) { -+ err = au_opt_simple(sb, opt, opts); -+ if (!err) -+ err = au_opt_br(sb, opt, opts); -+ if (!err) -+ err = au_opt_xino(sb, opt, &opt_xino, opts); -+ opt++; -+ } -+ if (err > 0) -+ err = 0; -+ AuTraceErr(err); -+ /* go on even err */ -+ -+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0); -+ if (unlikely(rerr && !err)) -+ err = rerr; -+ -+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) { -+ rerr = au_xib_trunc(sb); -+ if (unlikely(rerr && !err)) -+ err = rerr; -+ } -+ -+ /* will be handled by the caller */ -+ if (!au_ftest_opts(opts->flags, REFRESH) -+ && (opts->given_udba || au_opt_test(sbinfo->si_mntflags, XINO))) -+ au_fset_opts(opts->flags, REFRESH); -+ -+ AuDbg("status 0x%x\n", opts->flags); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+unsigned int au_opt_udba(struct super_block *sb) -+{ -+ return au_mntflags(sb) & AuOptMask_UDBA; -+} -diff --git fs/aufs/opts.h fs/aufs/opts.h -new file mode 100644 -index 0000000..845df03 ---- /dev/null -+++ fs/aufs/opts.h -@@ -0,0 +1,199 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro 
R. Okajima -+ */ -+ -+/* -+ * mount options/flags -+ */ -+ -+#ifndef __AUFS_OPTS_H__ -+#define __AUFS_OPTS_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "branch.h" -+ -+struct file; -+struct super_block; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* mount flags */ -+#define AuOpt_XINO 1 /* external inode number bitmap -+ and translation table */ -+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */ -+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */ -+#define AuOpt_UDBA_REVAL (1 << 3) -+#define AuOpt_UDBA_HNOTIFY (1 << 4) -+#define AuOpt_SHWH (1 << 5) /* show whiteout */ -+#define AuOpt_PLINK (1 << 6) /* pseudo-link */ -+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm -+ bits */ -+#define AuOpt_REFROF (1 << 8) /* unimplemented */ -+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */ -+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */ -+#define AuOpt_SUM_W (1 << 11) /* unimplemented */ -+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */ -+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */ -+#define AuOpt_DIO (1 << 14) /* direct io */ -+ -+#ifndef CONFIG_AUFS_HNOTIFY -+#undef AuOpt_UDBA_HNOTIFY -+#define AuOpt_UDBA_HNOTIFY 0 -+#endif -+#ifndef CONFIG_AUFS_SHWH -+#undef AuOpt_SHWH -+#define AuOpt_SHWH 0 -+#endif -+ -+#define AuOpt_Def (AuOpt_XINO \ -+ | AuOpt_UDBA_REVAL \ -+ | AuOpt_PLINK \ -+ /* | AuOpt_DIRPERM1 */ \ -+ | AuOpt_WARN_PERM) -+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \ -+ | AuOpt_UDBA_REVAL \ -+ | AuOpt_UDBA_HNOTIFY) -+ -+#define au_opt_test(flags, name) (flags & AuOpt_##name) -+#define au_opt_set(flags, name) do { \ -+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \ -+ ((flags) |= AuOpt_##name); \ -+} while (0) -+#define au_opt_set_udba(flags, name) do { \ -+ (flags) &= ~AuOptMask_UDBA; \ -+ ((flags) |= AuOpt_##name); \ -+} while (0) -+#define au_opt_clr(flags, name) do { \ -+ ((flags) &= ~AuOpt_##name); \ -+} while 
(0) -+ -+static inline unsigned int au_opts_plink(unsigned int mntflags) -+{ -+#ifdef CONFIG_PROC_FS -+ return mntflags; -+#else -+ return mntflags & ~AuOpt_PLINK; -+#endif -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* policies to select one among multiple writable branches */ -+enum { -+ AuWbrCreate_TDP, /* top down parent */ -+ AuWbrCreate_RR, /* round robin */ -+ AuWbrCreate_MFS, /* most free space */ -+ AuWbrCreate_MFSV, /* mfs with seconds */ -+ AuWbrCreate_MFSRR, /* mfs then rr */ -+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */ -+ AuWbrCreate_PMFS, /* parent and mfs */ -+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */ -+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */ -+ AuWbrCreate_PMFSRRV, /* plus seconds */ -+ -+ AuWbrCreate_Def = AuWbrCreate_TDP -+}; -+ -+enum { -+ AuWbrCopyup_TDP, /* top down parent */ -+ AuWbrCopyup_BUP, /* bottom up parent */ -+ AuWbrCopyup_BU, /* bottom up */ -+ -+ AuWbrCopyup_Def = AuWbrCopyup_TDP -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_opt_add { -+ aufs_bindex_t bindex; -+ char *pathname; -+ int perm; -+ struct path path; -+}; -+ -+struct au_opt_del { -+ char *pathname; -+ struct path h_path; -+}; -+ -+struct au_opt_mod { -+ char *path; -+ int perm; -+ struct dentry *h_root; -+}; -+ -+struct au_opt_xino { -+ char *path; -+ struct file *file; -+}; -+ -+struct au_opt_xino_itrunc { -+ aufs_bindex_t bindex; -+}; -+ -+struct au_opt_wbr_create { -+ int wbr_create; -+ int mfs_second; -+ unsigned long long mfsrr_watermark; -+}; -+ -+struct au_opt { -+ int type; -+ union { -+ struct au_opt_xino xino; -+ struct au_opt_xino_itrunc xino_itrunc; -+ struct au_opt_add add; -+ struct au_opt_del del; -+ struct au_opt_mod mod; -+ int dirwh; -+ int rdcache; -+ unsigned int rdblk; -+ unsigned int rdhash; -+ int udba; -+ struct au_opt_wbr_create wbr_create; -+ int wbr_copyup; -+ }; -+}; -+ -+/* opts flags */ -+#define 
AuOpts_REMOUNT 1 -+#define AuOpts_REFRESH (1 << 1) -+#define AuOpts_TRUNC_XIB (1 << 2) -+#define AuOpts_REFRESH_DYAOP (1 << 3) -+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name) -+#define au_fset_opts(flags, name) \ -+ do { (flags) |= AuOpts_##name; } while (0) -+#define au_fclr_opts(flags, name) \ -+ do { (flags) &= ~AuOpts_##name; } while (0) -+ -+struct au_opts { -+ struct au_opt *opt; -+ int max_opt; -+ -+ unsigned int given_udba; -+ unsigned int flags; -+ unsigned long sb_flags; -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_optstr_br_perm(au_br_perm_str_t *str, int perm); -+const char *au_optstr_udba(int udba); -+const char *au_optstr_wbr_copyup(int wbr_copyup); -+const char *au_optstr_wbr_create(int wbr_create); -+ -+void au_opts_free(struct au_opts *opts); -+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts); -+int au_opts_verify(struct super_block *sb, unsigned long sb_flags, -+ unsigned int pending); -+int au_opts_mount(struct super_block *sb, struct au_opts *opts); -+int au_opts_remount(struct super_block *sb, struct au_opts *opts); -+ -+unsigned int au_opt_udba(struct super_block *sb); -+ -+/* ---------------------------------------------------------------------- */ -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_OPTS_H__ */ -diff --git fs/aufs/plink.c fs/aufs/plink.c -new file mode 100644 -index 0000000..2b0384f ---- /dev/null -+++ fs/aufs/plink.c -@@ -0,0 +1,519 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * pseudo-link -+ */ -+ -+#include "aufs.h" -+ -+/* -+ * the pseudo-link maintenance mode. -+ * during a user process maintains the pseudo-links, -+ * prohibit adding a new plink and branch manipulation. -+ * -+ * Flags -+ * NOPLM: -+ * For entry functions which will handle plink, and i_mutex is already held -+ * in VFS. -+ * They cannot wait and should return an error at once. -+ * Callers has to check the error. 
-+ * NOPLMW: -+ * For entry functions which will handle plink, but i_mutex is not held -+ * in VFS. -+ * They can wait the plink maintenance mode to finish. -+ * -+ * They behave like F_SETLK and F_SETLKW. -+ * If the caller never handle plink, then both flags are unnecessary. -+ */ -+ -+int au_plink_maint(struct super_block *sb, int flags) -+{ -+ int err; -+ pid_t pid, ppid; -+ struct au_sbinfo *sbi; -+ -+ SiMustAnyLock(sb); -+ -+ err = 0; -+ if (!au_opt_test(au_mntflags(sb), PLINK)) -+ goto out; -+ -+ sbi = au_sbi(sb); -+ pid = sbi->si_plink_maint_pid; -+ if (!pid || pid == current->pid) -+ goto out; -+ -+ /* todo: it highly depends upon /sbin/mount.aufs */ -+ rcu_read_lock(); -+ ppid = task_pid_vnr(rcu_dereference(current->real_parent)); -+ rcu_read_unlock(); -+ if (pid == ppid) -+ goto out; -+ -+ if (au_ftest_lock(flags, NOPLMW)) { -+ /* if there is no i_mutex lock in VFS, we don't need to wait */ -+ /* AuDebugOn(!lockdep_depth(current)); */ -+ while (sbi->si_plink_maint_pid) { -+ si_read_unlock(sb); -+ /* gave up wake_up_bit() */ -+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid); -+ -+ if (au_ftest_lock(flags, FLUSH)) -+ au_nwt_flush(&sbi->si_nowait); -+ si_noflush_read_lock(sb); -+ } -+ } else if (au_ftest_lock(flags, NOPLM)) { -+ AuDbg("ppid %d, pid %d\n", ppid, pid); -+ err = -EAGAIN; -+ } -+ -+out: -+ return err; -+} -+ -+void au_plink_maint_leave(struct au_sbinfo *sbinfo) -+{ -+ spin_lock(&sbinfo->si_plink_maint_lock); -+ sbinfo->si_plink_maint_pid = 0; -+ spin_unlock(&sbinfo->si_plink_maint_lock); -+ wake_up_all(&sbinfo->si_plink_wq); -+} -+ -+int au_plink_maint_enter(struct super_block *sb) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ -+ err = 0; -+ sbinfo = au_sbi(sb); -+ /* make sure i am the only one in this fs */ -+ si_write_lock(sb, AuLock_FLUSH); -+ if (au_opt_test(au_mntflags(sb), PLINK)) { -+ spin_lock(&sbinfo->si_plink_maint_lock); -+ if (!sbinfo->si_plink_maint_pid) -+ sbinfo->si_plink_maint_pid = current->pid; -+ else -+ err = 
-EBUSY; -+ spin_unlock(&sbinfo->si_plink_maint_lock); -+ } -+ si_write_unlock(sb); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_DEBUG -+void au_plink_list(struct super_block *sb) -+{ -+ int i; -+ struct au_sbinfo *sbinfo; -+ struct hlist_head *plink_hlist; -+ struct pseudo_link *plink; -+ -+ SiMustAnyLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); -+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); -+ -+ for (i = 0; i < AuPlink_NHASH; i++) { -+ plink_hlist = &sbinfo->si_plink[i].head; -+ rcu_read_lock(); -+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist) -+ AuDbg("%lu\n", plink->inode->i_ino); -+ rcu_read_unlock(); -+ } -+} -+#endif -+ -+/* is the inode pseudo-linked? */ -+int au_plink_test(struct inode *inode) -+{ -+ int found, i; -+ struct au_sbinfo *sbinfo; -+ struct hlist_head *plink_hlist; -+ struct pseudo_link *plink; -+ -+ sbinfo = au_sbi(inode->i_sb); -+ AuRwMustAnyLock(&sbinfo->si_rwsem); -+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK)); -+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); -+ -+ found = 0; -+ i = au_plink_hash(inode->i_ino); -+ plink_hlist = &sbinfo->si_plink[i].head; -+ rcu_read_lock(); -+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist) -+ if (plink->inode == inode) { -+ found = 1; -+ break; -+ } -+ rcu_read_unlock(); -+ return found; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * generate a name for plink. -+ * the file will be stored under AUFS_WH_PLINKDIR. 
-+ */ -+/* 20 is max digits length of ulong 64 */ -+#define PLINK_NAME_LEN ((20 + 1) * 2) -+ -+static int plink_name(char *name, int len, struct inode *inode, -+ aufs_bindex_t bindex) -+{ -+ int rlen; -+ struct inode *h_inode; -+ -+ h_inode = au_h_iptr(inode, bindex); -+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino); -+ return rlen; -+} -+ -+struct au_do_plink_lkup_args { -+ struct dentry **errp; -+ struct qstr *tgtname; -+ struct dentry *h_parent; -+ struct au_branch *br; -+}; -+ -+static struct dentry *au_do_plink_lkup(struct qstr *tgtname, -+ struct dentry *h_parent, -+ struct au_branch *br) -+{ -+ struct dentry *h_dentry; -+ struct mutex *h_mtx; -+ -+ h_mtx = &h_parent->d_inode->i_mutex; -+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2); -+ h_dentry = vfsub_lkup_one(tgtname, h_parent); -+ mutex_unlock(h_mtx); -+ return h_dentry; -+} -+ -+static void au_call_do_plink_lkup(void *args) -+{ -+ struct au_do_plink_lkup_args *a = args; -+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br); -+} -+ -+/* lookup the plink-ed @inode under the branch at @bindex */ -+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex) -+{ -+ struct dentry *h_dentry, *h_parent; -+ struct au_branch *br; -+ struct inode *h_dir; -+ int wkq_err; -+ char a[PLINK_NAME_LEN]; -+ struct qstr tgtname = QSTR_INIT(a, 0); -+ -+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM)); -+ -+ br = au_sbr(inode->i_sb, bindex); -+ h_parent = br->br_wbr->wbr_plink; -+ h_dir = h_parent->d_inode; -+ tgtname.len = plink_name(a, sizeof(a), inode, bindex); -+ -+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) { -+ struct au_do_plink_lkup_args args = { -+ .errp = &h_dentry, -+ .tgtname = &tgtname, -+ .h_parent = h_parent, -+ .br = br -+ }; -+ -+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args); -+ if (unlikely(wkq_err)) -+ h_dentry = ERR_PTR(wkq_err); -+ } else -+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br); -+ -+ return h_dentry; -+} -+ -+/* create a pseudo-link */ 
-+static int do_whplink(struct qstr *tgt, struct dentry *h_parent, -+ struct dentry *h_dentry, struct au_branch *br) -+{ -+ int err; -+ struct path h_path = { -+ .mnt = au_br_mnt(br) -+ }; -+ struct inode *h_dir, *delegated; -+ -+ h_dir = h_parent->d_inode; -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2); -+again: -+ h_path.dentry = vfsub_lkup_one(tgt, h_parent); -+ err = PTR_ERR(h_path.dentry); -+ if (IS_ERR(h_path.dentry)) -+ goto out; -+ -+ err = 0; -+ /* wh.plink dir is not monitored */ -+ /* todo: is it really safe? */ -+ if (h_path.dentry->d_inode -+ && h_path.dentry->d_inode != h_dentry->d_inode) { -+ delegated = NULL; -+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ dput(h_path.dentry); -+ h_path.dentry = NULL; -+ if (!err) -+ goto again; -+ } -+ if (!err && !h_path.dentry->d_inode) { -+ delegated = NULL; -+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ } -+ dput(h_path.dentry); -+ -+out: -+ mutex_unlock(&h_dir->i_mutex); -+ return err; -+} -+ -+struct do_whplink_args { -+ int *errp; -+ struct qstr *tgt; -+ struct dentry *h_parent; -+ struct dentry *h_dentry; -+ struct au_branch *br; -+}; -+ -+static void call_do_whplink(void *args) -+{ -+ struct do_whplink_args *a = args; -+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br); -+} -+ -+static int whplink(struct dentry *h_dentry, struct inode *inode, -+ aufs_bindex_t bindex, struct au_branch *br) -+{ -+ int err, wkq_err; -+ struct au_wbr *wbr; -+ struct dentry *h_parent; -+ struct inode *h_dir; -+ char a[PLINK_NAME_LEN]; -+ struct qstr tgtname = QSTR_INIT(a, 0); -+ -+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr; -+ h_parent = wbr->wbr_plink; -+ h_dir = h_parent->d_inode; -+ 
tgtname.len = plink_name(a, sizeof(a), inode, bindex); -+ -+ /* always superio. */ -+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) { -+ struct do_whplink_args args = { -+ .errp = &err, -+ .tgt = &tgtname, -+ .h_parent = h_parent, -+ .h_dentry = h_dentry, -+ .br = br -+ }; -+ wkq_err = au_wkq_wait(call_do_whplink, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } else -+ err = do_whplink(&tgtname, h_parent, h_dentry, br); -+ -+ return err; -+} -+ -+/* free a single plink */ -+static void do_put_plink(struct pseudo_link *plink, int do_del) -+{ -+ if (do_del) -+ hlist_del(&plink->hlist); -+ iput(plink->inode); -+ kfree(plink); -+} -+ -+static void do_put_plink_rcu(struct rcu_head *rcu) -+{ -+ struct pseudo_link *plink; -+ -+ plink = container_of(rcu, struct pseudo_link, rcu); -+ iput(plink->inode); -+ kfree(plink); -+} -+ -+/* -+ * create a new pseudo-link for @h_dentry on @bindex. -+ * the linked inode is held in aufs @inode. -+ */ -+void au_plink_append(struct inode *inode, aufs_bindex_t bindex, -+ struct dentry *h_dentry) -+{ -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ struct hlist_head *plink_hlist; -+ struct pseudo_link *plink, *tmp; -+ struct au_sphlhead *sphl; -+ int found, err, cnt, i; -+ -+ sb = inode->i_sb; -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); -+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); -+ -+ found = au_plink_test(inode); -+ if (found) -+ return; -+ -+ i = au_plink_hash(inode->i_ino); -+ sphl = sbinfo->si_plink + i; -+ plink_hlist = &sphl->head; -+ tmp = kmalloc(sizeof(*plink), GFP_NOFS); -+ if (tmp) -+ tmp->inode = au_igrab(inode); -+ else { -+ err = -ENOMEM; -+ goto out; -+ } -+ -+ spin_lock(&sphl->spin); -+ hlist_for_each_entry(plink, plink_hlist, hlist) { -+ if (plink->inode == inode) { -+ found = 1; -+ break; -+ } -+ } -+ if (!found) -+ hlist_add_head_rcu(&tmp->hlist, plink_hlist); -+ spin_unlock(&sphl->spin); -+ if (!found) { -+ cnt = au_sphl_count(sphl); -+#define msg "unexpectedly 
unblanced or too many pseudo-links" -+ if (cnt > AUFS_PLINK_WARN) -+ AuWarn1(msg ", %d\n", cnt); -+#undef msg -+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex)); -+ } else { -+ do_put_plink(tmp, 0); -+ return; -+ } -+ -+out: -+ if (unlikely(err)) { -+ pr_warn("err %d, damaged pseudo link.\n", err); -+ if (tmp) { -+ au_sphl_del_rcu(&tmp->hlist, sphl); -+ call_rcu(&tmp->rcu, do_put_plink_rcu); -+ } -+ } -+} -+ -+/* free all plinks */ -+void au_plink_put(struct super_block *sb, int verbose) -+{ -+ int i, warned; -+ struct au_sbinfo *sbinfo; -+ struct hlist_head *plink_hlist; -+ struct hlist_node *tmp; -+ struct pseudo_link *plink; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); -+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); -+ -+ /* no spin_lock since sbinfo is write-locked */ -+ warned = 0; -+ for (i = 0; i < AuPlink_NHASH; i++) { -+ plink_hlist = &sbinfo->si_plink[i].head; -+ if (!warned && verbose && !hlist_empty(plink_hlist)) { -+ pr_warn("pseudo-link is not flushed"); -+ warned = 1; -+ } -+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) -+ do_put_plink(plink, 0); -+ INIT_HLIST_HEAD(plink_hlist); -+ } -+} -+ -+void au_plink_clean(struct super_block *sb, int verbose) -+{ -+ struct dentry *root; -+ -+ root = sb->s_root; -+ aufs_write_lock(root); -+ if (au_opt_test(au_mntflags(sb), PLINK)) -+ au_plink_put(sb, verbose); -+ aufs_write_unlock(root); -+} -+ -+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id) -+{ -+ int do_put; -+ aufs_bindex_t bstart, bend, bindex; -+ -+ do_put = 0; -+ bstart = au_ibstart(inode); -+ bend = au_ibend(inode); -+ if (bstart >= 0) { -+ for (bindex = bstart; bindex <= bend; bindex++) { -+ if (!au_h_iptr(inode, bindex) -+ || au_ii_br_id(inode, bindex) != br_id) -+ continue; -+ au_set_h_iptr(inode, bindex, NULL, 0); -+ do_put = 1; -+ break; -+ } -+ if (do_put) -+ for (bindex = bstart; bindex <= bend; bindex++) -+ if (au_h_iptr(inode, 
bindex)) { -+ do_put = 0; -+ break; -+ } -+ } else -+ do_put = 1; -+ -+ return do_put; -+} -+ -+/* free the plinks on a branch specified by @br_id */ -+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id) -+{ -+ struct au_sbinfo *sbinfo; -+ struct hlist_head *plink_hlist; -+ struct hlist_node *tmp; -+ struct pseudo_link *plink; -+ struct inode *inode; -+ int i, do_put; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK)); -+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM)); -+ -+ /* no spin_lock since sbinfo is write-locked */ -+ for (i = 0; i < AuPlink_NHASH; i++) { -+ plink_hlist = &sbinfo->si_plink[i].head; -+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) { -+ inode = au_igrab(plink->inode); -+ ii_write_lock_child(inode); -+ do_put = au_plink_do_half_refresh(inode, br_id); -+ if (do_put) -+ do_put_plink(plink, 1); -+ ii_write_unlock(inode); -+ iput(inode); -+ } -+ } -+} -diff --git fs/aufs/poll.c fs/aufs/poll.c -new file mode 100644 -index 0000000..a0ccc9b ---- /dev/null -+++ fs/aufs/poll.c -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * poll operation -+ * There is only one filesystem which implements ->poll operation, currently. -+ */ -+ -+#include "aufs.h" -+ -+unsigned int aufs_poll(struct file *file, poll_table *wait) -+{ -+ unsigned int mask; -+ int err; -+ struct file *h_file; -+ struct dentry *dentry; -+ struct super_block *sb; -+ -+ /* We should pretend an error happened. 
*/ -+ mask = POLLERR /* | POLLIN | POLLOUT */; -+ dentry = file->f_dentry; -+ sb = dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW); -+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0); -+ if (unlikely(err)) -+ goto out; -+ -+ /* it is not an error if h_file has no operation */ -+ mask = DEFAULT_POLLMASK; -+ h_file = au_hf_top(file); -+ if (h_file->f_op->poll) -+ mask = h_file->f_op->poll(h_file, wait); -+ -+ di_read_unlock(dentry, AuLock_IR); -+ fi_read_unlock(file); -+ -+out: -+ si_read_unlock(sb); -+ AuTraceErr((int)mask); -+ return mask; -+} -diff --git fs/aufs/procfs.c fs/aufs/procfs.c -new file mode 100644 -index 0000000..bcc686b ---- /dev/null -+++ fs/aufs/procfs.c -@@ -0,0 +1,156 @@ -+/* -+ * Copyright (C) 2010-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * procfs interfaces -+ */ -+ -+#include -+#include "aufs.h" -+ -+static int au_procfs_plm_release(struct inode *inode, struct file *file) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ sbinfo = file->private_data; -+ if (sbinfo) { -+ au_plink_maint_leave(sbinfo); -+ kobject_put(&sbinfo->si_kobj); -+ } -+ -+ return 0; -+} -+ -+static void au_procfs_plm_write_clean(struct file *file) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ sbinfo = file->private_data; -+ if (sbinfo) -+ au_plink_clean(sbinfo->si_sb, /*verbose*/0); -+} -+ -+static int au_procfs_plm_write_si(struct file *file, unsigned long id) -+{ -+ int err; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ -+ err = -EBUSY; -+ if (unlikely(file->private_data)) -+ goto out; -+ -+ sb = NULL; -+ /* don't use au_sbilist_lock() here */ -+ spin_lock(&au_sbilist.spin); -+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list) -+ if (id == sysaufs_si_id(sbinfo)) { -+ kobject_get(&sbinfo->si_kobj); -+ sb = sbinfo->si_sb; -+ break; -+ } -+ spin_unlock(&au_sbilist.spin); -+ -+ err = -EINVAL; -+ if (unlikely(!sb)) -+ goto out; -+ -+ err = au_plink_maint_enter(sb); -+ if (!err) -+ /* keep kobject_get() */ -+ file->private_data = sbinfo; -+ else 
-+ kobject_put(&sbinfo->si_kobj); -+out: -+ return err; -+} -+ -+/* -+ * Accept a valid "si=xxxx" only. -+ * Once it is accepted successfully, accept "clean" too. -+ */ -+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf, -+ size_t count, loff_t *ppos) -+{ -+ ssize_t err; -+ unsigned long id; -+ /* last newline is allowed */ -+ char buf[3 + sizeof(unsigned long) * 2 + 1]; -+ -+ err = -EACCES; -+ if (unlikely(!capable(CAP_SYS_ADMIN))) -+ goto out; -+ -+ err = -EINVAL; -+ if (unlikely(count > sizeof(buf))) -+ goto out; -+ -+ err = copy_from_user(buf, ubuf, count); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ goto out; -+ } -+ buf[count] = 0; -+ -+ err = -EINVAL; -+ if (!strcmp("clean", buf)) { -+ au_procfs_plm_write_clean(file); -+ goto out_success; -+ } else if (unlikely(strncmp("si=", buf, 3))) -+ goto out; -+ -+ err = kstrtoul(buf + 3, 16, &id); -+ if (unlikely(err)) -+ goto out; -+ -+ err = au_procfs_plm_write_si(file, id); -+ if (unlikely(err)) -+ goto out; -+ -+out_success: -+ err = count; /* success */ -+out: -+ return err; -+} -+ -+static const struct file_operations au_procfs_plm_fop = { -+ .write = au_procfs_plm_write, -+ .release = au_procfs_plm_release, -+ .owner = THIS_MODULE -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct proc_dir_entry *au_procfs_dir; -+ -+void au_procfs_fin(void) -+{ -+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir); -+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL); -+} -+ -+int __init au_procfs_init(void) -+{ -+ int err; -+ struct proc_dir_entry *entry; -+ -+ err = -ENOMEM; -+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL); -+ if (unlikely(!au_procfs_dir)) -+ goto out; -+ -+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR, -+ au_procfs_dir, &au_procfs_plm_fop); -+ if (unlikely(!entry)) -+ goto out_dir; -+ -+ err = 0; -+ goto out; /* success */ -+ -+ -+out_dir: -+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL); -+out: -+ 
return err; -+} -diff --git fs/aufs/rdu.c fs/aufs/rdu.c -new file mode 100644 -index 0000000..010e829 ---- /dev/null -+++ fs/aufs/rdu.c -@@ -0,0 +1,375 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * readdir in userspace. -+ */ -+ -+#include -+#include -+#include -+#include "aufs.h" -+ -+/* bits for struct aufs_rdu.flags */ -+#define AuRdu_CALLED 1 -+#define AuRdu_CONT (1 << 1) -+#define AuRdu_FULL (1 << 2) -+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name) -+#define au_fset_rdu(flags, name) \ -+ do { (flags) |= AuRdu_##name; } while (0) -+#define au_fclr_rdu(flags, name) \ -+ do { (flags) &= ~AuRdu_##name; } while (0) -+ -+struct au_rdu_arg { -+ struct dir_context ctx; -+ struct aufs_rdu *rdu; -+ union au_rdu_ent_ul ent; -+ unsigned long end; -+ -+ struct super_block *sb; -+ int err; -+}; -+ -+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen, -+ loff_t offset, u64 h_ino, unsigned int d_type) -+{ -+ int err, len; -+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx); -+ struct aufs_rdu *rdu = arg->rdu; -+ struct au_rdu_ent ent; -+ -+ err = 0; -+ arg->err = 0; -+ au_fset_rdu(rdu->cookie.flags, CALLED); -+ len = au_rdu_len(nlen); -+ if (arg->ent.ul + len < arg->end) { -+ ent.ino = h_ino; -+ ent.bindex = rdu->cookie.bindex; -+ ent.type = d_type; -+ ent.nlen = nlen; -+ if (unlikely(nlen > AUFS_MAX_NAMELEN)) -+ ent.type = DT_UNKNOWN; -+ -+ /* unnecessary to support mmap_sem since this is a dir */ -+ err = -EFAULT; -+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent))) -+ goto out; -+ if (copy_to_user(arg->ent.e->name, name, nlen)) -+ goto out; -+ /* the terminating NULL */ -+ if (__put_user(0, arg->ent.e->name + nlen)) -+ goto out; -+ err = 0; -+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */ -+ arg->ent.ul += len; -+ rdu->rent++; -+ } else { -+ err = -EFAULT; -+ au_fset_rdu(rdu->cookie.flags, FULL); -+ rdu->full = 1; -+ rdu->tail = arg->ent; -+ } -+ -+out: -+ /* AuTraceErr(err); */ -+ 
return err; -+} -+ -+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg) -+{ -+ int err; -+ loff_t offset; -+ struct au_rdu_cookie *cookie = &arg->rdu->cookie; -+ -+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */ -+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET); -+ err = offset; -+ if (unlikely(offset != cookie->h_pos)) -+ goto out; -+ -+ err = 0; -+ do { -+ arg->err = 0; -+ au_fclr_rdu(cookie->flags, CALLED); -+ /* smp_mb(); */ -+ err = vfsub_iterate_dir(h_file, &arg->ctx); -+ if (err >= 0) -+ err = arg->err; -+ } while (!err -+ && au_ftest_rdu(cookie->flags, CALLED) -+ && !au_ftest_rdu(cookie->flags, FULL)); -+ cookie->h_pos = h_file->f_pos; -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_rdu(struct file *file, struct aufs_rdu *rdu) -+{ -+ int err; -+ aufs_bindex_t bend; -+ struct au_rdu_arg arg = { -+ .ctx = { -+ .actor = au_diractor(au_rdu_fill) -+ } -+ }; -+ struct dentry *dentry; -+ struct inode *inode; -+ struct file *h_file; -+ struct au_rdu_cookie *cookie = &rdu->cookie; -+ -+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ goto out; -+ } -+ rdu->rent = 0; -+ rdu->tail = rdu->ent; -+ rdu->full = 0; -+ arg.rdu = rdu; -+ arg.ent = rdu->ent; -+ arg.end = arg.ent.ul; -+ arg.end += rdu->sz; -+ -+ err = -ENOTDIR; -+ if (unlikely(!file->f_op->iterate)) -+ goto out; -+ -+ err = security_file_permission(file, MAY_READ); -+ AuTraceErr(err); -+ if (unlikely(err)) -+ goto out; -+ -+ dentry = file->f_dentry; -+ inode = dentry->d_inode; -+#if 1 -+ mutex_lock(&inode->i_mutex); -+#else -+ err = mutex_lock_killable(&inode->i_mutex); -+ AuTraceErr(err); -+ if (unlikely(err)) -+ goto out; -+#endif -+ -+ arg.sb = inode->i_sb; -+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out_mtx; -+ err = au_alive_dir(dentry); -+ if (unlikely(err)) -+ goto out_si; -+ /* todo: reval? 
*/ -+ fi_read_lock(file); -+ -+ err = -EAGAIN; -+ if (unlikely(au_ftest_rdu(cookie->flags, CONT) -+ && cookie->generation != au_figen(file))) -+ goto out_unlock; -+ -+ err = 0; -+ if (!rdu->blk) { -+ rdu->blk = au_sbi(arg.sb)->si_rdblk; -+ if (!rdu->blk) -+ rdu->blk = au_dir_size(file, /*dentry*/NULL); -+ } -+ bend = au_fbstart(file); -+ if (cookie->bindex < bend) -+ cookie->bindex = bend; -+ bend = au_fbend_dir(file); -+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */ -+ for (; !err && cookie->bindex <= bend; -+ cookie->bindex++, cookie->h_pos = 0) { -+ h_file = au_hf_dir(file, cookie->bindex); -+ if (!h_file) -+ continue; -+ -+ au_fclr_rdu(cookie->flags, FULL); -+ err = au_rdu_do(h_file, &arg); -+ AuTraceErr(err); -+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err)) -+ break; -+ } -+ AuDbg("rent %llu\n", rdu->rent); -+ -+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) { -+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH); -+ au_fset_rdu(cookie->flags, CONT); -+ cookie->generation = au_figen(file); -+ } -+ -+ ii_read_lock_child(inode); -+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode))); -+ ii_read_unlock(inode); -+ -+out_unlock: -+ fi_read_unlock(file); -+out_si: -+ si_read_unlock(arg.sb); -+out_mtx: -+ mutex_unlock(&inode->i_mutex); -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu) -+{ -+ int err; -+ ino_t ino; -+ unsigned long long nent; -+ union au_rdu_ent_ul *u; -+ struct au_rdu_ent ent; -+ struct super_block *sb; -+ -+ err = 0; -+ nent = rdu->nent; -+ u = &rdu->ent; -+ sb = file->f_dentry->d_sb; -+ si_read_lock(sb, AuLock_FLUSH); -+ while (nent-- > 0) { -+ /* unnecessary to support mmap_sem since this is a dir */ -+ err = copy_from_user(&ent, u->e, sizeof(ent)); -+ if (!err) -+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ break; -+ } -+ -+ /* AuDbg("b%d, i%llu\n", ent.bindex, 
ent.ino); */ -+ if (!ent.wh) -+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino); -+ else -+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type, -+ &ino); -+ if (unlikely(err)) { -+ AuTraceErr(err); -+ break; -+ } -+ -+ err = __put_user(ino, &u->e->ino); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ break; -+ } -+ u->ul += au_rdu_len(ent.nlen); -+ } -+ si_read_unlock(sb); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_rdu_verify(struct aufs_rdu *rdu) -+{ -+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | " -+ "%llu, b%d, 0x%x, g%u}\n", -+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ], -+ rdu->blk, -+ rdu->rent, rdu->shwh, rdu->full, -+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags, -+ rdu->cookie.generation); -+ -+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu)) -+ return 0; -+ -+ AuDbg("%u:%u\n", -+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu)); -+ return -EINVAL; -+} -+ -+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err, e; -+ struct aufs_rdu rdu; -+ void __user *p = (void __user *)arg; -+ -+ err = copy_from_user(&rdu, p, sizeof(rdu)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ goto out; -+ } -+ err = au_rdu_verify(&rdu); -+ if (unlikely(err)) -+ goto out; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ err = au_rdu(file, &rdu); -+ if (unlikely(err)) -+ break; -+ -+ e = copy_to_user(p, &rdu, sizeof(rdu)); -+ if (unlikely(e)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ } -+ break; -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_ino(file, &rdu); -+ break; -+ -+ default: -+ /* err = -ENOTTY; */ -+ err = -EINVAL; -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+#ifdef CONFIG_COMPAT -+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -+{ -+ long err, e; -+ struct aufs_rdu rdu; -+ void __user *p = compat_ptr(arg); -+ -+ /* todo: get_user()? 
*/ -+ err = copy_from_user(&rdu, p, sizeof(rdu)); -+ if (unlikely(err)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ goto out; -+ } -+ rdu.ent.e = compat_ptr(rdu.ent.ul); -+ err = au_rdu_verify(&rdu); -+ if (unlikely(err)) -+ goto out; -+ -+ switch (cmd) { -+ case AUFS_CTL_RDU: -+ err = au_rdu(file, &rdu); -+ if (unlikely(err)) -+ break; -+ -+ rdu.ent.ul = ptr_to_compat(rdu.ent.e); -+ rdu.tail.ul = ptr_to_compat(rdu.tail.e); -+ e = copy_to_user(p, &rdu, sizeof(rdu)); -+ if (unlikely(e)) { -+ err = -EFAULT; -+ AuTraceErr(err); -+ } -+ break; -+ case AUFS_CTL_RDU_INO: -+ err = au_rdu_ino(file, &rdu); -+ break; -+ -+ default: -+ /* err = -ENOTTY; */ -+ err = -EINVAL; -+ } -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+#endif -diff --git fs/aufs/rwsem.h fs/aufs/rwsem.h -new file mode 100644 -index 0000000..052171d ---- /dev/null -+++ fs/aufs/rwsem.h -@@ -0,0 +1,178 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * simple read-write semaphore wrappers -+ */ -+ -+#ifndef __AUFS_RWSEM_H__ -+#define __AUFS_RWSEM_H__ -+ -+#ifdef __KERNEL__ -+ -+#include "debug.h" -+ -+struct au_rwsem { -+ struct rw_semaphore rwsem; -+#ifdef CONFIG_AUFS_DEBUG -+ /* just for debugging, not almighty counter */ -+ atomic_t rcnt, wcnt; -+#endif -+}; -+ -+#ifdef CONFIG_AUFS_DEBUG -+#define AuDbgCntInit(rw) do { \ -+ atomic_set(&(rw)->rcnt, 0); \ -+ atomic_set(&(rw)->wcnt, 0); \ -+ smp_mb(); /* atomic set */ \ -+} while (0) -+ -+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt) -+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0) -+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt) -+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0) -+#else -+#define AuDbgCntInit(rw) do {} while (0) -+#define AuDbgRcntInc(rw) do {} while (0) -+#define AuDbgRcntDec(rw) do {} while (0) -+#define AuDbgWcntInc(rw) do {} while (0) -+#define AuDbgWcntDec(rw) do {} while (0) -+#endif /* CONFIG_AUFS_DEBUG */ -+ -+/* to debug easier, do not make them inlined 
functions */ -+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list)) -+/* rwsem_is_locked() is unusable */ -+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0) -+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0) -+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \ -+ && atomic_read(&(rw)->wcnt) <= 0) -+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \ -+ || atomic_read(&(rw)->wcnt)) -+ -+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key) -+ -+static inline void au_rw_init(struct au_rwsem *rw) -+{ -+ AuDbgCntInit(rw); -+ init_rwsem(&rw->rwsem); -+} -+ -+static inline void au_rw_init_wlock(struct au_rwsem *rw) -+{ -+ au_rw_init(rw); -+ down_write(&rw->rwsem); -+ AuDbgWcntInc(rw); -+} -+ -+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw, -+ unsigned int lsc) -+{ -+ au_rw_init(rw); -+ down_write_nested(&rw->rwsem, lsc); -+ AuDbgWcntInc(rw); -+} -+ -+static inline void au_rw_read_lock(struct au_rwsem *rw) -+{ -+ down_read(&rw->rwsem); -+ AuDbgRcntInc(rw); -+} -+ -+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc) -+{ -+ down_read_nested(&rw->rwsem, lsc); -+ AuDbgRcntInc(rw); -+} -+ -+static inline void au_rw_read_unlock(struct au_rwsem *rw) -+{ -+ AuRwMustReadLock(rw); -+ AuDbgRcntDec(rw); -+ up_read(&rw->rwsem); -+} -+ -+static inline void au_rw_dgrade_lock(struct au_rwsem *rw) -+{ -+ AuRwMustWriteLock(rw); -+ AuDbgRcntInc(rw); -+ AuDbgWcntDec(rw); -+ downgrade_write(&rw->rwsem); -+} -+ -+static inline void au_rw_write_lock(struct au_rwsem *rw) -+{ -+ down_write(&rw->rwsem); -+ AuDbgWcntInc(rw); -+} -+ -+static inline void au_rw_write_lock_nested(struct au_rwsem *rw, -+ unsigned int lsc) -+{ -+ down_write_nested(&rw->rwsem, lsc); -+ AuDbgWcntInc(rw); -+} -+ -+static inline void au_rw_write_unlock(struct au_rwsem *rw) -+{ -+ AuRwMustWriteLock(rw); -+ AuDbgWcntDec(rw); -+ up_write(&rw->rwsem); -+} -+ -+/* why is not 
_nested version defined */ -+static inline int au_rw_read_trylock(struct au_rwsem *rw) -+{ -+ int ret; -+ -+ ret = down_read_trylock(&rw->rwsem); -+ if (ret) -+ AuDbgRcntInc(rw); -+ return ret; -+} -+ -+static inline int au_rw_write_trylock(struct au_rwsem *rw) -+{ -+ int ret; -+ -+ ret = down_write_trylock(&rw->rwsem); -+ if (ret) -+ AuDbgWcntInc(rw); -+ return ret; -+} -+ -+#undef AuDbgCntInit -+#undef AuDbgRcntInc -+#undef AuDbgRcntDec -+#undef AuDbgWcntInc -+#undef AuDbgWcntDec -+ -+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ -+static inline void prefix##_read_lock(param) \ -+{ au_rw_read_lock(rwsem); } \ -+static inline void prefix##_write_lock(param) \ -+{ au_rw_write_lock(rwsem); } \ -+static inline int prefix##_read_trylock(param) \ -+{ return au_rw_read_trylock(rwsem); } \ -+static inline int prefix##_write_trylock(param) \ -+{ return au_rw_write_trylock(rwsem); } -+/* why is not _nested version defined */ -+/* static inline void prefix##_read_trylock_nested(param, lsc) -+{ au_rw_read_trylock_nested(rwsem, lsc)); } -+static inline void prefix##_write_trylock_nestd(param, lsc) -+{ au_rw_write_trylock_nested(rwsem, lsc); } */ -+ -+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \ -+static inline void prefix##_read_unlock(param) \ -+{ au_rw_read_unlock(rwsem); } \ -+static inline void prefix##_write_unlock(param) \ -+{ au_rw_write_unlock(rwsem); } \ -+static inline void prefix##_downgrade_lock(param) \ -+{ au_rw_dgrade_lock(rwsem); } -+ -+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \ -+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \ -+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_RWSEM_H__ */ -diff --git fs/aufs/sbinfo.c fs/aufs/sbinfo.c -new file mode 100644 -index 0000000..c388f57 ---- /dev/null -+++ fs/aufs/sbinfo.c -@@ -0,0 +1,338 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * superblock private data -+ */ -+ -+#include "aufs.h" -+ -+/* -+ * they are necessary regardless sysfs is disabled. -+ */ -+void au_si_free(struct kobject *kobj) -+{ -+ int i; -+ struct au_sbinfo *sbinfo; -+ char *locked __maybe_unused; /* debug only */ -+ -+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); -+ for (i = 0; i < AuPlink_NHASH; i++) -+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head)); -+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len)); -+ -+ au_rw_write_lock(&sbinfo->si_rwsem); -+ au_br_free(sbinfo); -+ au_rw_write_unlock(&sbinfo->si_rwsem); -+ -+ AuDebugOn(radix_tree_gang_lookup -+ (&sbinfo->au_si_pid.tree, (void **)&locked, -+ /*first_index*/PID_MAX_DEFAULT - 1, -+ /*max_items*/sizeof(locked)/sizeof(*locked))); -+ -+ kfree(sbinfo->si_branch); -+ kfree(sbinfo->au_si_pid.bitmap); -+ mutex_destroy(&sbinfo->si_xib_mtx); -+ AuRwDestroy(&sbinfo->si_rwsem); -+ -+ kfree(sbinfo); -+} -+ -+int au_si_alloc(struct super_block *sb) -+{ -+ int err, i; -+ struct au_sbinfo *sbinfo; -+ static struct lock_class_key aufs_si; -+ -+ err = -ENOMEM; -+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS); -+ if (unlikely(!sbinfo)) -+ goto out; -+ -+ BUILD_BUG_ON(sizeof(unsigned long) != -+ sizeof(*sbinfo->au_si_pid.bitmap)); -+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT), -+ sizeof(*sbinfo->au_si_pid.bitmap), -+ GFP_NOFS); -+ if (unlikely(!sbinfo->au_si_pid.bitmap)) -+ goto out_sbinfo; -+ -+ /* will be reallocated separately */ -+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS); -+ if (unlikely(!sbinfo->si_branch)) -+ goto out_pidmap; -+ -+ err = sysaufs_si_init(sbinfo); -+ if (unlikely(err)) -+ goto out_br; -+ -+ au_nwt_init(&sbinfo->si_nowait); -+ au_rw_init_wlock(&sbinfo->si_rwsem); -+ au_rw_class(&sbinfo->si_rwsem, &aufs_si); -+ spin_lock_init(&sbinfo->au_si_pid.tree_lock); -+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL); -+ -+ atomic_long_set(&sbinfo->si_ninodes, 0); -+ 
atomic_long_set(&sbinfo->si_nfiles, 0); -+ -+ sbinfo->si_bend = -1; -+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2; -+ -+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def; -+ sbinfo->si_wbr_create = AuWbrCreate_Def; -+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup; -+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create; -+ -+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def); -+ -+ sbinfo->si_xino_jiffy = jiffies; -+ sbinfo->si_xino_expire -+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC); -+ mutex_init(&sbinfo->si_xib_mtx); -+ sbinfo->si_xino_brid = -1; -+ /* leave si_xib_last_pindex and si_xib_next_bit */ -+ -+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC); -+ sbinfo->si_rdblk = AUFS_RDBLK_DEF; -+ sbinfo->si_rdhash = AUFS_RDHASH_DEF; -+ sbinfo->si_dirwh = AUFS_DIRWH_DEF; -+ -+ for (i = 0; i < AuPlink_NHASH; i++) -+ au_sphl_init(sbinfo->si_plink + i); -+ init_waitqueue_head(&sbinfo->si_plink_wq); -+ spin_lock_init(&sbinfo->si_plink_maint_lock); -+ -+ au_sphl_init(&sbinfo->si_files); -+ -+ /* leave other members for sysaufs and si_mnt. 
*/ -+ sbinfo->si_sb = sb; -+ sb->s_fs_info = sbinfo; -+ si_pid_set(sb); -+ au_debug_sbinfo_init(sbinfo); -+ return 0; /* success */ -+ -+out_br: -+ kfree(sbinfo->si_branch); -+out_pidmap: -+ kfree(sbinfo->au_si_pid.bitmap); -+out_sbinfo: -+ kfree(sbinfo); -+out: -+ return err; -+} -+ -+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr) -+{ -+ int err, sz; -+ struct au_branch **brp; -+ -+ AuRwMustWriteLock(&sbinfo->si_rwsem); -+ -+ err = -ENOMEM; -+ sz = sizeof(*brp) * (sbinfo->si_bend + 1); -+ if (unlikely(!sz)) -+ sz = sizeof(*brp); -+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS); -+ if (brp) { -+ sbinfo->si_branch = brp; -+ err = 0; -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+unsigned int au_sigen_inc(struct super_block *sb) -+{ -+ unsigned int gen; -+ -+ SiMustWriteLock(sb); -+ -+ gen = ++au_sbi(sb)->si_generation; -+ au_update_digen(sb->s_root); -+ au_update_iigen(sb->s_root->d_inode, /*half*/0); -+ sb->s_root->d_inode->i_version++; -+ return gen; -+} -+ -+aufs_bindex_t au_new_br_id(struct super_block *sb) -+{ -+ aufs_bindex_t br_id; -+ int i; -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) { -+ br_id = ++sbinfo->si_last_br_id; -+ AuDebugOn(br_id < 0); -+ if (br_id && au_br_index(sb, br_id) < 0) -+ return br_id; -+ } -+ -+ return -1; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* it is ok that new 'nwt' tasks are appended while we are sleeping */ -+int si_read_lock(struct super_block *sb, int flags) -+{ -+ int err; -+ -+ err = 0; -+ if (au_ftest_lock(flags, FLUSH)) -+ au_nwt_flush(&au_sbi(sb)->si_nowait); -+ -+ si_noflush_read_lock(sb); -+ err = au_plink_maint(sb, flags); -+ if (unlikely(err)) -+ si_read_unlock(sb); -+ -+ return err; -+} -+ -+int si_write_lock(struct super_block *sb, int flags) -+{ -+ int err; -+ -+ if (au_ftest_lock(flags, 
FLUSH)) -+ au_nwt_flush(&au_sbi(sb)->si_nowait); -+ -+ si_noflush_write_lock(sb); -+ err = au_plink_maint(sb, flags); -+ if (unlikely(err)) -+ si_write_unlock(sb); -+ -+ return err; -+} -+ -+/* dentry and super_block lock. call at entry point */ -+int aufs_read_lock(struct dentry *dentry, int flags) -+{ -+ int err; -+ struct super_block *sb; -+ -+ sb = dentry->d_sb; -+ err = si_read_lock(sb, flags); -+ if (unlikely(err)) -+ goto out; -+ -+ if (au_ftest_lock(flags, DW)) -+ di_write_lock_child(dentry); -+ else -+ di_read_lock_child(dentry, flags); -+ -+ if (au_ftest_lock(flags, GEN)) { -+ err = au_digen_test(dentry, au_sigen(sb)); -+ AuDebugOn(!err && au_dbrange_test(dentry)); -+ if (unlikely(err)) -+ aufs_read_unlock(dentry, flags); -+ } -+ -+out: -+ return err; -+} -+ -+void aufs_read_unlock(struct dentry *dentry, int flags) -+{ -+ if (au_ftest_lock(flags, DW)) -+ di_write_unlock(dentry); -+ else -+ di_read_unlock(dentry, flags); -+ si_read_unlock(dentry->d_sb); -+} -+ -+void aufs_write_lock(struct dentry *dentry) -+{ -+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW); -+ di_write_lock_child(dentry); -+} -+ -+void aufs_write_unlock(struct dentry *dentry) -+{ -+ di_write_unlock(dentry); -+ si_write_unlock(dentry->d_sb); -+} -+ -+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags) -+{ -+ int err; -+ unsigned int sigen; -+ struct super_block *sb; -+ -+ sb = d1->d_sb; -+ err = si_read_lock(sb, flags); -+ if (unlikely(err)) -+ goto out; -+ -+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIR)); -+ -+ if (au_ftest_lock(flags, GEN)) { -+ sigen = au_sigen(sb); -+ err = au_digen_test(d1, sigen); -+ AuDebugOn(!err && au_dbrange_test(d1)); -+ if (!err) { -+ err = au_digen_test(d2, sigen); -+ AuDebugOn(!err && au_dbrange_test(d2)); -+ } -+ if (unlikely(err)) -+ aufs_read_and_write_unlock2(d1, d2); -+ } -+ -+out: -+ return err; -+} -+ -+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2) -+{ -+ di_write_unlock2(d1, 
d2); -+ si_read_unlock(d1->d_sb); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int si_pid_test_slow(struct super_block *sb) -+{ -+ void *p; -+ -+ rcu_read_lock(); -+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid); -+ rcu_read_unlock(); -+ -+ return (long)!!p; -+} -+ -+void si_pid_set_slow(struct super_block *sb) -+{ -+ int err; -+ struct au_sbinfo *sbinfo; -+ -+ AuDebugOn(si_pid_test_slow(sb)); -+ -+ sbinfo = au_sbi(sb); -+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); -+ AuDebugOn(err); -+ spin_lock(&sbinfo->au_si_pid.tree_lock); -+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid, -+ /*any valid ptr*/sb); -+ spin_unlock(&sbinfo->au_si_pid.tree_lock); -+ AuDebugOn(err); -+ radix_tree_preload_end(); -+} -+ -+void si_pid_clr_slow(struct super_block *sb) -+{ -+ void *p; -+ struct au_sbinfo *sbinfo; -+ -+ AuDebugOn(!si_pid_test_slow(sb)); -+ -+ sbinfo = au_sbi(sb); -+ spin_lock(&sbinfo->au_si_pid.tree_lock); -+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid); -+ spin_unlock(&sbinfo->au_si_pid.tree_lock); -+} -diff --git fs/aufs/spl.h fs/aufs/spl.h -new file mode 100644 -index 0000000..686473b ---- /dev/null -+++ fs/aufs/spl.h -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * simple list protected by a spinlock -+ */ -+ -+#ifndef __AUFS_SPL_H__ -+#define __AUFS_SPL_H__ -+ -+#ifdef __KERNEL__ -+ -+struct au_splhead { -+ spinlock_t spin; -+ struct list_head head; -+}; -+ -+static inline void au_spl_init(struct au_splhead *spl) -+{ -+ spin_lock_init(&spl->spin); -+ INIT_LIST_HEAD(&spl->head); -+} -+ -+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl) -+{ -+ spin_lock(&spl->spin); -+ list_add(list, &spl->head); -+ spin_unlock(&spl->spin); -+} -+ -+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl) -+{ -+ spin_lock(&spl->spin); -+ list_del(list); -+ spin_unlock(&spl->spin); -+} -+ -+static inline void au_spl_del_rcu(struct list_head *list, -+ struct au_splhead *spl) -+{ -+ spin_lock(&spl->spin); -+ list_del_rcu(list); -+ spin_unlock(&spl->spin); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_sphlhead { -+ spinlock_t spin; -+ struct hlist_head head; -+}; -+ -+static inline void au_sphl_init(struct au_sphlhead *sphl) -+{ -+ spin_lock_init(&sphl->spin); -+ INIT_HLIST_HEAD(&sphl->head); -+} -+ -+static inline void au_sphl_add(struct hlist_node *hlist, -+ struct au_sphlhead *sphl) -+{ -+ spin_lock(&sphl->spin); -+ hlist_add_head(hlist, &sphl->head); -+ spin_unlock(&sphl->spin); -+} -+ -+static inline void au_sphl_del(struct hlist_node *hlist, -+ struct au_sphlhead *sphl) -+{ -+ spin_lock(&sphl->spin); -+ hlist_del(hlist); -+ spin_unlock(&sphl->spin); -+} -+ -+static inline void au_sphl_del_rcu(struct hlist_node *hlist, -+ struct au_sphlhead *sphl) -+{ -+ spin_lock(&sphl->spin); -+ hlist_del_rcu(hlist); -+ spin_unlock(&sphl->spin); -+} -+ -+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl) -+{ -+ unsigned long cnt; -+ struct hlist_node *pos; -+ -+ cnt = 0; -+ spin_lock(&sphl->spin); -+ hlist_for_each(pos, &sphl->head) -+ cnt++; -+ spin_unlock(&sphl->spin); -+ return cnt; -+} -+ -+#endif /* 
__KERNEL__ */ -+#endif /* __AUFS_SPL_H__ */ -diff --git fs/aufs/super.c fs/aufs/super.c -new file mode 100644 -index 0000000..5d0f229 ---- /dev/null -+++ fs/aufs/super.c -@@ -0,0 +1,985 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * mount and super_block operations -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "aufs.h" -+ -+/* -+ * super_operations -+ */ -+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused) -+{ -+ struct au_icntnr *c; -+ -+ c = au_cache_alloc_icntnr(); -+ if (c) { -+ au_icntnr_init(c); -+ c->vfs_inode.i_version = 1; /* sigen(sb); */ -+ c->iinfo.ii_hinode = NULL; -+ return &c->vfs_inode; -+ } -+ return NULL; -+} -+ -+static void aufs_destroy_inode_cb(struct rcu_head *head) -+{ -+ struct inode *inode = container_of(head, struct inode, i_rcu); -+ -+ INIT_HLIST_HEAD(&inode->i_dentry); -+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode)); -+} -+ -+static void aufs_destroy_inode(struct inode *inode) -+{ -+ au_iinfo_fin(inode); -+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb); -+} -+ -+struct inode *au_iget_locked(struct super_block *sb, ino_t ino) -+{ -+ struct inode *inode; -+ int err; -+ -+ inode = iget_locked(sb, ino); -+ if (unlikely(!inode)) { -+ inode = ERR_PTR(-ENOMEM); -+ goto out; -+ } -+ if (!(inode->i_state & I_NEW)) -+ goto out; -+ -+ err = au_xigen_new(inode); -+ if (!err) -+ err = au_iinfo_init(inode); -+ if (!err) -+ inode->i_version++; -+ else { -+ iget_failed(inode); -+ inode = ERR_PTR(err); -+ } -+ -+out: -+ /* never return NULL */ -+ AuDebugOn(!inode); -+ AuTraceErrPtr(inode); -+ return inode; -+} -+ -+/* lock free root dinfo */ -+static int au_show_brs(struct seq_file *seq, struct super_block *sb) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ struct path path; -+ struct au_hdentry *hdp; -+ struct au_branch *br; -+ au_br_perm_str_t perm; -+ -+ err = 0; -+ bend = au_sbend(sb); -+ hdp = au_di(sb->s_root)->di_hdentry; -+ 
for (bindex = 0; !err && bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ path.mnt = au_br_mnt(br); -+ path.dentry = hdp[bindex].hd_dentry; -+ err = au_seq_path(seq, &path); -+ if (err > 0) { -+ au_optstr_br_perm(&perm, br->br_perm); -+ err = seq_printf(seq, "=%s", perm.a); -+ if (err == -1) -+ err = -E2BIG; -+ } -+ if (!err && bindex != bend) -+ err = seq_putc(seq, ':'); -+ } -+ -+ return err; -+} -+ -+static void au_show_wbr_create(struct seq_file *m, int v, -+ struct au_sbinfo *sbinfo) -+{ -+ const char *pat; -+ -+ AuRwMustAnyLock(&sbinfo->si_rwsem); -+ -+ seq_puts(m, ",create="); -+ pat = au_optstr_wbr_create(v); -+ switch (v) { -+ case AuWbrCreate_TDP: -+ case AuWbrCreate_RR: -+ case AuWbrCreate_MFS: -+ case AuWbrCreate_PMFS: -+ seq_puts(m, pat); -+ break; -+ case AuWbrCreate_MFSV: -+ seq_printf(m, /*pat*/"mfs:%lu", -+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) -+ / MSEC_PER_SEC); -+ break; -+ case AuWbrCreate_PMFSV: -+ seq_printf(m, /*pat*/"pmfs:%lu", -+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) -+ / MSEC_PER_SEC); -+ break; -+ case AuWbrCreate_MFSRR: -+ seq_printf(m, /*pat*/"mfsrr:%llu", -+ sbinfo->si_wbr_mfs.mfsrr_watermark); -+ break; -+ case AuWbrCreate_MFSRRV: -+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu", -+ sbinfo->si_wbr_mfs.mfsrr_watermark, -+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) -+ / MSEC_PER_SEC); -+ break; -+ case AuWbrCreate_PMFSRR: -+ seq_printf(m, /*pat*/"pmfsrr:%llu", -+ sbinfo->si_wbr_mfs.mfsrr_watermark); -+ break; -+ case AuWbrCreate_PMFSRRV: -+ seq_printf(m, /*pat*/"pmfsrr:%llu:%lu", -+ sbinfo->si_wbr_mfs.mfsrr_watermark, -+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire) -+ / MSEC_PER_SEC); -+ break; -+ } -+} -+ -+static int au_show_xino(struct seq_file *seq, struct super_block *sb) -+{ -+#ifdef CONFIG_SYSFS -+ return 0; -+#else -+ int err; -+ const int len = sizeof(AUFS_XINO_FNAME) - 1; -+ aufs_bindex_t bindex, brid; -+ struct qstr *name; -+ struct file *f; -+ struct dentry *d, *h_root; -+ struct au_hdentry 
*hdp; -+ -+ AuRwMustAnyLock(&sbinfo->si_rwsem); -+ -+ err = 0; -+ f = au_sbi(sb)->si_xib; -+ if (!f) -+ goto out; -+ -+ /* stop printing the default xino path on the first writable branch */ -+ h_root = NULL; -+ brid = au_xino_brid(sb); -+ if (brid >= 0) { -+ bindex = au_br_index(sb, brid); -+ hdp = au_di(sb->s_root)->di_hdentry; -+ h_root = hdp[0 + bindex].hd_dentry; -+ } -+ d = f->f_dentry; -+ name = &d->d_name; -+ /* safe ->d_parent because the file is unlinked */ -+ if (d->d_parent == h_root -+ && name->len == len -+ && !memcmp(name->name, AUFS_XINO_FNAME, len)) -+ goto out; -+ -+ seq_puts(seq, ",xino="); -+ err = au_xino_path(seq, f); -+ -+out: -+ return err; -+#endif -+} -+ -+/* seq_file will re-call me in case of too long string */ -+static int aufs_show_options(struct seq_file *m, struct dentry *dentry) -+{ -+ int err; -+ unsigned int mnt_flags, v; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ -+#define AuBool(name, str) do { \ -+ v = au_opt_test(mnt_flags, name); \ -+ if (v != au_opt_test(AuOpt_Def, name)) \ -+ seq_printf(m, ",%s" #str, v ? 
"" : "no"); \ -+} while (0) -+ -+#define AuStr(name, str) do { \ -+ v = mnt_flags & AuOptMask_##name; \ -+ if (v != (AuOpt_Def & AuOptMask_##name)) \ -+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \ -+} while (0) -+ -+#define AuUInt(name, str, val) do { \ -+ if (val != AUFS_##name##_DEF) \ -+ seq_printf(m, "," #str "=%u", val); \ -+} while (0) -+ -+ /* lock free root dinfo */ -+ sb = dentry->d_sb; -+ si_noflush_read_lock(sb); -+ sbinfo = au_sbi(sb); -+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo)); -+ -+ mnt_flags = au_mntflags(sb); -+ if (au_opt_test(mnt_flags, XINO)) { -+ err = au_show_xino(m, sb); -+ if (unlikely(err)) -+ goto out; -+ } else -+ seq_puts(m, ",noxino"); -+ -+ AuBool(TRUNC_XINO, trunc_xino); -+ AuStr(UDBA, udba); -+ AuBool(SHWH, shwh); -+ AuBool(PLINK, plink); -+ AuBool(DIO, dio); -+ AuBool(DIRPERM1, dirperm1); -+ /* AuBool(REFROF, refrof); */ -+ -+ v = sbinfo->si_wbr_create; -+ if (v != AuWbrCreate_Def) -+ au_show_wbr_create(m, v, sbinfo); -+ -+ v = sbinfo->si_wbr_copyup; -+ if (v != AuWbrCopyup_Def) -+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v)); -+ -+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ); -+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ)) -+ seq_printf(m, ",diropq=%c", v ? 
'a' : 'w'); -+ -+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh); -+ -+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC; -+ AuUInt(RDCACHE, rdcache, v); -+ -+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk); -+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash); -+ -+ AuBool(SUM, sum); -+ /* AuBool(SUM_W, wsum); */ -+ AuBool(WARN_PERM, warn_perm); -+ AuBool(VERBOSE, verbose); -+ -+out: -+ /* be sure to print "br:" last */ -+ if (!sysaufs_brs) { -+ seq_puts(m, ",br:"); -+ au_show_brs(m, sb); -+ } -+ si_read_unlock(sb); -+ return 0; -+ -+#undef AuBool -+#undef AuStr -+#undef AuUInt -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* sum mode which returns the summation for statfs(2) */ -+ -+static u64 au_add_till_max(u64 a, u64 b) -+{ -+ u64 old; -+ -+ old = a; -+ a += b; -+ if (old <= a) -+ return a; -+ return ULLONG_MAX; -+} -+ -+static u64 au_mul_till_max(u64 a, long mul) -+{ -+ u64 old; -+ -+ old = a; -+ a *= mul; -+ if (old <= a) -+ return a; -+ return ULLONG_MAX; -+} -+ -+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf) -+{ -+ int err; -+ long bsize, factor; -+ u64 blocks, bfree, bavail, files, ffree; -+ aufs_bindex_t bend, bindex, i; -+ unsigned char shared; -+ struct path h_path; -+ struct super_block *h_sb; -+ -+ err = 0; -+ bsize = LONG_MAX; -+ files = 0; -+ ffree = 0; -+ blocks = 0; -+ bfree = 0; -+ bavail = 0; -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ h_path.mnt = au_sbr_mnt(sb, bindex); -+ h_sb = h_path.mnt->mnt_sb; -+ shared = 0; -+ for (i = 0; !shared && i < bindex; i++) -+ shared = (au_sbr_sb(sb, i) == h_sb); -+ if (shared) -+ continue; -+ -+ /* sb->s_root for NFS is unreliable */ -+ h_path.dentry = h_path.mnt->mnt_root; -+ err = vfs_statfs(&h_path, buf); -+ if (unlikely(err)) -+ goto out; -+ -+ if (bsize > buf->f_bsize) { -+ /* -+ * we will reduce bsize, so we have to expand blocks -+ * etc. 
to match them again -+ */ -+ factor = (bsize / buf->f_bsize); -+ blocks = au_mul_till_max(blocks, factor); -+ bfree = au_mul_till_max(bfree, factor); -+ bavail = au_mul_till_max(bavail, factor); -+ bsize = buf->f_bsize; -+ } -+ -+ factor = (buf->f_bsize / bsize); -+ blocks = au_add_till_max(blocks, -+ au_mul_till_max(buf->f_blocks, factor)); -+ bfree = au_add_till_max(bfree, -+ au_mul_till_max(buf->f_bfree, factor)); -+ bavail = au_add_till_max(bavail, -+ au_mul_till_max(buf->f_bavail, factor)); -+ files = au_add_till_max(files, buf->f_files); -+ ffree = au_add_till_max(ffree, buf->f_ffree); -+ } -+ -+ buf->f_bsize = bsize; -+ buf->f_blocks = blocks; -+ buf->f_bfree = bfree; -+ buf->f_bavail = bavail; -+ buf->f_files = files; -+ buf->f_ffree = ffree; -+ buf->f_frsize = 0; -+ -+out: -+ return err; -+} -+ -+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf) -+{ -+ int err; -+ struct path h_path; -+ struct super_block *sb; -+ -+ /* lock free root dinfo */ -+ sb = dentry->d_sb; -+ si_noflush_read_lock(sb); -+ if (!au_opt_test(au_mntflags(sb), SUM)) { -+ /* sb->s_root for NFS is unreliable */ -+ h_path.mnt = au_sbr_mnt(sb, 0); -+ h_path.dentry = h_path.mnt->mnt_root; -+ err = vfs_statfs(&h_path, buf); -+ } else -+ err = au_statfs_sum(sb, buf); -+ si_read_unlock(sb); -+ -+ if (!err) { -+ buf->f_type = AUFS_SUPER_MAGIC; -+ buf->f_namelen = AUFS_MAX_NAMELEN; -+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid)); -+ } -+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */ -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int aufs_sync_fs(struct super_block *sb, int wait) -+{ -+ int err, e; -+ aufs_bindex_t bend, bindex; -+ struct au_branch *br; -+ struct super_block *h_sb; -+ -+ err = 0; -+ si_noflush_read_lock(sb); -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (!au_br_writable(br->br_perm)) -+ continue; -+ -+ h_sb = 
au_sbr_sb(sb, bindex); -+ if (h_sb->s_op->sync_fs) { -+ e = h_sb->s_op->sync_fs(h_sb, wait); -+ if (unlikely(e && !err)) -+ err = e; -+ /* go on even if an error happens */ -+ } -+ } -+ si_read_unlock(sb); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* final actions when unmounting a file system */ -+static void aufs_put_super(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ sbinfo = au_sbi(sb); -+ if (!sbinfo) -+ return; -+ -+ dbgaufs_si_fin(sbinfo); -+ kobject_put(&sbinfo->si_kobj); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_array_free(void *array) -+{ -+ if (array) { -+ if (!is_vmalloc_addr(array)) -+ kfree(array); -+ else -+ vfree(array); -+ } -+} -+ -+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg) -+{ -+ void *array; -+ unsigned long long n, sz; -+ -+ array = NULL; -+ n = 0; -+ if (!*hint) -+ goto out; -+ -+ if (*hint > ULLONG_MAX / sizeof(array)) { -+ array = ERR_PTR(-EMFILE); -+ pr_err("hint %llu\n", *hint); -+ goto out; -+ } -+ -+ sz = sizeof(array) * *hint; -+ array = kmalloc(sz, GFP_NOFS); -+ if (unlikely(!array)) -+ array = vmalloc(sz); -+ if (unlikely(!array)) { -+ array = ERR_PTR(-ENOMEM); -+ goto out; -+ } -+ -+ n = cb(array, *hint, arg); -+ AuDebugOn(n > *hint); -+ -+out: -+ *hint = n; -+ return array; -+} -+ -+static unsigned long long au_iarray_cb(void *a, -+ unsigned long long max __maybe_unused, -+ void *arg) -+{ -+ unsigned long long n; -+ struct inode **p, *inode; -+ struct list_head *head; -+ -+ n = 0; -+ p = a; -+ head = arg; -+ spin_lock(&inode_sb_list_lock); -+ list_for_each_entry(inode, head, i_sb_list) { -+ if (!is_bad_inode(inode) -+ && au_ii(inode)->ii_bstart >= 0) { -+ spin_lock(&inode->i_lock); -+ if (atomic_read(&inode->i_count)) { -+ au_igrab(inode); -+ *p++ = inode; -+ n++; -+ AuDebugOn(n > max); -+ } -+ spin_unlock(&inode->i_lock); -+ } -+ } -+ 
spin_unlock(&inode_sb_list_lock); -+ -+ return n; -+} -+ -+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max) -+{ -+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes); -+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes); -+} -+ -+void au_iarray_free(struct inode **a, unsigned long long max) -+{ -+ unsigned long long ull; -+ -+ for (ull = 0; ull < max; ull++) -+ iput(a[ull]); -+ au_array_free(a); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * refresh dentry and inode at remount time. -+ */ -+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */ -+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags, -+ struct dentry *parent) -+{ -+ int err; -+ -+ di_write_lock_child(dentry); -+ di_read_lock_parent(parent, AuLock_IR); -+ err = au_refresh_dentry(dentry, parent); -+ if (!err && dir_flags) -+ au_hn_reset(dentry->d_inode, dir_flags); -+ di_read_unlock(parent, AuLock_IR); -+ di_write_unlock(dentry); -+ -+ return err; -+} -+ -+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen, -+ struct au_sbinfo *sbinfo, -+ const unsigned int dir_flags) -+{ -+ int err; -+ struct dentry *parent; -+ struct inode *inode; -+ -+ err = 0; -+ parent = dget_parent(dentry); -+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) { -+ inode = dentry->d_inode; -+ if (inode) { -+ if (!S_ISDIR(inode->i_mode)) -+ err = au_do_refresh(dentry, /*dir_flags*/0, -+ parent); -+ else { -+ err = au_do_refresh(dentry, dir_flags, parent); -+ if (unlikely(err)) -+ au_fset_si(sbinfo, FAILED_REFRESH_DIR); -+ } -+ } else -+ err = au_do_refresh(dentry, /*dir_flags*/0, parent); -+ AuDbgDentry(dentry); -+ } -+ dput(parent); -+ -+ AuTraceErr(err); -+ return err; -+} -+ -+static int au_refresh_d(struct super_block *sb) -+{ -+ int err, i, j, ndentry, e; -+ unsigned int sigen; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry **dentries, *d; 
-+ struct au_sbinfo *sbinfo; -+ struct dentry *root = sb->s_root; -+ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1); -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_dcsub_pages(&dpages, root, NULL, NULL); -+ if (unlikely(err)) -+ goto out_dpages; -+ -+ sigen = au_sigen(sb); -+ sbinfo = au_sbi(sb); -+ for (i = 0; i < dpages.ndpage; i++) { -+ dpage = dpages.dpages + i; -+ dentries = dpage->dentries; -+ ndentry = dpage->ndentry; -+ for (j = 0; j < ndentry; j++) { -+ d = dentries[j]; -+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags); -+ if (unlikely(e && !err)) -+ err = e; -+ /* go on even err */ -+ } -+ } -+ -+out_dpages: -+ au_dpages_free(&dpages); -+out: -+ return err; -+} -+ -+static int au_refresh_i(struct super_block *sb) -+{ -+ int err, e; -+ unsigned int sigen; -+ unsigned long long max, ull; -+ struct inode *inode, **array; -+ -+ array = au_iarray_alloc(sb, &max); -+ err = PTR_ERR(array); -+ if (IS_ERR(array)) -+ goto out; -+ -+ err = 0; -+ sigen = au_sigen(sb); -+ for (ull = 0; ull < max; ull++) { -+ inode = array[ull]; -+ if (au_iigen(inode, NULL) != sigen) { -+ ii_write_lock_child(inode); -+ e = au_refresh_hinode_self(inode); -+ ii_write_unlock(inode); -+ if (unlikely(e)) { -+ pr_err("error %d, i%lu\n", e, inode->i_ino); -+ if (!err) -+ err = e; -+ /* go on even if err */ -+ } -+ } -+ } -+ -+ au_iarray_free(array, max); -+ -+out: -+ return err; -+} -+ -+static void au_remount_refresh(struct super_block *sb) -+{ -+ int err, e; -+ unsigned int udba; -+ aufs_bindex_t bindex, bend; -+ struct dentry *root; -+ struct inode *inode; -+ struct au_branch *br; -+ -+ au_sigen_inc(sb); -+ au_fclr_si(au_sbi(sb), FAILED_REFRESH_DIR); -+ -+ root = sb->s_root; -+ DiMustNoWaiters(root); -+ inode = root->d_inode; -+ IiMustNoWaiters(inode); -+ -+ udba = au_opt_udba(sb); -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ err = au_hnotify_reset_br(udba, br, 
br->br_perm); -+ if (unlikely(err)) -+ AuIOErr("hnotify failed on br %d, %d, ignored\n", -+ bindex, err); -+ /* go on even if err */ -+ } -+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1)); -+ -+ di_write_unlock(root); -+ err = au_refresh_d(sb); -+ e = au_refresh_i(sb); -+ if (unlikely(e && !err)) -+ err = e; -+ /* aufs_write_lock() calls ..._child() */ -+ di_write_lock_child(root); -+ -+ au_cpup_attr_all(inode, /*force*/1); -+ -+ if (unlikely(err)) -+ AuIOErr("refresh failed, ignored, %d\n", err); -+} -+ -+/* stop extra interpretation of errno in mount(8), and strange error messages */ -+static int cvt_err(int err) -+{ -+ AuTraceErr(err); -+ -+ switch (err) { -+ case -ENOENT: -+ case -ENOTDIR: -+ case -EEXIST: -+ case -EIO: -+ err = -EINVAL; -+ } -+ return err; -+} -+ -+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data) -+{ -+ int err, do_dx; -+ unsigned int mntflags; -+ struct au_opts opts; -+ struct dentry *root; -+ struct inode *inode; -+ struct au_sbinfo *sbinfo; -+ -+ err = 0; -+ root = sb->s_root; -+ if (!data || !*data) { -+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (!err) { -+ di_write_lock_child(root); -+ err = au_opts_verify(sb, *flags, /*pending*/0); -+ aufs_write_unlock(root); -+ } -+ goto out; -+ } -+ -+ err = -ENOMEM; -+ memset(&opts, 0, sizeof(opts)); -+ opts.opt = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!opts.opt)) -+ goto out; -+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); -+ opts.flags = AuOpts_REMOUNT; -+ opts.sb_flags = *flags; -+ -+ /* parse it before aufs lock */ -+ err = au_opts_parse(sb, data, &opts); -+ if (unlikely(err)) -+ goto out_opts; -+ -+ sbinfo = au_sbi(sb); -+ inode = root->d_inode; -+ mutex_lock(&inode->i_mutex); -+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM); -+ if (unlikely(err)) -+ goto out_mtx; -+ di_write_lock_child(root); -+ -+ /* au_opts_remount() may return an error */ -+ err = au_opts_remount(sb, &opts); -+ au_opts_free(&opts); -+ -+ if 
(au_ftest_opts(opts.flags, REFRESH)) -+ au_remount_refresh(sb); -+ -+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) { -+ mntflags = au_mntflags(sb); -+ do_dx = !!au_opt_test(mntflags, DIO); -+ au_dy_arefresh(do_dx); -+ } -+ -+ aufs_write_unlock(root); -+ -+out_mtx: -+ mutex_unlock(&inode->i_mutex); -+out_opts: -+ free_page((unsigned long)opts.opt); -+out: -+ err = cvt_err(err); -+ AuTraceErr(err); -+ return err; -+} -+ -+static const struct super_operations aufs_sop = { -+ .alloc_inode = aufs_alloc_inode, -+ .destroy_inode = aufs_destroy_inode, -+ /* always deleting, no clearing */ -+ .drop_inode = generic_delete_inode, -+ .show_options = aufs_show_options, -+ .statfs = aufs_statfs, -+ .put_super = aufs_put_super, -+ .sync_fs = aufs_sync_fs, -+ .remount_fs = aufs_remount_fs -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int alloc_root(struct super_block *sb) -+{ -+ int err; -+ struct inode *inode; -+ struct dentry *root; -+ -+ err = -ENOMEM; -+ inode = au_iget_locked(sb, AUFS_ROOT_INO); -+ err = PTR_ERR(inode); -+ if (IS_ERR(inode)) -+ goto out; -+ -+ inode->i_op = &aufs_dir_iop; -+ inode->i_fop = &aufs_dir_fop; -+ inode->i_mode = S_IFDIR; -+ set_nlink(inode, 2); -+ unlock_new_inode(inode); -+ -+ root = d_make_root(inode); -+ if (unlikely(!root)) -+ goto out; -+ err = PTR_ERR(root); -+ if (IS_ERR(root)) -+ goto out; -+ -+ err = au_di_init(root); -+ if (!err) { -+ sb->s_root = root; -+ return 0; /* success */ -+ } -+ dput(root); -+ -+out: -+ return err; -+} -+ -+static int aufs_fill_super(struct super_block *sb, void *raw_data, -+ int silent __maybe_unused) -+{ -+ int err; -+ struct au_opts opts; -+ struct dentry *root; -+ struct inode *inode; -+ char *arg = raw_data; -+ -+ if (unlikely(!arg || !*arg)) { -+ err = -EINVAL; -+ pr_err("no arg\n"); -+ goto out; -+ } -+ -+ err = -ENOMEM; -+ memset(&opts, 0, sizeof(opts)); -+ opts.opt = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!opts.opt)) -+ goto out; -+ 
opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); -+ opts.sb_flags = sb->s_flags; -+ -+ err = au_si_alloc(sb); -+ if (unlikely(err)) -+ goto out_opts; -+ -+ /* all timestamps always follow the ones on the branch */ -+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME; -+ sb->s_op = &aufs_sop; -+ sb->s_d_op = &aufs_dop; -+ sb->s_magic = AUFS_SUPER_MAGIC; -+ sb->s_maxbytes = 0; -+ au_export_init(sb); -+ -+ err = alloc_root(sb); -+ if (unlikely(err)) { -+ si_write_unlock(sb); -+ goto out_info; -+ } -+ root = sb->s_root; -+ inode = root->d_inode; -+ -+ /* -+ * actually we can parse options regardless aufs lock here. -+ * but at remount time, parsing must be done before aufs lock. -+ * so we follow the same rule. -+ */ -+ ii_write_lock_parent(inode); -+ aufs_write_unlock(root); -+ err = au_opts_parse(sb, arg, &opts); -+ if (unlikely(err)) -+ goto out_root; -+ -+ /* lock vfs_inode first, then aufs. */ -+ mutex_lock(&inode->i_mutex); -+ aufs_write_lock(root); -+ err = au_opts_mount(sb, &opts); -+ au_opts_free(&opts); -+ aufs_write_unlock(root); -+ mutex_unlock(&inode->i_mutex); -+ if (!err) -+ goto out_opts; /* success */ -+ -+out_root: -+ dput(root); -+ sb->s_root = NULL; -+out_info: -+ dbgaufs_si_fin(au_sbi(sb)); -+ kobject_put(&au_sbi(sb)->si_kobj); -+ sb->s_fs_info = NULL; -+out_opts: -+ free_page((unsigned long)opts.opt); -+out: -+ AuTraceErr(err); -+ err = cvt_err(err); -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags, -+ const char *dev_name __maybe_unused, -+ void *raw_data) -+{ -+ struct dentry *root; -+ struct super_block *sb; -+ -+ /* all timestamps always follow the ones on the branch */ -+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */ -+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super); -+ if (IS_ERR(root)) -+ goto out; -+ -+ sb = root->d_sb; -+ si_write_lock(sb, !AuLock_FLUSH); -+ 
sysaufs_brs_add(sb, 0); -+ si_write_unlock(sb); -+ au_sbilist_add(sb); -+ -+out: -+ return root; -+} -+ -+static void aufs_kill_sb(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ sbinfo = au_sbi(sb); -+ if (sbinfo) { -+ au_sbilist_del(sb); -+ aufs_write_lock(sb->s_root); -+ if (sbinfo->si_wbr_create_ops->fin) -+ sbinfo->si_wbr_create_ops->fin(sb); -+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) { -+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE); -+ au_remount_refresh(sb); -+ } -+ if (au_opt_test(sbinfo->si_mntflags, PLINK)) -+ au_plink_put(sb, /*verbose*/1); -+ au_xino_clr(sb); -+ sbinfo->si_sb = NULL; -+ aufs_write_unlock(sb->s_root); -+ au_nwt_flush(&sbinfo->si_nowait); -+ } -+ kill_anon_super(sb); -+} -+ -+struct file_system_type aufs_fs_type = { -+ .name = AUFS_FSTYPE, -+ /* a race between rename and others */ -+ .fs_flags = FS_RENAME_DOES_D_MOVE, -+ .mount = aufs_mount, -+ .kill_sb = aufs_kill_sb, -+ /* no need to __module_get() and module_put(). */ -+ .owner = THIS_MODULE, -+}; -diff --git fs/aufs/super.h fs/aufs/super.h -new file mode 100644 -index 0000000..0f84d6d ---- /dev/null -+++ fs/aufs/super.h -@@ -0,0 +1,569 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * super_block operations -+ */ -+ -+#ifndef __AUFS_SUPER_H__ -+#define __AUFS_SUPER_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "rwsem.h" -+#include "spl.h" -+#include "wkq.h" -+ -+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *); -+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t, -+ loff_t *); -+ -+/* policies to select one among multiple writable branches */ -+struct au_wbr_copyup_operations { -+ int (*copyup)(struct dentry *dentry); -+}; -+ -+#define AuWbr_DIR 1 /* target is a dir */ -+#define AuWbr_PARENT (1 << 1) /* always require a parent */ -+ -+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name) -+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; } -+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; } -+ -+struct au_wbr_create_operations { -+ int (*create)(struct dentry *dentry, unsigned int flags); -+ int (*init)(struct super_block *sb); -+ int (*fin)(struct super_block *sb); -+}; -+ -+struct au_wbr_mfs { -+ struct mutex mfs_lock; /* protect this structure */ -+ unsigned long mfs_jiffy; -+ unsigned long mfs_expire; -+ aufs_bindex_t mfs_bindex; -+ -+ unsigned long long mfsrr_bytes; -+ unsigned long long mfsrr_watermark; -+}; -+ -+struct pseudo_link { -+ union { -+ struct hlist_node hlist; -+ struct rcu_head rcu; -+ }; -+ struct inode *inode; -+}; -+ -+#define AuPlink_NHASH 100 -+static inline int au_plink_hash(ino_t ino) -+{ -+ return ino % AuPlink_NHASH; -+} -+ -+struct au_branch; -+struct au_sbinfo { -+ /* nowait tasks in the system-wide workqueue */ -+ struct au_nowait_tasks si_nowait; -+ -+ /* -+ * tried sb->s_umount, but failed due to the dependecy between i_mutex. -+ * rwsem for au_sbinfo is necessary. 
-+ */ -+ struct au_rwsem si_rwsem; -+ -+ /* prevent recursive locking in deleting inode */ -+ struct { -+ unsigned long *bitmap; -+ spinlock_t tree_lock; -+ struct radix_tree_root tree; -+ } au_si_pid; -+ -+ /* -+ * dirty approach to protect sb->sb_inodes and ->s_files (gone) from -+ * remount. -+ */ -+ atomic_long_t si_ninodes, si_nfiles; -+ -+ /* branch management */ -+ unsigned int si_generation; -+ -+ /* see above flags */ -+ unsigned char au_si_status; -+ -+ aufs_bindex_t si_bend; -+ -+ /* dirty trick to keep br_id plus */ -+ unsigned int si_last_br_id : -+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1; -+ struct au_branch **si_branch; -+ -+ /* policy to select a writable branch */ -+ unsigned char si_wbr_copyup; -+ unsigned char si_wbr_create; -+ struct au_wbr_copyup_operations *si_wbr_copyup_ops; -+ struct au_wbr_create_operations *si_wbr_create_ops; -+ -+ /* round robin */ -+ atomic_t si_wbr_rr_next; -+ -+ /* most free space */ -+ struct au_wbr_mfs si_wbr_mfs; -+ -+ /* mount flags */ -+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */ -+ unsigned int si_mntflags; -+ -+ /* external inode number (bitmap and translation table) */ -+ au_readf_t si_xread; -+ au_writef_t si_xwrite; -+ struct file *si_xib; -+ struct mutex si_xib_mtx; /* protect xib members */ -+ unsigned long *si_xib_buf; -+ unsigned long si_xib_last_pindex; -+ int si_xib_next_bit; -+ aufs_bindex_t si_xino_brid; -+ unsigned long si_xino_jiffy; -+ unsigned long si_xino_expire; -+ /* reserved for future use */ -+ /* unsigned long long si_xib_limit; */ /* Max xib file size */ -+ -+#ifdef CONFIG_AUFS_EXPORT -+ /* i_generation */ -+ struct file *si_xigen; -+ atomic_t si_xigen_next; -+#endif -+ -+ /* vdir parameters */ -+ unsigned long si_rdcache; /* max cache time in jiffies */ -+ unsigned int si_rdblk; /* deblk size */ -+ unsigned int si_rdhash; /* hash size */ -+ -+ /* -+ * If the number of whiteouts are larger than si_dirwh, leave all of -+ * them after au_whtmp_ren to reduce the cost of 
rmdir(2). -+ * future fsck.aufs or kernel thread will remove them later. -+ * Otherwise, remove all whiteouts and the dir in rmdir(2). -+ */ -+ unsigned int si_dirwh; -+ -+ /* -+ * rename(2) a directory with all children. -+ */ -+ /* reserved for future use */ -+ /* int si_rendir; */ -+ -+ /* pseudo_link list */ -+ struct au_sphlhead si_plink[AuPlink_NHASH]; -+ wait_queue_head_t si_plink_wq; -+ spinlock_t si_plink_maint_lock; -+ pid_t si_plink_maint_pid; -+ -+ /* file list */ -+ struct au_sphlhead si_files; -+ -+ /* -+ * sysfs and lifetime management. -+ * this is not a small structure and it may be a waste of memory in case -+ * of sysfs is disabled, particulary when many aufs-es are mounted. -+ * but using sysfs is majority. -+ */ -+ struct kobject si_kobj; -+#ifdef CONFIG_DEBUG_FS -+ struct dentry *si_dbgaufs; -+ struct dentry *si_dbgaufs_plink; -+ struct dentry *si_dbgaufs_xib; -+#ifdef CONFIG_AUFS_EXPORT -+ struct dentry *si_dbgaufs_xigen; -+#endif -+#endif -+ -+#ifdef CONFIG_AUFS_SBILIST -+ struct list_head si_list; -+#endif -+ -+ /* dirty, necessary for unmounting, sysfs and sysrq */ -+ struct super_block *si_sb; -+}; -+ -+/* sbinfo status flags */ -+/* -+ * set true when refresh_dirs() failed at remount time. -+ * then try refreshing dirs at access time again. 
/*
 * flag bits for si_read_lock()/aufs_read_lock()/di_read_lock().
 * one bit per flag, combined with bitwise OR.
 */
#define AuLock_DW	0x01	/* write-lock dentry */
#define AuLock_IR	0x02	/* read-lock inode */
#define AuLock_IW	0x04	/* write-lock inode */
#define AuLock_FLUSH	0x08	/* wait for 'nowait' tasks */
#define AuLock_DIR	0x10	/* target is a dir */
#define AuLock_NOPLM	0x20	/* return err in plm mode */
#define AuLock_NOPLMW	0x40	/* wait for plm mode ends */
#define AuLock_GEN	0x80	/* test digen/iigen */

/* test yields the masked bit; set/clear are single statements */
#define au_ftest_lock(flags, name)	((flags) & AuLock_##name)
#define au_fset_lock(flags, name) do { \
	(flags) |= AuLock_##name; \
} while (0)
#define au_fclr_lock(flags, name) do { \
	(flags) &= ~AuLock_##name; \
} while (0)
void *arg); -+void au_array_free(void *array); -+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg); -+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max); -+void au_iarray_free(struct inode **a, unsigned long long max); -+ -+/* sbinfo.c */ -+void au_si_free(struct kobject *kobj); -+int au_si_alloc(struct super_block *sb); -+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr); -+ -+unsigned int au_sigen_inc(struct super_block *sb); -+aufs_bindex_t au_new_br_id(struct super_block *sb); -+ -+int si_read_lock(struct super_block *sb, int flags); -+int si_write_lock(struct super_block *sb, int flags); -+int aufs_read_lock(struct dentry *dentry, int flags); -+void aufs_read_unlock(struct dentry *dentry, int flags); -+void aufs_write_lock(struct dentry *dentry); -+void aufs_write_unlock(struct dentry *dentry); -+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags); -+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2); -+ -+int si_pid_test_slow(struct super_block *sb); -+void si_pid_set_slow(struct super_block *sb); -+void si_pid_clr_slow(struct super_block *sb); -+ -+/* wbr_policy.c */ -+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[]; -+extern struct au_wbr_create_operations au_wbr_create_ops[]; -+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst); -+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex); -+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart); -+ -+/* mvdown.c */ -+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct au_sbinfo *au_sbi(struct super_block *sb) -+{ -+ return sb->s_fs_info; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_EXPORT -+int au_test_nfsd(void); -+void au_export_init(struct super_block *sb); -+void 
au_xigen_inc(struct inode *inode); -+int au_xigen_new(struct inode *inode); -+int au_xigen_set(struct super_block *sb, struct file *base); -+void au_xigen_clr(struct super_block *sb); -+ -+static inline int au_busy_or_stale(void) -+{ -+ if (!au_test_nfsd()) -+ return -EBUSY; -+ return -ESTALE; -+} -+#else -+AuStubInt0(au_test_nfsd, void) -+AuStubVoid(au_export_init, struct super_block *sb) -+AuStubVoid(au_xigen_inc, struct inode *inode) -+AuStubInt0(au_xigen_new, struct inode *inode) -+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base) -+AuStubVoid(au_xigen_clr, struct super_block *sb) -+static inline int au_busy_or_stale(void) -+{ -+ return -EBUSY; -+} -+#endif /* CONFIG_AUFS_EXPORT */ -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_SBILIST -+/* module.c */ -+extern struct au_splhead au_sbilist; -+ -+static inline void au_sbilist_init(void) -+{ -+ au_spl_init(&au_sbilist); -+} -+ -+static inline void au_sbilist_add(struct super_block *sb) -+{ -+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist); -+} -+ -+static inline void au_sbilist_del(struct super_block *sb) -+{ -+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist); -+} -+ -+#ifdef CONFIG_AUFS_MAGIC_SYSRQ -+static inline void au_sbilist_lock(void) -+{ -+ spin_lock(&au_sbilist.spin); -+} -+ -+static inline void au_sbilist_unlock(void) -+{ -+ spin_unlock(&au_sbilist.spin); -+} -+#define AuGFP_SBILIST GFP_ATOMIC -+#else -+AuStubVoid(au_sbilist_lock, void) -+AuStubVoid(au_sbilist_unlock, void) -+#define AuGFP_SBILIST GFP_NOFS -+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */ -+#else -+AuStubVoid(au_sbilist_init, void) -+AuStubVoid(au_sbilist_add, struct super_block*) -+AuStubVoid(au_sbilist_del, struct super_block*) -+AuStubVoid(au_sbilist_lock, void) -+AuStubVoid(au_sbilist_unlock, void) -+#define AuGFP_SBILIST GFP_NOFS -+#endif -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline void dbgaufs_si_null(struct 
au_sbinfo *sbinfo) -+{ -+ /* -+ * This function is a dynamic '__init' fucntion actually, -+ * so the tiny check for si_rwsem is unnecessary. -+ */ -+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */ -+#ifdef CONFIG_DEBUG_FS -+ sbinfo->si_dbgaufs = NULL; -+ sbinfo->si_dbgaufs_plink = NULL; -+ sbinfo->si_dbgaufs_xib = NULL; -+#ifdef CONFIG_AUFS_EXPORT -+ sbinfo->si_dbgaufs_xigen = NULL; -+#endif -+#endif -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline pid_t si_pid_bit(void) -+{ -+ /* the origin of pid is 1, but the bitmap's is 0 */ -+ return current->pid - 1; -+} -+ -+static inline int si_pid_test(struct super_block *sb) -+{ -+ pid_t bit; -+ -+ bit = si_pid_bit(); -+ if (bit < PID_MAX_DEFAULT) -+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap); -+ else -+ return si_pid_test_slow(sb); -+} -+ -+static inline void si_pid_set(struct super_block *sb) -+{ -+ pid_t bit; -+ -+ bit = si_pid_bit(); -+ if (bit < PID_MAX_DEFAULT) { -+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap)); -+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap); -+ /* smp_mb(); */ -+ } else -+ si_pid_set_slow(sb); -+} -+ -+static inline void si_pid_clr(struct super_block *sb) -+{ -+ pid_t bit; -+ -+ bit = si_pid_bit(); -+ if (bit < PID_MAX_DEFAULT) { -+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap)); -+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap); -+ /* smp_mb(); */ -+ } else -+ si_pid_clr_slow(sb); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* lock superblock. 
/*
 * try to read-lock the superblock without flushing anything.
 * returns the result of __si_read_trylock(); the pid mark is set only when
 * the lock was actually taken.
 */
static inline int si_noflush_read_trylock(struct super_block *sb)
{
	int locked = __si_read_trylock(sb);

	if (!locked)
		return locked;

	si_pid_set(sb);
	return locked;
}
super_block *sb) -+{ -+ SiMustAnyLock(sb); -+ return au_sbi(sb)->si_bend; -+} -+ -+static inline unsigned int au_mntflags(struct super_block *sb) -+{ -+ SiMustAnyLock(sb); -+ return au_sbi(sb)->si_mntflags; -+} -+ -+static inline unsigned int au_sigen(struct super_block *sb) -+{ -+ SiMustAnyLock(sb); -+ return au_sbi(sb)->si_generation; -+} -+ -+static inline void au_ninodes_inc(struct super_block *sb) -+{ -+ atomic_long_inc(&au_sbi(sb)->si_ninodes); -+} -+ -+static inline void au_ninodes_dec(struct super_block *sb) -+{ -+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes)); -+ atomic_long_dec(&au_sbi(sb)->si_ninodes); -+} -+ -+static inline void au_nfiles_inc(struct super_block *sb) -+{ -+ atomic_long_inc(&au_sbi(sb)->si_nfiles); -+} -+ -+static inline void au_nfiles_dec(struct super_block *sb) -+{ -+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles)); -+ atomic_long_dec(&au_sbi(sb)->si_nfiles); -+} -+ -+static inline struct au_branch *au_sbr(struct super_block *sb, -+ aufs_bindex_t bindex) -+{ -+ SiMustAnyLock(sb); -+ return au_sbi(sb)->si_branch[0 + bindex]; -+} -+ -+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid) -+{ -+ SiMustWriteLock(sb); -+ au_sbi(sb)->si_xino_brid = brid; -+} -+ -+static inline aufs_bindex_t au_xino_brid(struct super_block *sb) -+{ -+ SiMustAnyLock(sb); -+ return au_sbi(sb)->si_xino_brid; -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_SUPER_H__ */ -diff --git fs/aufs/sysaufs.c fs/aufs/sysaufs.c -new file mode 100644 -index 0000000..b31b8d3 ---- /dev/null -+++ fs/aufs/sysaufs.c -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * sysfs interface and lifetime management -+ * they are necessary regardless sysfs is disabled. 
-+ */ -+ -+#include -+#include "aufs.h" -+ -+unsigned long sysaufs_si_mask; -+struct kset *sysaufs_kset; -+ -+#define AuSiAttr(_name) { \ -+ .attr = { .name = __stringify(_name), .mode = 0444 }, \ -+ .show = sysaufs_si_##_name, \ -+} -+ -+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path); -+struct attribute *sysaufs_si_attrs[] = { -+ &sysaufs_si_attr_xi_path.attr, -+ NULL, -+}; -+ -+static const struct sysfs_ops au_sbi_ops = { -+ .show = sysaufs_si_show -+}; -+ -+static struct kobj_type au_sbi_ktype = { -+ .release = au_si_free, -+ .sysfs_ops = &au_sbi_ops, -+ .default_attrs = sysaufs_si_attrs -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+int sysaufs_si_init(struct au_sbinfo *sbinfo) -+{ -+ int err; -+ -+ sbinfo->si_kobj.kset = sysaufs_kset; -+ /* cf. sysaufs_name() */ -+ err = kobject_init_and_add -+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL, -+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo)); -+ -+ dbgaufs_si_null(sbinfo); -+ if (!err) { -+ err = dbgaufs_si_init(sbinfo); -+ if (unlikely(err)) -+ kobject_put(&sbinfo->si_kobj); -+ } -+ return err; -+} -+ -+void sysaufs_fin(void) -+{ -+ dbgaufs_fin(); -+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group); -+ kset_unregister(sysaufs_kset); -+} -+ -+int __init sysaufs_init(void) -+{ -+ int err; -+ -+ do { -+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask)); -+ } while (!sysaufs_si_mask); -+ -+ err = -EINVAL; -+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj); -+ if (unlikely(!sysaufs_kset)) -+ goto out; -+ err = PTR_ERR(sysaufs_kset); -+ if (IS_ERR(sysaufs_kset)) -+ goto out; -+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group); -+ if (unlikely(err)) { -+ kset_unregister(sysaufs_kset); -+ goto out; -+ } -+ -+ err = dbgaufs_init(); -+ if (unlikely(err)) -+ sysaufs_fin(); -+out: -+ return err; -+} -diff --git fs/aufs/sysaufs.h fs/aufs/sysaufs.h -new file mode 100644 -index 
0000000..d6523c9 ---- /dev/null -+++ fs/aufs/sysaufs.h -@@ -0,0 +1,90 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * sysfs interface and mount lifetime management -+ */ -+ -+#ifndef __SYSAUFS_H__ -+#define __SYSAUFS_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include "module.h" -+ -+struct super_block; -+struct au_sbinfo; -+ -+struct sysaufs_si_attr { -+ struct attribute attr; -+ int (*show)(struct seq_file *seq, struct super_block *sb); -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* sysaufs.c */ -+extern unsigned long sysaufs_si_mask; -+extern struct kset *sysaufs_kset; -+extern struct attribute *sysaufs_si_attrs[]; -+int sysaufs_si_init(struct au_sbinfo *sbinfo); -+int __init sysaufs_init(void); -+void sysaufs_fin(void); -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* some people doesn't like to show a pointer in kernel */ -+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo) -+{ -+ return sysaufs_si_mask ^ (unsigned long)sbinfo; -+} -+ -+#define SysaufsSiNamePrefix "si_" -+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16) -+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name) -+{ -+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx", -+ sysaufs_si_id(sbinfo)); -+} -+ -+struct au_branch; -+#ifdef CONFIG_SYSFS -+/* sysfs.c */ -+extern struct attribute_group *sysaufs_attr_group; -+ -+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb); -+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, -+ char *buf); -+ -+void sysaufs_br_init(struct au_branch *br); -+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex); -+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex); -+ -+#define sysaufs_brs_init() do {} while (0) -+ -+#else -+#define sysaufs_attr_group NULL -+ -+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb) -+ 
-+static inline -+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, -+ char *buf) -+{ -+ return 0; -+} -+ -+AuStubVoid(sysaufs_br_init, struct au_branch *br) -+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex) -+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex) -+ -+static inline void sysaufs_brs_init(void) -+{ -+ sysaufs_brs = 0; -+} -+ -+#endif /* CONFIG_SYSFS */ -+ -+#endif /* __KERNEL__ */ -+#endif /* __SYSAUFS_H__ */ -diff --git fs/aufs/sysfs.c fs/aufs/sysfs.c -new file mode 100644 -index 0000000..4704e7c ---- /dev/null -+++ fs/aufs/sysfs.c -@@ -0,0 +1,256 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * sysfs interface -+ */ -+ -+#include -+#include "aufs.h" -+ -+static struct attribute *au_attr[] = { -+ NULL, /* need to NULL terminate the list of attributes */ -+}; -+ -+static struct attribute_group sysaufs_attr_group_body = { -+ .attrs = au_attr -+}; -+ -+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body; -+ -+/* ---------------------------------------------------------------------- */ -+ -+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb) -+{ -+ int err; -+ -+ SiMustAnyLock(sb); -+ -+ err = 0; -+ if (au_opt_test(au_mntflags(sb), XINO)) { -+ err = au_xino_path(seq, au_sbi(sb)->si_xib); -+ seq_putc(seq, '\n'); -+ } -+ return err; -+} -+ -+/* -+ * the lifetime of branch is independent from the entry under sysfs. -+ * sysfs handles the lifetime of the entry, and never call ->show() after it is -+ * unlinked. 
-+ */ -+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb, -+ aufs_bindex_t bindex, int idx) -+{ -+ int err; -+ struct path path; -+ struct dentry *root; -+ struct au_branch *br; -+ au_br_perm_str_t perm; -+ -+ AuDbg("b%d\n", bindex); -+ -+ err = 0; -+ root = sb->s_root; -+ di_read_lock_parent(root, !AuLock_IR); -+ br = au_sbr(sb, bindex); -+ -+ switch (idx) { -+ case AuBrSysfs_BR: -+ path.mnt = au_br_mnt(br); -+ path.dentry = au_h_dptr(root, bindex); -+ au_seq_path(seq, &path); -+ di_read_unlock(root, !AuLock_IR); -+ au_optstr_br_perm(&perm, br->br_perm); -+ err = seq_printf(seq, "=%s\n", perm.a); -+ if (err == -1) -+ err = -E2BIG; -+ break; -+ case AuBrSysfs_BRID: -+ err = seq_printf(seq, "%d\n", br->br_id); -+ di_read_unlock(root, !AuLock_IR); -+ if (err == -1) -+ err = -E2BIG; -+ break; -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static struct seq_file *au_seq(char *p, ssize_t len) -+{ -+ struct seq_file *seq; -+ -+ seq = kzalloc(sizeof(*seq), GFP_NOFS); -+ if (seq) { -+ /* mutex_init(&seq.lock); */ -+ seq->buf = p; -+ seq->size = len; -+ return seq; /* success */ -+ } -+ -+ seq = ERR_PTR(-ENOMEM); -+ return seq; -+} -+ -+#define SysaufsBr_PREFIX "br" -+#define SysaufsBrid_PREFIX "brid" -+ -+/* todo: file size may exceed PAGE_SIZE */ -+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr, -+ char *buf) -+{ -+ ssize_t err; -+ int idx; -+ long l; -+ aufs_bindex_t bend; -+ struct au_sbinfo *sbinfo; -+ struct super_block *sb; -+ struct seq_file *seq; -+ char *name; -+ struct attribute **cattr; -+ -+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj); -+ sb = sbinfo->si_sb; -+ -+ /* -+ * prevent a race condition between sysfs and aufs. -+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which -+ * prohibits maintaining the sysfs entries. -+ * hew we acquire read lock after sysfs_get_active_two(). 
-+ * on the other hand, the remount process may maintain the sysfs/aufs -+ * entries after acquiring write lock. -+ * it can cause a deadlock. -+ * simply we gave up processing read here. -+ */ -+ err = -EBUSY; -+ if (unlikely(!si_noflush_read_trylock(sb))) -+ goto out; -+ -+ seq = au_seq(buf, PAGE_SIZE); -+ err = PTR_ERR(seq); -+ if (IS_ERR(seq)) -+ goto out_unlock; -+ -+ name = (void *)attr->name; -+ cattr = sysaufs_si_attrs; -+ while (*cattr) { -+ if (!strcmp(name, (*cattr)->name)) { -+ err = container_of(*cattr, struct sysaufs_si_attr, attr) -+ ->show(seq, sb); -+ goto out_seq; -+ } -+ cattr++; -+ } -+ -+ if (!strncmp(name, SysaufsBrid_PREFIX, -+ sizeof(SysaufsBrid_PREFIX) - 1)) { -+ idx = AuBrSysfs_BRID; -+ name += sizeof(SysaufsBrid_PREFIX) - 1; -+ } else if (!strncmp(name, SysaufsBr_PREFIX, -+ sizeof(SysaufsBr_PREFIX) - 1)) { -+ idx = AuBrSysfs_BR; -+ name += sizeof(SysaufsBr_PREFIX) - 1; -+ } else -+ BUG(); -+ -+ err = kstrtol(name, 10, &l); -+ if (!err) { -+ bend = au_sbend(sb); -+ if (l <= bend) -+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx); -+ else -+ err = -ENOENT; -+ } -+ -+out_seq: -+ if (!err) { -+ err = seq->count; -+ /* sysfs limit */ -+ if (unlikely(err == PAGE_SIZE)) -+ err = -EFBIG; -+ } -+ kfree(seq); -+out_unlock: -+ si_read_unlock(sb); -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void sysaufs_br_init(struct au_branch *br) -+{ -+ int i; -+ struct au_brsysfs *br_sysfs; -+ struct attribute *attr; -+ -+ br_sysfs = br->br_sysfs; -+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) { -+ attr = &br_sysfs->attr; -+ sysfs_attr_init(attr); -+ attr->name = br_sysfs->name; -+ attr->mode = S_IRUGO; -+ br_sysfs++; -+ } -+} -+ -+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ struct au_branch *br; -+ struct kobject *kobj; -+ struct au_brsysfs *br_sysfs; -+ int i; -+ aufs_bindex_t bend; -+ -+ dbgaufs_brs_del(sb, bindex); -+ -+ if (!sysaufs_brs) -+ return; -+ 
-+ kobj = &au_sbi(sb)->si_kobj; -+ bend = au_sbend(sb); -+ for (; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ br_sysfs = br->br_sysfs; -+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) { -+ sysfs_remove_file(kobj, &br_sysfs->attr); -+ br_sysfs++; -+ } -+ } -+} -+ -+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ int err, i; -+ aufs_bindex_t bend; -+ struct kobject *kobj; -+ struct au_branch *br; -+ struct au_brsysfs *br_sysfs; -+ -+ dbgaufs_brs_add(sb, bindex); -+ -+ if (!sysaufs_brs) -+ return; -+ -+ kobj = &au_sbi(sb)->si_kobj; -+ bend = au_sbend(sb); -+ for (; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ br_sysfs = br->br_sysfs; -+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name), -+ SysaufsBr_PREFIX "%d", bindex); -+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name), -+ SysaufsBrid_PREFIX "%d", bindex); -+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) { -+ err = sysfs_create_file(kobj, &br_sysfs->attr); -+ if (unlikely(err)) -+ pr_warn("failed %s under sysfs(%d)\n", -+ br_sysfs->name, err); -+ br_sysfs++; -+ } -+ } -+} -diff --git fs/aufs/sysrq.c fs/aufs/sysrq.c -new file mode 100644 -index 0000000..73a27eb ---- /dev/null -+++ fs/aufs/sysrq.c -@@ -0,0 +1,144 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
/*
 * dump the state of one aufs superblock to the kernel log:
 * the sbinfo id, the superblock itself, every inode on sb->s_inodes and
 * every non-special file on the si_files list.
 * au_plevel is switched to KERN_WARNING for the duration of the dump and
 * restored at the end.
 */
static void sysrq_sb(struct super_block *sb)
{
	char *plevel;
	struct au_sbinfo *sbinfo;
	struct file *file;
	struct au_sphlhead *files;
	struct au_finfo *finfo;

	/* save the current print level, restored before returning */
	plevel = au_plevel;
	au_plevel = KERN_WARNING;

	/* since we define pr_fmt, call printk directly */
#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)

	sbinfo = au_sbi(sb);
	printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
	pr("superblock\n");
	au_dpri_sb(sb);

	/* disabled: dump of the root dentry and the root inode */
#if 0
	pr("root dentry\n");
	au_dpri_dentry(sb->s_root);
	pr("root inode\n");
	au_dpri_inode(sb->s_root->d_inode);
#endif

	/* disabled: dump of every dentry collected under the root */
#if 0
	do {
		int err, i, j, ndentry;
		struct au_dcsub_pages dpages;
		struct au_dpage *dpage;

		err = au_dpages_init(&dpages, GFP_ATOMIC);
		if (unlikely(err))
			break;
		err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
		if (!err)
			for (i = 0; i < dpages.ndpage; i++) {
				dpage = dpages.dpages + i;
				ndentry = dpage->ndentry;
				for (j = 0; j < ndentry; j++)
					au_dpri_dentry(dpage->dentries[j]);
			}
		au_dpages_free(&dpages);
	} while (0);
#endif

#if 1
	{
		struct inode *i;

		pr("isolated inode\n");
		/* walk sb->s_inodes under inode_sb_list_lock */
		spin_lock(&inode_sb_list_lock);
		list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
			spin_lock(&i->i_lock);
			/*
			 * the leading "1 ||" makes the condition always true,
			 * so every inode is printed, not only the ones
			 * without a dentry.
			 */
			if (1 || hlist_empty(&i->i_dentry))
				au_dpri_inode(i);
			spin_unlock(&i->i_lock);
		}
		spin_unlock(&inode_sb_list_lock);
	}
#endif
	pr("files\n");
	files = &au_sbi(sb)->si_files;
	/* walk the si_files list under its own spinlock */
	spin_lock(&files->spin);
	hlist_for_each_entry(finfo, &files->head, fi_hlist) {
		umode_t mode;

		file = finfo->fi_file;
		mode = file_inode(file)->i_mode;
		/* special files are skipped */
		if (!special_file(mode))
			au_dpri_file(file);
	}
	spin_unlock(&files->spin);
	pr("done\n");

#undef pr
	au_plevel = plevel;
}
---------------------------------------------------------------------- */ -+ -+/* module parameter */ -+static char *aufs_sysrq_key = "a"; -+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO); -+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME); -+ -+static void au_sysrq(int key __maybe_unused) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ lockdep_off(); -+ au_sbilist_lock(); -+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list) -+ sysrq_sb(sbinfo->si_sb); -+ au_sbilist_unlock(); -+ lockdep_on(); -+} -+ -+static struct sysrq_key_op au_sysrq_op = { -+ .handler = au_sysrq, -+ .help_msg = "Aufs", -+ .action_msg = "Aufs", -+ .enable_mask = SYSRQ_ENABLE_DUMP -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+int __init au_sysrq_init(void) -+{ -+ int err; -+ char key; -+ -+ err = -1; -+ key = *aufs_sysrq_key; -+ if ('a' <= key && key <= 'z') -+ err = register_sysrq_key(key, &au_sysrq_op); -+ if (unlikely(err)) -+ pr_err("err %d, sysrq=%c\n", err, key); -+ return err; -+} -+ -+void au_sysrq_fin(void) -+{ -+ int err; -+ -+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op); -+ if (unlikely(err)) -+ pr_err("err %d (ignored)\n", err); -+} -diff --git fs/aufs/vdir.c fs/aufs/vdir.c -new file mode 100644 -index 0000000..cef77f5 ---- /dev/null -+++ fs/aufs/vdir.c -@@ -0,0 +1,876 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * virtual or vertical directory -+ */ -+ -+#include "aufs.h" -+ -+static unsigned int calc_size(int nlen) -+{ -+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t)); -+} -+ -+static int set_deblk_end(union au_vdir_deblk_p *p, -+ union au_vdir_deblk_p *deblk_end) -+{ -+ if (calc_size(0) <= deblk_end->deblk - p->deblk) { -+ p->de->de_str.len = 0; -+ /* smp_mb(); */ -+ return 0; -+ } -+ return -1; /* error */ -+} -+ -+/* returns true or false */ -+static int is_deblk_end(union au_vdir_deblk_p *p, -+ union au_vdir_deblk_p *deblk_end) -+{ -+ if (calc_size(0) <= deblk_end->deblk - p->deblk) -+ return !p->de->de_str.len; -+ return 1; -+} -+ -+static unsigned char *last_deblk(struct au_vdir *vdir) -+{ -+ return vdir->vd_deblk[vdir->vd_nblk - 1]; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* estimate the apropriate size for name hash table */ -+unsigned int au_rdhash_est(loff_t sz) -+{ -+ unsigned int n; -+ -+ n = UINT_MAX; -+ sz >>= 10; -+ if (sz < n) -+ n = sz; -+ if (sz < AUFS_RDHASH_DEF) -+ n = AUFS_RDHASH_DEF; -+ /* pr_info("n %u\n", n); */ -+ return n; -+} -+ -+/* -+ * the allocated memory has to be freed by -+ * au_nhash_wh_free() or au_nhash_de_free(). 
-+ */ -+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp) -+{ -+ struct hlist_head *head; -+ unsigned int u; -+ size_t sz; -+ -+ sz = sizeof(*nhash->nh_head) * num_hash; -+ head = kmalloc(sz, gfp); -+ if (head) { -+ nhash->nh_num = num_hash; -+ nhash->nh_head = head; -+ for (u = 0; u < num_hash; u++) -+ INIT_HLIST_HEAD(head++); -+ return 0; /* success */ -+ } -+ -+ return -ENOMEM; -+} -+ -+static void nhash_count(struct hlist_head *head) -+{ -+#if 0 -+ unsigned long n; -+ struct hlist_node *pos; -+ -+ n = 0; -+ hlist_for_each(pos, head) -+ n++; -+ pr_info("%lu\n", n); -+#endif -+} -+ -+static void au_nhash_wh_do_free(struct hlist_head *head) -+{ -+ struct au_vdir_wh *pos; -+ struct hlist_node *node; -+ -+ hlist_for_each_entry_safe(pos, node, head, wh_hash) -+ kfree(pos); -+} -+ -+static void au_nhash_de_do_free(struct hlist_head *head) -+{ -+ struct au_vdir_dehstr *pos; -+ struct hlist_node *node; -+ -+ hlist_for_each_entry_safe(pos, node, head, hash) -+ au_cache_free_vdir_dehstr(pos); -+} -+ -+static void au_nhash_do_free(struct au_nhash *nhash, -+ void (*free)(struct hlist_head *head)) -+{ -+ unsigned int n; -+ struct hlist_head *head; -+ -+ n = nhash->nh_num; -+ if (!n) -+ return; -+ -+ head = nhash->nh_head; -+ while (n-- > 0) { -+ nhash_count(head); -+ free(head++); -+ } -+ kfree(nhash->nh_head); -+} -+ -+void au_nhash_wh_free(struct au_nhash *whlist) -+{ -+ au_nhash_do_free(whlist, au_nhash_wh_do_free); -+} -+ -+static void au_nhash_de_free(struct au_nhash *delist) -+{ -+ au_nhash_do_free(delist, au_nhash_de_do_free); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt, -+ int limit) -+{ -+ int num; -+ unsigned int u, n; -+ struct hlist_head *head; -+ struct au_vdir_wh *pos; -+ -+ num = 0; -+ n = whlist->nh_num; -+ head = whlist->nh_head; -+ for (u = 0; u < n; u++, head++) -+ hlist_for_each_entry(pos, head, wh_hash) -+ if 
(pos->wh_bindex == btgt && ++num > limit) -+ return 1; -+ return 0; -+} -+ -+static struct hlist_head *au_name_hash(struct au_nhash *nhash, -+ unsigned char *name, -+ unsigned int len) -+{ -+ unsigned int v; -+ /* const unsigned int magic_bit = 12; */ -+ -+ AuDebugOn(!nhash->nh_num || !nhash->nh_head); -+ -+ v = 0; -+ while (len--) -+ v += *name++; -+ /* v = hash_long(v, magic_bit); */ -+ v %= nhash->nh_num; -+ return nhash->nh_head + v; -+} -+ -+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name, -+ int nlen) -+{ -+ return str->len == nlen && !memcmp(str->name, name, nlen); -+} -+ -+/* returns found or not */ -+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen) -+{ -+ struct hlist_head *head; -+ struct au_vdir_wh *pos; -+ struct au_vdir_destr *str; -+ -+ head = au_name_hash(whlist, name, nlen); -+ hlist_for_each_entry(pos, head, wh_hash) { -+ str = &pos->wh_str; -+ AuDbg("%.*s\n", str->len, str->name); -+ if (au_nhash_test_name(str, name, nlen)) -+ return 1; -+ } -+ return 0; -+} -+ -+/* returns found(true) or not */ -+static int test_known(struct au_nhash *delist, char *name, int nlen) -+{ -+ struct hlist_head *head; -+ struct au_vdir_dehstr *pos; -+ struct au_vdir_destr *str; -+ -+ head = au_name_hash(delist, name, nlen); -+ hlist_for_each_entry(pos, head, hash) { -+ str = pos->str; -+ AuDbg("%.*s\n", str->len, str->name); -+ if (au_nhash_test_name(str, name, nlen)) -+ return 1; -+ } -+ return 0; -+} -+ -+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino, -+ unsigned char d_type) -+{ -+#ifdef CONFIG_AUFS_SHWH -+ wh->wh_ino = ino; -+ wh->wh_type = d_type; -+#endif -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino, -+ unsigned int d_type, aufs_bindex_t bindex, -+ unsigned char shwh) -+{ -+ int err; -+ struct au_vdir_destr *str; -+ struct au_vdir_wh *wh; -+ -+ AuDbg("%.*s\n", nlen, name); 
-+ AuDebugOn(!whlist->nh_num || !whlist->nh_head); -+ -+ err = -ENOMEM; -+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); -+ if (unlikely(!wh)) -+ goto out; -+ -+ err = 0; -+ wh->wh_bindex = bindex; -+ if (shwh) -+ au_shwh_init_wh(wh, ino, d_type); -+ str = &wh->wh_str; -+ str->len = nlen; -+ memcpy(str->name, name, nlen); -+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen)); -+ /* smp_mb(); */ -+ -+out: -+ return err; -+} -+ -+static int append_deblk(struct au_vdir *vdir) -+{ -+ int err; -+ unsigned long ul; -+ const unsigned int deblk_sz = vdir->vd_deblk_sz; -+ union au_vdir_deblk_p p, deblk_end; -+ unsigned char **o; -+ -+ err = -ENOMEM; -+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1), -+ GFP_NOFS); -+ if (unlikely(!o)) -+ goto out; -+ -+ vdir->vd_deblk = o; -+ p.deblk = kmalloc(deblk_sz, GFP_NOFS); -+ if (p.deblk) { -+ ul = vdir->vd_nblk++; -+ vdir->vd_deblk[ul] = p.deblk; -+ vdir->vd_last.ul = ul; -+ vdir->vd_last.p.deblk = p.deblk; -+ deblk_end.deblk = p.deblk + deblk_sz; -+ err = set_deblk_end(&p, &deblk_end); -+ } -+ -+out: -+ return err; -+} -+ -+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino, -+ unsigned int d_type, struct au_nhash *delist) -+{ -+ int err; -+ unsigned int sz; -+ const unsigned int deblk_sz = vdir->vd_deblk_sz; -+ union au_vdir_deblk_p p, *room, deblk_end; -+ struct au_vdir_dehstr *dehstr; -+ -+ p.deblk = last_deblk(vdir); -+ deblk_end.deblk = p.deblk + deblk_sz; -+ room = &vdir->vd_last.p; -+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk -+ || !is_deblk_end(room, &deblk_end)); -+ -+ sz = calc_size(nlen); -+ if (unlikely(sz > deblk_end.deblk - room->deblk)) { -+ err = append_deblk(vdir); -+ if (unlikely(err)) -+ goto out; -+ -+ p.deblk = last_deblk(vdir); -+ deblk_end.deblk = p.deblk + deblk_sz; -+ /* smp_mb(); */ -+ AuDebugOn(room->deblk != p.deblk); -+ } -+ -+ err = -ENOMEM; -+ dehstr = au_cache_alloc_vdir_dehstr(); -+ if (unlikely(!dehstr)) -+ goto out; -+ -+ 
dehstr->str = &room->de->de_str; -+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen)); -+ room->de->de_ino = ino; -+ room->de->de_type = d_type; -+ room->de->de_str.len = nlen; -+ memcpy(room->de->de_str.name, name, nlen); -+ -+ err = 0; -+ room->deblk += sz; -+ if (unlikely(set_deblk_end(room, &deblk_end))) -+ err = append_deblk(vdir); -+ /* smp_mb(); */ -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_vdir_free(struct au_vdir *vdir) -+{ -+ unsigned char **deblk; -+ -+ deblk = vdir->vd_deblk; -+ while (vdir->vd_nblk--) -+ kfree(*deblk++); -+ kfree(vdir->vd_deblk); -+ au_cache_free_vdir(vdir); -+} -+ -+static struct au_vdir *alloc_vdir(struct file *file) -+{ -+ struct au_vdir *vdir; -+ struct super_block *sb; -+ int err; -+ -+ sb = file->f_dentry->d_sb; -+ SiMustAnyLock(sb); -+ -+ err = -ENOMEM; -+ vdir = au_cache_alloc_vdir(); -+ if (unlikely(!vdir)) -+ goto out; -+ -+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS); -+ if (unlikely(!vdir->vd_deblk)) -+ goto out_free; -+ -+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk; -+ if (!vdir->vd_deblk_sz) { -+ /* estimate the apropriate size for deblk */ -+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL); -+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */ -+ } -+ vdir->vd_nblk = 0; -+ vdir->vd_version = 0; -+ vdir->vd_jiffy = 0; -+ err = append_deblk(vdir); -+ if (!err) -+ return vdir; /* success */ -+ -+ kfree(vdir->vd_deblk); -+ -+out_free: -+ au_cache_free_vdir(vdir); -+out: -+ vdir = ERR_PTR(err); -+ return vdir; -+} -+ -+static int reinit_vdir(struct au_vdir *vdir) -+{ -+ int err; -+ union au_vdir_deblk_p p, deblk_end; -+ -+ while (vdir->vd_nblk > 1) { -+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]); -+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */ -+ vdir->vd_nblk--; -+ } -+ p.deblk = vdir->vd_deblk[0]; -+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz; -+ err = set_deblk_end(&p, &deblk_end); -+ /* keep vd_dblk_sz 
*/ -+ vdir->vd_last.ul = 0; -+ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; -+ vdir->vd_version = 0; -+ vdir->vd_jiffy = 0; -+ /* smp_mb(); */ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define AuFillVdir_CALLED 1 -+#define AuFillVdir_WHABLE (1 << 1) -+#define AuFillVdir_SHWH (1 << 2) -+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name) -+#define au_fset_fillvdir(flags, name) \ -+ do { (flags) |= AuFillVdir_##name; } while (0) -+#define au_fclr_fillvdir(flags, name) \ -+ do { (flags) &= ~AuFillVdir_##name; } while (0) -+ -+#ifndef CONFIG_AUFS_SHWH -+#undef AuFillVdir_SHWH -+#define AuFillVdir_SHWH 0 -+#endif -+ -+struct fillvdir_arg { -+ struct dir_context ctx; -+ struct file *file; -+ struct au_vdir *vdir; -+ struct au_nhash delist; -+ struct au_nhash whlist; -+ aufs_bindex_t bindex; -+ unsigned int flags; -+ int err; -+}; -+ -+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen, -+ loff_t offset __maybe_unused, u64 h_ino, -+ unsigned int d_type) -+{ -+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx); -+ char *name = (void *)__name; -+ struct super_block *sb; -+ ino_t ino; -+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH); -+ -+ arg->err = 0; -+ sb = arg->file->f_dentry->d_sb; -+ au_fset_fillvdir(arg->flags, CALLED); -+ /* smp_mb(); */ -+ if (nlen <= AUFS_WH_PFX_LEN -+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { -+ if (test_known(&arg->delist, name, nlen) -+ || au_nhash_test_known_wh(&arg->whlist, name, nlen)) -+ goto out; /* already exists or whiteouted */ -+ -+ sb = arg->file->f_dentry->d_sb; -+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino); -+ if (!arg->err) { -+ if (unlikely(nlen > AUFS_MAX_NAMELEN)) -+ d_type = DT_UNKNOWN; -+ arg->err = append_de(arg->vdir, name, nlen, ino, -+ d_type, &arg->delist); -+ } -+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) { -+ name += AUFS_WH_PFX_LEN; -+ nlen -= 
AUFS_WH_PFX_LEN; -+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen)) -+ goto out; /* already whiteouted */ -+ -+ if (shwh) -+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type, -+ &ino); -+ if (!arg->err) { -+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN) -+ d_type = DT_UNKNOWN; -+ arg->err = au_nhash_append_wh -+ (&arg->whlist, name, nlen, ino, d_type, -+ arg->bindex, shwh); -+ } -+ } -+ -+out: -+ if (!arg->err) -+ arg->vdir->vd_jiffy = jiffies; -+ /* smp_mb(); */ -+ AuTraceErr(arg->err); -+ return arg->err; -+} -+ -+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir, -+ struct au_nhash *whlist, struct au_nhash *delist) -+{ -+#ifdef CONFIG_AUFS_SHWH -+ int err; -+ unsigned int nh, u; -+ struct hlist_head *head; -+ struct au_vdir_wh *pos; -+ struct hlist_node *n; -+ char *p, *o; -+ struct au_vdir_destr *destr; -+ -+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH)); -+ -+ err = -ENOMEM; -+ o = p = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!p)) -+ goto out; -+ -+ err = 0; -+ nh = whlist->nh_num; -+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); -+ p += AUFS_WH_PFX_LEN; -+ for (u = 0; u < nh; u++) { -+ head = whlist->nh_head + u; -+ hlist_for_each_entry_safe(pos, n, head, wh_hash) { -+ destr = &pos->wh_str; -+ memcpy(p, destr->name, destr->len); -+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN, -+ pos->wh_ino, pos->wh_type, delist); -+ if (unlikely(err)) -+ break; -+ } -+ } -+ -+ free_page((unsigned long)o); -+ -+out: -+ AuTraceErr(err); -+ return err; -+#else -+ return 0; -+#endif -+} -+ -+static int au_do_read_vdir(struct fillvdir_arg *arg) -+{ -+ int err; -+ unsigned int rdhash; -+ loff_t offset; -+ aufs_bindex_t bend, bindex, bstart; -+ unsigned char shwh; -+ struct file *hf, *file; -+ struct super_block *sb; -+ -+ file = arg->file; -+ sb = file->f_dentry->d_sb; -+ SiMustAnyLock(sb); -+ -+ rdhash = au_sbi(sb)->si_rdhash; -+ if (!rdhash) -+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL)); -+ err = 
au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS); -+ if (unlikely(err)) -+ goto out_delist; -+ -+ err = 0; -+ arg->flags = 0; -+ shwh = 0; -+ if (au_opt_test(au_mntflags(sb), SHWH)) { -+ shwh = 1; -+ au_fset_fillvdir(arg->flags, SHWH); -+ } -+ bstart = au_fbstart(file); -+ bend = au_fbend_dir(file); -+ for (bindex = bstart; !err && bindex <= bend; bindex++) { -+ hf = au_hf_dir(file, bindex); -+ if (!hf) -+ continue; -+ -+ offset = vfsub_llseek(hf, 0, SEEK_SET); -+ err = offset; -+ if (unlikely(offset)) -+ break; -+ -+ arg->bindex = bindex; -+ au_fclr_fillvdir(arg->flags, WHABLE); -+ if (shwh -+ || (bindex != bend -+ && au_br_whable(au_sbr_perm(sb, bindex)))) -+ au_fset_fillvdir(arg->flags, WHABLE); -+ do { -+ arg->err = 0; -+ au_fclr_fillvdir(arg->flags, CALLED); -+ /* smp_mb(); */ -+ err = vfsub_iterate_dir(hf, &arg->ctx); -+ if (err >= 0) -+ err = arg->err; -+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED)); -+ -+ /* -+ * dir_relax() may be good for concurrency, but aufs should not -+ * use it since it will cause a lockdep problem. 
-+ */ -+ } -+ -+ if (!err && shwh) -+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist); -+ -+ au_nhash_wh_free(&arg->whlist); -+ -+out_delist: -+ au_nhash_de_free(&arg->delist); -+out: -+ return err; -+} -+ -+static int read_vdir(struct file *file, int may_read) -+{ -+ int err; -+ unsigned long expire; -+ unsigned char do_read; -+ struct fillvdir_arg arg = { -+ .ctx = { -+ .actor = au_diractor(fillvdir) -+ } -+ }; -+ struct inode *inode; -+ struct au_vdir *vdir, *allocated; -+ -+ err = 0; -+ inode = file_inode(file); -+ IMustLock(inode); -+ SiMustAnyLock(inode->i_sb); -+ -+ allocated = NULL; -+ do_read = 0; -+ expire = au_sbi(inode->i_sb)->si_rdcache; -+ vdir = au_ivdir(inode); -+ if (!vdir) { -+ do_read = 1; -+ vdir = alloc_vdir(file); -+ err = PTR_ERR(vdir); -+ if (IS_ERR(vdir)) -+ goto out; -+ err = 0; -+ allocated = vdir; -+ } else if (may_read -+ && (inode->i_version != vdir->vd_version -+ || time_after(jiffies, vdir->vd_jiffy + expire))) { -+ do_read = 1; -+ err = reinit_vdir(vdir); -+ if (unlikely(err)) -+ goto out; -+ } -+ -+ if (!do_read) -+ return 0; /* success */ -+ -+ arg.file = file; -+ arg.vdir = vdir; -+ err = au_do_read_vdir(&arg); -+ if (!err) { -+ /* file->f_pos = 0; */ /* todo: ctx->pos? 
*/ -+ vdir->vd_version = inode->i_version; -+ vdir->vd_last.ul = 0; -+ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; -+ if (allocated) -+ au_set_ivdir(inode, allocated); -+ } else if (allocated) -+ au_vdir_free(allocated); -+ -+out: -+ return err; -+} -+ -+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src) -+{ -+ int err, rerr; -+ unsigned long ul, n; -+ const unsigned int deblk_sz = src->vd_deblk_sz; -+ -+ AuDebugOn(tgt->vd_nblk != 1); -+ -+ err = -ENOMEM; -+ if (tgt->vd_nblk < src->vd_nblk) { -+ unsigned char **p; -+ -+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk, -+ GFP_NOFS); -+ if (unlikely(!p)) -+ goto out; -+ tgt->vd_deblk = p; -+ } -+ -+ if (tgt->vd_deblk_sz != deblk_sz) { -+ unsigned char *p; -+ -+ tgt->vd_deblk_sz = deblk_sz; -+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS); -+ if (unlikely(!p)) -+ goto out; -+ tgt->vd_deblk[0] = p; -+ } -+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz); -+ tgt->vd_version = src->vd_version; -+ tgt->vd_jiffy = src->vd_jiffy; -+ -+ n = src->vd_nblk; -+ for (ul = 1; ul < n; ul++) { -+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz, -+ GFP_NOFS); -+ if (unlikely(!tgt->vd_deblk[ul])) -+ goto out; -+ tgt->vd_nblk++; -+ } -+ tgt->vd_nblk = n; -+ tgt->vd_last.ul = tgt->vd_last.ul; -+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul]; -+ tgt->vd_last.p.deblk += src->vd_last.p.deblk -+ - src->vd_deblk[src->vd_last.ul]; -+ /* smp_mb(); */ -+ return 0; /* success */ -+ -+out: -+ rerr = reinit_vdir(tgt); -+ BUG_ON(rerr); -+ return err; -+} -+ -+int au_vdir_init(struct file *file) -+{ -+ int err; -+ struct inode *inode; -+ struct au_vdir *vdir_cache, *allocated; -+ -+ /* test file->f_pos here instead of ctx->pos */ -+ err = read_vdir(file, !file->f_pos); -+ if (unlikely(err)) -+ goto out; -+ -+ allocated = NULL; -+ vdir_cache = au_fvdir_cache(file); -+ if (!vdir_cache) { -+ vdir_cache = alloc_vdir(file); -+ err = PTR_ERR(vdir_cache); -+ if (IS_ERR(vdir_cache)) -+ goto out; -+ allocated = 
vdir_cache; -+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) { -+ /* test file->f_pos here instead of ctx->pos */ -+ err = reinit_vdir(vdir_cache); -+ if (unlikely(err)) -+ goto out; -+ } else -+ return 0; /* success */ -+ -+ inode = file_inode(file); -+ err = copy_vdir(vdir_cache, au_ivdir(inode)); -+ if (!err) { -+ file->f_version = inode->i_version; -+ if (allocated) -+ au_set_fvdir_cache(file, allocated); -+ } else if (allocated) -+ au_vdir_free(allocated); -+ -+out: -+ return err; -+} -+ -+static loff_t calc_offset(struct au_vdir *vdir) -+{ -+ loff_t offset; -+ union au_vdir_deblk_p p; -+ -+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul]; -+ offset = vdir->vd_last.p.deblk - p.deblk; -+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul; -+ return offset; -+} -+ -+/* returns true or false */ -+static int seek_vdir(struct file *file, struct dir_context *ctx) -+{ -+ int valid; -+ unsigned int deblk_sz; -+ unsigned long ul, n; -+ loff_t offset; -+ union au_vdir_deblk_p p, deblk_end; -+ struct au_vdir *vdir_cache; -+ -+ valid = 1; -+ vdir_cache = au_fvdir_cache(file); -+ offset = calc_offset(vdir_cache); -+ AuDbg("offset %lld\n", offset); -+ if (ctx->pos == offset) -+ goto out; -+ -+ vdir_cache->vd_last.ul = 0; -+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0]; -+ if (!ctx->pos) -+ goto out; -+ -+ valid = 0; -+ deblk_sz = vdir_cache->vd_deblk_sz; -+ ul = div64_u64(ctx->pos, deblk_sz); -+ AuDbg("ul %lu\n", ul); -+ if (ul >= vdir_cache->vd_nblk) -+ goto out; -+ -+ n = vdir_cache->vd_nblk; -+ for (; ul < n; ul++) { -+ p.deblk = vdir_cache->vd_deblk[ul]; -+ deblk_end.deblk = p.deblk + deblk_sz; -+ offset = ul; -+ offset *= deblk_sz; -+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) { -+ unsigned int l; -+ -+ l = calc_size(p.de->de_str.len); -+ offset += l; -+ p.deblk += l; -+ } -+ if (!is_deblk_end(&p, &deblk_end)) { -+ valid = 1; -+ vdir_cache->vd_last.ul = ul; -+ vdir_cache->vd_last.p = p; -+ break; -+ } -+ } -+ -+out: -+ /* 
smp_mb(); */ -+ AuTraceErr(!valid); -+ return valid; -+} -+ -+int au_vdir_fill_de(struct file *file, struct dir_context *ctx) -+{ -+ unsigned int l, deblk_sz; -+ union au_vdir_deblk_p deblk_end; -+ struct au_vdir *vdir_cache; -+ struct au_vdir_de *de; -+ -+ vdir_cache = au_fvdir_cache(file); -+ if (!seek_vdir(file, ctx)) -+ return 0; -+ -+ deblk_sz = vdir_cache->vd_deblk_sz; -+ while (1) { -+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul]; -+ deblk_end.deblk += deblk_sz; -+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) { -+ de = vdir_cache->vd_last.p.de; -+ AuDbg("%.*s, off%lld, i%lu, dt%d\n", -+ de->de_str.len, de->de_str.name, ctx->pos, -+ (unsigned long)de->de_ino, de->de_type); -+ if (unlikely(!dir_emit(ctx, de->de_str.name, -+ de->de_str.len, de->de_ino, -+ de->de_type))) { -+ /* todo: ignore the error caused by udba? */ -+ /* return err; */ -+ return 0; -+ } -+ -+ l = calc_size(de->de_str.len); -+ vdir_cache->vd_last.p.deblk += l; -+ ctx->pos += l; -+ } -+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) { -+ vdir_cache->vd_last.ul++; -+ vdir_cache->vd_last.p.deblk -+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul]; -+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul; -+ continue; -+ } -+ break; -+ } -+ -+ /* smp_mb(); */ -+ return 0; -+} -diff --git fs/aufs/vfsub.c fs/aufs/vfsub.c -new file mode 100644 -index 0000000..12de43a ---- /dev/null -+++ fs/aufs/vfsub.c -@@ -0,0 +1,769 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * sub-routines for VFS -+ */ -+ -+#include -+#include -+#include -+#include -+#include "aufs.h" -+ -+int vfsub_update_h_iattr(struct path *h_path, int *did) -+{ -+ int err; -+ struct kstat st; -+ struct super_block *h_sb; -+ -+ /* for remote fs, leave work for its getattr or d_revalidate */ -+ /* for bad i_attr fs, handle them in aufs_getattr() */ -+ /* still some fs may acquire i_mutex. 
we need to skip them */ -+ err = 0; -+ if (!did) -+ did = &err; -+ h_sb = h_path->dentry->d_sb; -+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb)); -+ if (*did) -+ err = vfs_getattr(h_path, &st); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct file *vfsub_dentry_open(struct path *path, int flags) -+{ -+ struct file *file; -+ -+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */, -+ current_cred()); -+ if (!IS_ERR_OR_NULL(file) -+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) -+ i_readcount_inc(path->dentry->d_inode); -+ -+ return file; -+} -+ -+struct file *vfsub_filp_open(const char *path, int oflags, int mode) -+{ -+ struct file *file; -+ -+ lockdep_off(); -+ file = filp_open(path, -+ oflags /* | __FMODE_NONOTIFY */, -+ mode); -+ lockdep_on(); -+ if (IS_ERR(file)) -+ goto out; -+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ -+ -+out: -+ return file; -+} -+ -+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path) -+{ -+ int err; -+ -+ err = kern_path(name, flags, path); -+ if (!err && path->dentry->d_inode) -+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/ -+ return err; -+} -+ -+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent, -+ int len) -+{ -+ struct path path = { -+ .mnt = NULL -+ }; -+ -+ /* VFS checks it too, but by WARN_ON_ONCE() */ -+ IMustLock(parent->d_inode); -+ -+ path.dentry = lookup_one_len(name, parent, len); -+ if (IS_ERR(path.dentry)) -+ goto out; -+ if (path.dentry->d_inode) -+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/ -+ -+out: -+ AuTraceErrPtr(path.dentry); -+ return path.dentry; -+} -+ -+void vfsub_call_lkup_one(void *args) -+{ -+ struct vfsub_lkup_one_args *a = args; -+ *a->errp = vfsub_lkup_one(a->name, a->parent); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct dentry *vfsub_lock_rename(struct dentry 
*d1, struct au_hinode *hdir1, -+ struct dentry *d2, struct au_hinode *hdir2) -+{ -+ struct dentry *d; -+ -+ lockdep_off(); -+ d = lock_rename(d1, d2); -+ lockdep_on(); -+ au_hn_suspend(hdir1); -+ if (hdir1 != hdir2) -+ au_hn_suspend(hdir2); -+ -+ return d; -+} -+ -+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1, -+ struct dentry *d2, struct au_hinode *hdir2) -+{ -+ au_hn_resume(hdir1); -+ if (hdir1 != hdir2) -+ au_hn_resume(hdir2); -+ lockdep_off(); -+ unlock_rename(d1, d2); -+ lockdep_on(); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl) -+{ -+ int err; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ -+ d = path->dentry; -+ path->dentry = d->d_parent; -+ err = security_path_mknod(path, d, mode, 0); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ err = vfs_create(dir, path->dentry, mode, want_excl); -+ if (!err) { -+ struct path tmp = *path; -+ int did; -+ -+ vfsub_update_h_iattr(&tmp, &did); -+ if (did) { -+ tmp.dentry = path->dentry->d_parent; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ } -+ /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname) -+{ -+ int err; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ -+ d = path->dentry; -+ path->dentry = d->d_parent; -+ err = security_path_symlink(path, d, symname); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ err = vfs_symlink(dir, path->dentry, symname); -+ if (!err) { -+ struct path tmp = *path; -+ int did; -+ -+ vfsub_update_h_iattr(&tmp, &did); -+ if (did) { -+ tmp.dentry = path->dentry->d_parent; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ } -+ /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev) -+{ -+ int err; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ -+ d = path->dentry; -+ path->dentry = 
d->d_parent; -+ err = security_path_mknod(path, d, mode, new_encode_dev(dev)); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ err = vfs_mknod(dir, path->dentry, mode, dev); -+ if (!err) { -+ struct path tmp = *path; -+ int did; -+ -+ vfsub_update_h_iattr(&tmp, &did); -+ if (did) { -+ tmp.dentry = path->dentry->d_parent; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ } -+ /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+static int au_test_nlink(struct inode *inode) -+{ -+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */ -+ -+ if (!au_test_fs_no_limit_nlink(inode->i_sb) -+ || inode->i_nlink < link_max) -+ return 0; -+ return -EMLINK; -+} -+ -+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path, -+ struct inode **delegated_inode) -+{ -+ int err; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ -+ err = au_test_nlink(src_dentry->d_inode); -+ if (unlikely(err)) -+ return err; -+ -+ /* we don't call may_linkat() */ -+ d = path->dentry; -+ path->dentry = d->d_parent; -+ err = security_path_link(src_dentry, path, d); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ lockdep_off(); -+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode); -+ lockdep_on(); -+ if (!err) { -+ struct path tmp = *path; -+ int did; -+ -+ /* fuse has different memory inode for the same inumber */ -+ vfsub_update_h_iattr(&tmp, &did); -+ if (did) { -+ tmp.dentry = path->dentry->d_parent; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ tmp.dentry = src_dentry; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ } -+ /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, -+ struct inode *dir, struct path *path, -+ struct inode **delegated_inode) -+{ -+ int err; -+ struct path tmp = { -+ .mnt = path->mnt -+ }; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ IMustLock(src_dir); -+ -+ d = path->dentry; -+ path->dentry = d->d_parent; -+ tmp.dentry = src_dentry->d_parent; -+ err 
= security_path_rename(&tmp, src_dentry, path, d, /*flags*/0); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ lockdep_off(); -+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry, -+ delegated_inode, /*flags*/0); -+ lockdep_on(); -+ if (!err) { -+ int did; -+ -+ tmp.dentry = d->d_parent; -+ vfsub_update_h_iattr(&tmp, &did); -+ if (did) { -+ tmp.dentry = src_dentry; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ tmp.dentry = src_dentry->d_parent; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ } -+ /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+int vfsub_mkdir(struct inode *dir, struct path *path, int mode) -+{ -+ int err; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ -+ d = path->dentry; -+ path->dentry = d->d_parent; -+ err = security_path_mkdir(path, d, mode); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ err = vfs_mkdir(dir, path->dentry, mode); -+ if (!err) { -+ struct path tmp = *path; -+ int did; -+ -+ vfsub_update_h_iattr(&tmp, &did); -+ if (did) { -+ tmp.dentry = path->dentry->d_parent; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); -+ } -+ /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+int vfsub_rmdir(struct inode *dir, struct path *path) -+{ -+ int err; -+ struct dentry *d; -+ -+ IMustLock(dir); -+ -+ d = path->dentry; -+ path->dentry = d->d_parent; -+ err = security_path_rmdir(path, d); -+ path->dentry = d; -+ if (unlikely(err)) -+ goto out; -+ -+ lockdep_off(); -+ err = vfs_rmdir(dir, path->dentry); -+ lockdep_on(); -+ if (!err) { -+ struct path tmp = { -+ .dentry = path->dentry->d_parent, -+ .mnt = path->mnt -+ }; -+ -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/ -+ } -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* todo: support mmap_sem? 
*/ -+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, -+ loff_t *ppos) -+{ -+ ssize_t err; -+ -+ lockdep_off(); -+ err = vfs_read(file, ubuf, count, ppos); -+ lockdep_on(); -+ if (err >= 0) -+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ -+ return err; -+} -+ -+/* todo: kernel_read()? */ -+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, -+ loff_t *ppos) -+{ -+ ssize_t err; -+ mm_segment_t oldfs; -+ union { -+ void *k; -+ char __user *u; -+ } buf; -+ -+ buf.k = kbuf; -+ oldfs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = vfsub_read_u(file, buf.u, count, ppos); -+ set_fs(oldfs); -+ return err; -+} -+ -+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, -+ loff_t *ppos) -+{ -+ ssize_t err; -+ -+ lockdep_off(); -+ err = vfs_write(file, ubuf, count, ppos); -+ lockdep_on(); -+ if (err >= 0) -+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ -+ return err; -+} -+ -+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos) -+{ -+ ssize_t err; -+ mm_segment_t oldfs; -+ union { -+ void *k; -+ const char __user *u; -+ } buf; -+ -+ buf.k = kbuf; -+ oldfs = get_fs(); -+ set_fs(KERNEL_DS); -+ err = vfsub_write_u(file, buf.u, count, ppos); -+ set_fs(oldfs); -+ return err; -+} -+ -+int vfsub_flush(struct file *file, fl_owner_t id) -+{ -+ int err; -+ -+ err = 0; -+ if (file->f_op->flush) { -+ if (!au_test_nfs(file->f_dentry->d_sb)) -+ err = file->f_op->flush(file, id); -+ else { -+ lockdep_off(); -+ err = file->f_op->flush(file, id); -+ lockdep_on(); -+ } -+ if (!err) -+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); -+ /*ignore*/ -+ } -+ return err; -+} -+ -+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx) -+{ -+ int err; -+ -+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos); -+ -+ lockdep_off(); -+ err = iterate_dir(file, ctx); -+ lockdep_on(); -+ if (err >= 0) -+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/ -+ 
return err; -+} -+ -+long vfsub_splice_to(struct file *in, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags) -+{ -+ long err; -+ -+ lockdep_off(); -+ err = do_splice_to(in, ppos, pipe, len, flags); -+ lockdep_on(); -+ file_accessed(in); -+ if (err >= 0) -+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/ -+ return err; -+} -+ -+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, -+ loff_t *ppos, size_t len, unsigned int flags) -+{ -+ long err; -+ -+ lockdep_off(); -+ err = do_splice_from(pipe, out, ppos, len, flags); -+ lockdep_on(); -+ if (err >= 0) -+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/ -+ return err; -+} -+ -+int vfsub_fsync(struct file *file, struct path *path, int datasync) -+{ -+ int err; -+ -+ /* file can be NULL */ -+ lockdep_off(); -+ err = vfs_fsync(file, datasync); -+ lockdep_on(); -+ if (!err) { -+ if (!path) { -+ AuDebugOn(!file); -+ path = &file->f_path; -+ } -+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/ -+ } -+ return err; -+} -+ -+/* cf. 
open.c:do_sys_truncate() and do_sys_ftruncate() */ -+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, -+ struct file *h_file) -+{ -+ int err; -+ struct inode *h_inode; -+ struct super_block *h_sb; -+ -+ if (!h_file) { -+ err = vfsub_truncate(h_path, length); -+ goto out; -+ } -+ -+ h_inode = h_path->dentry->d_inode; -+ h_sb = h_inode->i_sb; -+ lockdep_off(); -+ sb_start_write(h_sb); -+ lockdep_on(); -+ err = locks_verify_truncate(h_inode, h_file, length); -+ if (!err) -+ err = security_path_truncate(h_path); -+ if (!err) { -+ lockdep_off(); -+ err = do_truncate(h_path->dentry, length, attr, h_file); -+ lockdep_on(); -+ } -+ lockdep_off(); -+ sb_end_write(h_sb); -+ lockdep_on(); -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_vfsub_mkdir_args { -+ int *errp; -+ struct inode *dir; -+ struct path *path; -+ int mode; -+}; -+ -+static void au_call_vfsub_mkdir(void *args) -+{ -+ struct au_vfsub_mkdir_args *a = args; -+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode); -+} -+ -+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode) -+{ -+ int err, do_sio, wkq_err; -+ -+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); -+ if (!do_sio) -+ err = vfsub_mkdir(dir, path, mode); -+ else { -+ struct au_vfsub_mkdir_args args = { -+ .errp = &err, -+ .dir = dir, -+ .path = path, -+ .mode = mode -+ }; -+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ return err; -+} -+ -+struct au_vfsub_rmdir_args { -+ int *errp; -+ struct inode *dir; -+ struct path *path; -+}; -+ -+static void au_call_vfsub_rmdir(void *args) -+{ -+ struct au_vfsub_rmdir_args *a = args; -+ *a->errp = vfsub_rmdir(a->dir, a->path); -+} -+ -+int vfsub_sio_rmdir(struct inode *dir, struct path *path) -+{ -+ int err, do_sio, wkq_err; -+ -+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE); -+ if (!do_sio) -+ err = vfsub_rmdir(dir, path); -+ 
else { -+ struct au_vfsub_rmdir_args args = { -+ .errp = &err, -+ .dir = dir, -+ .path = path -+ }; -+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct notify_change_args { -+ int *errp; -+ struct path *path; -+ struct iattr *ia; -+ struct inode **delegated_inode; -+}; -+ -+static void call_notify_change(void *args) -+{ -+ struct notify_change_args *a = args; -+ struct inode *h_inode; -+ -+ h_inode = a->path->dentry->d_inode; -+ IMustLock(h_inode); -+ -+ *a->errp = -EPERM; -+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) { -+ *a->errp = notify_change(a->path->dentry, a->ia, -+ a->delegated_inode); -+ if (!*a->errp) -+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/ -+ } -+ AuTraceErr(*a->errp); -+} -+ -+int vfsub_notify_change(struct path *path, struct iattr *ia, -+ struct inode **delegated_inode) -+{ -+ int err; -+ struct notify_change_args args = { -+ .errp = &err, -+ .path = path, -+ .ia = ia, -+ .delegated_inode = delegated_inode -+ }; -+ -+ call_notify_change(&args); -+ -+ return err; -+} -+ -+int vfsub_sio_notify_change(struct path *path, struct iattr *ia, -+ struct inode **delegated_inode) -+{ -+ int err, wkq_err; -+ struct notify_change_args args = { -+ .errp = &err, -+ .path = path, -+ .ia = ia, -+ .delegated_inode = delegated_inode -+ }; -+ -+ wkq_err = au_wkq_wait(call_notify_change, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct unlink_args { -+ int *errp; -+ struct inode *dir; -+ struct path *path; -+ struct inode **delegated_inode; -+}; -+ -+static void call_unlink(void *args) -+{ -+ struct unlink_args *a = args; -+ struct dentry *d = a->path->dentry; -+ struct inode *h_inode; -+ const int stop_sillyrename = (au_test_nfs(d->d_sb) -+ && d_count(d) == 1); -+ 
-+ IMustLock(a->dir); -+ -+ a->path->dentry = d->d_parent; -+ *a->errp = security_path_unlink(a->path, d); -+ a->path->dentry = d; -+ if (unlikely(*a->errp)) -+ return; -+ -+ if (!stop_sillyrename) -+ dget(d); -+ h_inode = d->d_inode; -+ if (h_inode) -+ ihold(h_inode); -+ -+ lockdep_off(); -+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode); -+ lockdep_on(); -+ if (!*a->errp) { -+ struct path tmp = { -+ .dentry = d->d_parent, -+ .mnt = a->path->mnt -+ }; -+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/ -+ } -+ -+ if (!stop_sillyrename) -+ dput(d); -+ if (h_inode) -+ iput(h_inode); -+ -+ AuTraceErr(*a->errp); -+} -+ -+/* -+ * @dir: must be locked. -+ * @dentry: target dentry. -+ */ -+int vfsub_unlink(struct inode *dir, struct path *path, -+ struct inode **delegated_inode, int force) -+{ -+ int err; -+ struct unlink_args args = { -+ .errp = &err, -+ .dir = dir, -+ .path = path, -+ .delegated_inode = delegated_inode -+ }; -+ -+ if (!force) -+ call_unlink(&args); -+ else { -+ int wkq_err; -+ -+ wkq_err = au_wkq_wait(call_unlink, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ return err; -+} -diff --git fs/aufs/vfsub.h fs/aufs/vfsub.h -new file mode 100644 -index 0000000..599fb88 ---- /dev/null -+++ fs/aufs/vfsub.h -@@ -0,0 +1,271 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * sub-routines for VFS -+ */ -+ -+#ifndef __AUFS_VFSUB_H__ -+#define __AUFS_VFSUB_H__ -+ -+#ifdef __KERNEL__ -+ -+#include -+#include -+#include -+#include "debug.h" -+ -+/* copied from linux/fs/internal.h */ -+/* todo: BAD approach!! */ -+extern void __mnt_drop_write(struct vfsmount *); -+extern spinlock_t inode_sb_list_lock; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* lock subclass for lower inode */ -+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */ -+/* reduce? gave up. 
*/ -+enum { -+ AuLsc_I_Begin = I_MUTEX_NONDIR2, /* 4 */ -+ AuLsc_I_PARENT, /* lower inode, parent first */ -+ AuLsc_I_PARENT2, /* copyup dirs */ -+ AuLsc_I_PARENT3, /* copyup wh */ -+ AuLsc_I_CHILD, -+ AuLsc_I_CHILD2, -+ AuLsc_I_End -+}; -+ -+/* to debug easier, do not make them inlined functions */ -+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx)) -+#define IMustLock(i) MtxMustLock(&(i)->i_mutex) -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline void vfsub_drop_nlink(struct inode *inode) -+{ -+ AuDebugOn(!inode->i_nlink); -+ drop_nlink(inode); -+} -+ -+static inline void vfsub_dead_dir(struct inode *inode) -+{ -+ AuDebugOn(!S_ISDIR(inode->i_mode)); -+ inode->i_flags |= S_DEAD; -+ clear_nlink(inode); -+} -+ -+static inline int vfsub_native_ro(struct inode *inode) -+{ -+ return (inode->i_sb->s_flags & MS_RDONLY) -+ || IS_RDONLY(inode) -+ /* || IS_APPEND(inode) */ -+ || IS_IMMUTABLE(inode); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int vfsub_update_h_iattr(struct path *h_path, int *did); -+struct file *vfsub_dentry_open(struct path *path, int flags); -+struct file *vfsub_filp_open(const char *path, int oflags, int mode); -+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path); -+ -+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent, -+ int len); -+ -+struct vfsub_lkup_one_args { -+ struct dentry **errp; -+ struct qstr *name; -+ struct dentry *parent; -+}; -+ -+static inline struct dentry *vfsub_lkup_one(struct qstr *name, -+ struct dentry *parent) -+{ -+ return vfsub_lookup_one_len(name->name, parent, name->len); -+} -+ -+void vfsub_call_lkup_one(void *args); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline int vfsub_mnt_want_write(struct vfsmount *mnt) -+{ -+ int err; -+ -+ lockdep_off(); -+ err = mnt_want_write(mnt); -+ lockdep_on(); -+ return err; -+} -+ 
-+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt) -+{ -+ lockdep_off(); -+ mnt_drop_write(mnt); -+ lockdep_on(); -+} -+ -+static inline void vfsub_mnt_drop_write_file(struct file *file) -+{ -+ lockdep_off(); -+ mnt_drop_write_file(file); -+ lockdep_on(); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_hinode; -+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1, -+ struct dentry *d2, struct au_hinode *hdir2); -+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1, -+ struct dentry *d2, struct au_hinode *hdir2); -+ -+int vfsub_create(struct inode *dir, struct path *path, int mode, -+ bool want_excl); -+int vfsub_symlink(struct inode *dir, struct path *path, -+ const char *symname); -+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev); -+int vfsub_link(struct dentry *src_dentry, struct inode *dir, -+ struct path *path, struct inode **delegated_inode); -+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry, -+ struct inode *hdir, struct path *path, -+ struct inode **delegated_inode); -+int vfsub_mkdir(struct inode *dir, struct path *path, int mode); -+int vfsub_rmdir(struct inode *dir, struct path *path); -+ -+/* ---------------------------------------------------------------------- */ -+ -+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, -+ loff_t *ppos); -+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, -+ loff_t *ppos); -+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, -+ loff_t *ppos); -+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, -+ loff_t *ppos); -+int vfsub_flush(struct file *file, fl_owner_t id); -+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx); -+ -+/* just for type-check */ -+static inline filldir_t au_diractor(int (*func)(struct dir_context *, -+ const char *, int, loff_t, u64, -+ unsigned)) -+{ -+ 
return (filldir_t)func; -+} -+ -+ -+static inline loff_t vfsub_f_size_read(struct file *file) -+{ -+ return i_size_read(file_inode(file)); -+} -+ -+static inline unsigned int vfsub_file_flags(struct file *file) -+{ -+ unsigned int flags; -+ -+ spin_lock(&file->f_lock); -+ flags = file->f_flags; -+ spin_unlock(&file->f_lock); -+ -+ return flags; -+} -+ -+static inline void vfsub_file_accessed(struct file *h_file) -+{ -+ file_accessed(h_file); -+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/ -+} -+ -+static inline void vfsub_touch_atime(struct vfsmount *h_mnt, -+ struct dentry *h_dentry) -+{ -+ struct path h_path = { -+ .dentry = h_dentry, -+ .mnt = h_mnt -+ }; -+ touch_atime(&h_path); -+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/ -+} -+ -+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts, -+ int flags) -+{ -+ return update_time(h_inode, ts, flags); -+ /* no vfsub_update_h_iattr() since we don't have struct path */ -+} -+ -+long vfsub_splice_to(struct file *in, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags); -+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out, -+ loff_t *ppos, size_t len, unsigned int flags); -+ -+static inline long vfsub_truncate(struct path *path, loff_t length) -+{ -+ long err; -+ -+ lockdep_off(); -+ err = vfs_truncate(path, length); -+ lockdep_on(); -+ return err; -+} -+ -+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr, -+ struct file *h_file); -+int vfsub_fsync(struct file *file, struct path *path, int datasync); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin) -+{ -+ loff_t err; -+ -+ lockdep_off(); -+ err = vfs_llseek(file, offset, origin); -+ lockdep_on(); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* dirty workaround for strict 
type of fmode_t */ -+union vfsub_fmu { -+ fmode_t fm; -+ unsigned int ui; -+}; -+ -+static inline unsigned int vfsub_fmode_to_uint(fmode_t fm) -+{ -+ union vfsub_fmu u = { -+ .fm = fm -+ }; -+ -+ BUILD_BUG_ON(sizeof(u.fm) != sizeof(u.ui)); -+ -+ return u.ui; -+} -+ -+static inline fmode_t vfsub_uint_to_fmode(unsigned int ui) -+{ -+ union vfsub_fmu u = { -+ .ui = ui -+ }; -+ -+ return u.fm; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode); -+int vfsub_sio_rmdir(struct inode *dir, struct path *path); -+int vfsub_sio_notify_change(struct path *path, struct iattr *ia, -+ struct inode **delegated_inode); -+int vfsub_notify_change(struct path *path, struct iattr *ia, -+ struct inode **delegated_inode); -+int vfsub_unlink(struct inode *dir, struct path *path, -+ struct inode **delegated_inode, int force); -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_VFSUB_H__ */ -diff --git fs/aufs/wbr_policy.c fs/aufs/wbr_policy.c -new file mode 100644 -index 0000000..e3bb25a ---- /dev/null -+++ fs/aufs/wbr_policy.c -@@ -0,0 +1,751 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * policies for selecting one among multiple writable branches -+ */ -+ -+#include -+#include "aufs.h" -+ -+/* subset of cpup_attr() */ -+static noinline_for_stack -+int au_cpdown_attr(struct path *h_path, struct dentry *h_src) -+{ -+ int err, sbits; -+ struct iattr ia; -+ struct inode *h_isrc; -+ -+ h_isrc = h_src->d_inode; -+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID; -+ ia.ia_mode = h_isrc->i_mode; -+ ia.ia_uid = h_isrc->i_uid; -+ ia.ia_gid = h_isrc->i_gid; -+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); -+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc->i_flags); -+ /* no delegation since it is just created */ -+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL); -+ -+ /* is this nfs only? 
*/ -+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) { -+ ia.ia_valid = ATTR_FORCE | ATTR_MODE; -+ ia.ia_mode = h_isrc->i_mode; -+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL); -+ } -+ -+ return err; -+} -+ -+#define AuCpdown_PARENT_OPQ 1 -+#define AuCpdown_WHED (1 << 1) -+#define AuCpdown_MADE_DIR (1 << 2) -+#define AuCpdown_DIROPQ (1 << 3) -+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name) -+#define au_fset_cpdown(flags, name) \ -+ do { (flags) |= AuCpdown_##name; } while (0) -+#define au_fclr_cpdown(flags, name) \ -+ do { (flags) &= ~AuCpdown_##name; } while (0) -+ -+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst, -+ unsigned int *flags) -+{ -+ int err; -+ struct dentry *opq_dentry; -+ -+ opq_dentry = au_diropq_create(dentry, bdst); -+ err = PTR_ERR(opq_dentry); -+ if (IS_ERR(opq_dentry)) -+ goto out; -+ dput(opq_dentry); -+ au_fset_cpdown(*flags, DIROPQ); -+ -+out: -+ return err; -+} -+ -+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent, -+ struct inode *dir, aufs_bindex_t bdst) -+{ -+ int err; -+ struct path h_path; -+ struct au_branch *br; -+ -+ br = au_sbr(dentry->d_sb, bdst); -+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br); -+ err = PTR_ERR(h_path.dentry); -+ if (IS_ERR(h_path.dentry)) -+ goto out; -+ -+ err = 0; -+ if (h_path.dentry->d_inode) { -+ h_path.mnt = au_br_mnt(br); -+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path, -+ dentry); -+ } -+ dput(h_path.dentry); -+ -+out: -+ return err; -+} -+ -+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst, -+ struct au_pin *pin, -+ struct dentry *h_parent, void *arg) -+{ -+ int err, rerr; -+ aufs_bindex_t bopq, bstart; -+ struct path h_path; -+ struct dentry *parent; -+ struct inode *h_dir, *h_inode, *inode, *dir; -+ unsigned int *flags = arg; -+ -+ bstart = au_dbstart(dentry); -+ /* dentry is di-locked */ -+ parent = dget_parent(dentry); -+ dir = parent->d_inode; -+ h_dir = 
h_parent->d_inode; -+ AuDebugOn(h_dir != au_h_iptr(dir, bdst)); -+ IMustLock(h_dir); -+ -+ err = au_lkup_neg(dentry, bdst, /*wh*/0); -+ if (unlikely(err < 0)) -+ goto out; -+ h_path.dentry = au_h_dptr(dentry, bdst); -+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst); -+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path, -+ S_IRWXU | S_IRUGO | S_IXUGO); -+ if (unlikely(err)) -+ goto out_put; -+ au_fset_cpdown(*flags, MADE_DIR); -+ -+ bopq = au_dbdiropq(dentry); -+ au_fclr_cpdown(*flags, WHED); -+ au_fclr_cpdown(*flags, DIROPQ); -+ if (au_dbwh(dentry) == bdst) -+ au_fset_cpdown(*flags, WHED); -+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst) -+ au_fset_cpdown(*flags, PARENT_OPQ); -+ h_inode = h_path.dentry->d_inode; -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ if (au_ftest_cpdown(*flags, WHED)) { -+ err = au_cpdown_dir_opq(dentry, bdst, flags); -+ if (unlikely(err)) { -+ mutex_unlock(&h_inode->i_mutex); -+ goto out_dir; -+ } -+ } -+ -+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart)); -+ mutex_unlock(&h_inode->i_mutex); -+ if (unlikely(err)) -+ goto out_opq; -+ -+ if (au_ftest_cpdown(*flags, WHED)) { -+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst); -+ if (unlikely(err)) -+ goto out_opq; -+ } -+ -+ inode = dentry->d_inode; -+ if (au_ibend(inode) < bdst) -+ au_set_ibend(inode, bdst); -+ au_set_h_iptr(inode, bdst, au_igrab(h_inode), -+ au_hi_flags(inode, /*isdir*/1)); -+ goto out; /* success */ -+ -+ /* revert */ -+out_opq: -+ if (au_ftest_cpdown(*flags, DIROPQ)) { -+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD); -+ rerr = au_diropq_remove(dentry, bdst); -+ mutex_unlock(&h_inode->i_mutex); -+ if (unlikely(rerr)) { -+ AuIOErr("failed removing diropq for %pd b%d (%d)\n", -+ dentry, bdst, rerr); -+ err = -EIO; -+ goto out; -+ } -+ } -+out_dir: -+ if (au_ftest_cpdown(*flags, MADE_DIR)) { -+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path); -+ if (unlikely(rerr)) { -+ AuIOErr("failed removing %pd b%d (%d)\n", -+ dentry, bdst, 
rerr); -+ err = -EIO; -+ } -+ } -+out_put: -+ au_set_h_dptr(dentry, bdst, NULL); -+ if (au_dbend(dentry) == bdst) -+ au_update_dbend(dentry); -+out: -+ dput(parent); -+ return err; -+} -+ -+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst) -+{ -+ int err; -+ unsigned int flags; -+ -+ flags = 0; -+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* policies for create */ -+ -+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ int err, i, j, ndentry; -+ aufs_bindex_t bopq; -+ struct au_dcsub_pages dpages; -+ struct au_dpage *dpage; -+ struct dentry **dentries, *parent, *d; -+ -+ err = au_dpages_init(&dpages, GFP_NOFS); -+ if (unlikely(err)) -+ goto out; -+ parent = dget_parent(dentry); -+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0); -+ if (unlikely(err)) -+ goto out_free; -+ -+ err = bindex; -+ for (i = 0; i < dpages.ndpage; i++) { -+ dpage = dpages.dpages + i; -+ dentries = dpage->dentries; -+ ndentry = dpage->ndentry; -+ for (j = 0; j < ndentry; j++) { -+ d = dentries[j]; -+ di_read_lock_parent2(d, !AuLock_IR); -+ bopq = au_dbdiropq(d); -+ di_read_unlock(d, !AuLock_IR); -+ if (bopq >= 0 && bopq < err) -+ err = bopq; -+ } -+ } -+ -+out_free: -+ dput(parent); -+ au_dpages_free(&dpages); -+out: -+ return err; -+} -+ -+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ for (; bindex >= 0; bindex--) -+ if (!au_br_rdonly(au_sbr(sb, bindex))) -+ return bindex; -+ return -EROFS; -+} -+ -+/* top down parent */ -+static int au_wbr_create_tdp(struct dentry *dentry, -+ unsigned int flags __maybe_unused) -+{ -+ int err; -+ aufs_bindex_t bstart, bindex; -+ struct super_block *sb; -+ struct dentry *parent, *h_parent; -+ -+ sb = dentry->d_sb; -+ bstart = au_dbstart(dentry); -+ err = bstart; -+ if (!au_br_rdonly(au_sbr(sb, bstart))) -+ goto out; -+ -+ err = -EROFS; -+ parent = 
dget_parent(dentry); -+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) { -+ h_parent = au_h_dptr(parent, bindex); -+ if (!h_parent || !h_parent->d_inode) -+ continue; -+ -+ if (!au_br_rdonly(au_sbr(sb, bindex))) { -+ err = bindex; -+ break; -+ } -+ } -+ dput(parent); -+ -+ /* bottom up here */ -+ if (unlikely(err < 0)) { -+ err = au_wbr_bu(sb, bstart - 1); -+ if (err >= 0) -+ err = au_wbr_nonopq(dentry, err); -+ } -+ -+out: -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* an exception for the policy other than tdp */ -+static int au_wbr_create_exp(struct dentry *dentry) -+{ -+ int err; -+ aufs_bindex_t bwh, bdiropq; -+ struct dentry *parent; -+ -+ err = -1; -+ bwh = au_dbwh(dentry); -+ parent = dget_parent(dentry); -+ bdiropq = au_dbdiropq(parent); -+ if (bwh >= 0) { -+ if (bdiropq >= 0) -+ err = min(bdiropq, bwh); -+ else -+ err = bwh; -+ AuDbg("%d\n", err); -+ } else if (bdiropq >= 0) { -+ err = bdiropq; -+ AuDbg("%d\n", err); -+ } -+ dput(parent); -+ -+ if (err >= 0) -+ err = au_wbr_nonopq(dentry, err); -+ -+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err))) -+ err = -1; -+ -+ AuDbg("%d\n", err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* round robin */ -+static int au_wbr_create_init_rr(struct super_block *sb) -+{ -+ int err; -+ -+ err = au_wbr_bu(sb, au_sbend(sb)); -+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */ -+ /* smp_mb(); */ -+ -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags) -+{ -+ int err, nbr; -+ unsigned int u; -+ aufs_bindex_t bindex, bend; -+ struct super_block *sb; -+ atomic_t *next; -+ -+ err = au_wbr_create_exp(dentry); -+ if (err >= 0) -+ goto out; -+ -+ sb = dentry->d_sb; -+ next = &au_sbi(sb)->si_wbr_rr_next; -+ bend = au_sbend(sb); -+ nbr = bend + 1; -+ for (bindex = 0; bindex <= 
bend; bindex++) { -+ if (!au_ftest_wbr(flags, DIR)) { -+ err = atomic_dec_return(next) + 1; -+ /* modulo for 0 is meaningless */ -+ if (unlikely(!err)) -+ err = atomic_dec_return(next) + 1; -+ } else -+ err = atomic_read(next); -+ AuDbg("%d\n", err); -+ u = err; -+ err = u % nbr; -+ AuDbg("%d\n", err); -+ if (!au_br_rdonly(au_sbr(sb, err))) -+ break; -+ err = -EROFS; -+ } -+ -+ if (err >= 0) -+ err = au_wbr_nonopq(dentry, err); -+ -+out: -+ AuDbg("%d\n", err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* most free space */ -+static void au_mfs(struct dentry *dentry, struct dentry *parent) -+{ -+ struct super_block *sb; -+ struct au_branch *br; -+ struct au_wbr_mfs *mfs; -+ struct dentry *h_parent; -+ aufs_bindex_t bindex, bend; -+ int err; -+ unsigned long long b, bavail; -+ struct path h_path; -+ /* reduce the stack usage */ -+ struct kstatfs *st; -+ -+ st = kmalloc(sizeof(*st), GFP_NOFS); -+ if (unlikely(!st)) { -+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM); -+ return; -+ } -+ -+ bavail = 0; -+ sb = dentry->d_sb; -+ mfs = &au_sbi(sb)->si_wbr_mfs; -+ MtxMustLock(&mfs->mfs_lock); -+ mfs->mfs_bindex = -EROFS; -+ mfs->mfsrr_bytes = 0; -+ if (!parent) { -+ bindex = 0; -+ bend = au_sbend(sb); -+ } else { -+ bindex = au_dbstart(parent); -+ bend = au_dbtaildir(parent); -+ } -+ -+ for (; bindex <= bend; bindex++) { -+ if (parent) { -+ h_parent = au_h_dptr(parent, bindex); -+ if (!h_parent || !h_parent->d_inode) -+ continue; -+ } -+ br = au_sbr(sb, bindex); -+ if (au_br_rdonly(br)) -+ continue; -+ -+ /* sb->s_root for NFS is unreliable */ -+ h_path.mnt = au_br_mnt(br); -+ h_path.dentry = h_path.mnt->mnt_root; -+ err = vfs_statfs(&h_path, st); -+ if (unlikely(err)) { -+ AuWarn1("failed statfs, b%d, %d\n", bindex, err); -+ continue; -+ } -+ -+ /* when the available size is equal, select the lower one */ -+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail) -+ || sizeof(b) < sizeof(st->f_bsize)); -+ b = 
st->f_bavail * st->f_bsize; -+ br->br_wbr->wbr_bytes = b; -+ if (b >= bavail) { -+ bavail = b; -+ mfs->mfs_bindex = bindex; -+ mfs->mfs_jiffy = jiffies; -+ } -+ } -+ -+ mfs->mfsrr_bytes = bavail; -+ AuDbg("b%d\n", mfs->mfs_bindex); -+ kfree(st); -+} -+ -+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags) -+{ -+ int err; -+ struct dentry *parent; -+ struct super_block *sb; -+ struct au_wbr_mfs *mfs; -+ -+ err = au_wbr_create_exp(dentry); -+ if (err >= 0) -+ goto out; -+ -+ sb = dentry->d_sb; -+ parent = NULL; -+ if (au_ftest_wbr(flags, PARENT)) -+ parent = dget_parent(dentry); -+ mfs = &au_sbi(sb)->si_wbr_mfs; -+ mutex_lock(&mfs->mfs_lock); -+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire) -+ || mfs->mfs_bindex < 0 -+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex))) -+ au_mfs(dentry, parent); -+ mutex_unlock(&mfs->mfs_lock); -+ err = mfs->mfs_bindex; -+ dput(parent); -+ -+ if (err >= 0) -+ err = au_wbr_nonopq(dentry, err); -+ -+out: -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+static int au_wbr_create_init_mfs(struct super_block *sb) -+{ -+ struct au_wbr_mfs *mfs; -+ -+ mfs = &au_sbi(sb)->si_wbr_mfs; -+ mutex_init(&mfs->mfs_lock); -+ mfs->mfs_jiffy = 0; -+ mfs->mfs_bindex = -EROFS; -+ -+ return 0; -+} -+ -+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused) -+{ -+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock); -+ return 0; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* most free space and then round robin */ -+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags) -+{ -+ int err; -+ struct au_wbr_mfs *mfs; -+ -+ err = au_wbr_create_mfs(dentry, flags); -+ if (err >= 0) { -+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs; -+ mutex_lock(&mfs->mfs_lock); -+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark) -+ err = au_wbr_create_rr(dentry, flags); -+ mutex_unlock(&mfs->mfs_lock); -+ } -+ -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+static int 
au_wbr_create_init_mfsrr(struct super_block *sb) -+{ -+ int err; -+ -+ au_wbr_create_init_mfs(sb); /* ignore */ -+ err = au_wbr_create_init_rr(sb); -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* top down parent and most free space */ -+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags) -+{ -+ int err, e2; -+ unsigned long long b; -+ aufs_bindex_t bindex, bstart, bend; -+ struct super_block *sb; -+ struct dentry *parent, *h_parent; -+ struct au_branch *br; -+ -+ err = au_wbr_create_tdp(dentry, flags); -+ if (unlikely(err < 0)) -+ goto out; -+ parent = dget_parent(dentry); -+ bstart = au_dbstart(parent); -+ bend = au_dbtaildir(parent); -+ if (bstart == bend) -+ goto out_parent; /* success */ -+ -+ e2 = au_wbr_create_mfs(dentry, flags); -+ if (e2 < 0) -+ goto out_parent; /* success */ -+ -+ /* when the available size is equal, select upper one */ -+ sb = dentry->d_sb; -+ br = au_sbr(sb, err); -+ b = br->br_wbr->wbr_bytes; -+ AuDbg("b%d, %llu\n", err, b); -+ -+ for (bindex = bstart; bindex <= bend; bindex++) { -+ h_parent = au_h_dptr(parent, bindex); -+ if (!h_parent || !h_parent->d_inode) -+ continue; -+ -+ br = au_sbr(sb, bindex); -+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { -+ b = br->br_wbr->wbr_bytes; -+ err = bindex; -+ AuDbg("b%d, %llu\n", err, b); -+ } -+ } -+ -+ if (err >= 0) -+ err = au_wbr_nonopq(dentry, err); -+ -+out_parent: -+ dput(parent); -+out: -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * - top down parent -+ * - most free space with parent -+ * - most free space round-robin regardless parent -+ */ -+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags) -+{ -+ int err; -+ unsigned long long watermark; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct au_wbr_mfs *mfs; -+ -+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT); -+ 
if (unlikely(err < 0)) -+ goto out; -+ -+ sb = dentry->d_sb; -+ br = au_sbr(sb, err); -+ mfs = &au_sbi(sb)->si_wbr_mfs; -+ mutex_lock(&mfs->mfs_lock); -+ watermark = mfs->mfsrr_watermark; -+ mutex_unlock(&mfs->mfs_lock); -+ if (br->br_wbr->wbr_bytes < watermark) -+ /* regardless the parent dir */ -+ err = au_wbr_create_mfsrr(dentry, flags); -+ -+out: -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* policies for copyup */ -+ -+/* top down parent */ -+static int au_wbr_copyup_tdp(struct dentry *dentry) -+{ -+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0); -+} -+ -+/* bottom up parent */ -+static int au_wbr_copyup_bup(struct dentry *dentry) -+{ -+ int err; -+ aufs_bindex_t bindex, bstart; -+ struct dentry *parent, *h_parent; -+ struct super_block *sb; -+ -+ err = -EROFS; -+ sb = dentry->d_sb; -+ parent = dget_parent(dentry); -+ bstart = au_dbstart(parent); -+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) { -+ h_parent = au_h_dptr(parent, bindex); -+ if (!h_parent || !h_parent->d_inode) -+ continue; -+ -+ if (!au_br_rdonly(au_sbr(sb, bindex))) { -+ err = bindex; -+ break; -+ } -+ } -+ dput(parent); -+ -+ /* bottom up here */ -+ if (unlikely(err < 0)) -+ err = au_wbr_bu(sb, bstart - 1); -+ -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+/* bottom up */ -+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart) -+{ -+ int err; -+ -+ err = au_wbr_bu(dentry->d_sb, bstart); -+ AuDbg("b%d\n", err); -+ if (err > bstart) -+ err = au_wbr_nonopq(dentry, err); -+ -+ AuDbg("b%d\n", err); -+ return err; -+} -+ -+static int au_wbr_copyup_bu(struct dentry *dentry) -+{ -+ int err; -+ aufs_bindex_t bstart; -+ -+ bstart = au_dbstart(dentry); -+ err = au_wbr_do_copyup_bu(dentry, bstart); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = { -+ 
[AuWbrCopyup_TDP] = { -+ .copyup = au_wbr_copyup_tdp -+ }, -+ [AuWbrCopyup_BUP] = { -+ .copyup = au_wbr_copyup_bup -+ }, -+ [AuWbrCopyup_BU] = { -+ .copyup = au_wbr_copyup_bu -+ } -+}; -+ -+struct au_wbr_create_operations au_wbr_create_ops[] = { -+ [AuWbrCreate_TDP] = { -+ .create = au_wbr_create_tdp -+ }, -+ [AuWbrCreate_RR] = { -+ .create = au_wbr_create_rr, -+ .init = au_wbr_create_init_rr -+ }, -+ [AuWbrCreate_MFS] = { -+ .create = au_wbr_create_mfs, -+ .init = au_wbr_create_init_mfs, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_MFSV] = { -+ .create = au_wbr_create_mfs, -+ .init = au_wbr_create_init_mfs, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_MFSRR] = { -+ .create = au_wbr_create_mfsrr, -+ .init = au_wbr_create_init_mfsrr, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_MFSRRV] = { -+ .create = au_wbr_create_mfsrr, -+ .init = au_wbr_create_init_mfsrr, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_PMFS] = { -+ .create = au_wbr_create_pmfs, -+ .init = au_wbr_create_init_mfs, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_PMFSV] = { -+ .create = au_wbr_create_pmfs, -+ .init = au_wbr_create_init_mfs, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_PMFSRR] = { -+ .create = au_wbr_create_pmfsrr, -+ .init = au_wbr_create_init_mfsrr, -+ .fin = au_wbr_create_fin_mfs -+ }, -+ [AuWbrCreate_PMFSRRV] = { -+ .create = au_wbr_create_pmfsrr, -+ .init = au_wbr_create_init_mfsrr, -+ .fin = au_wbr_create_fin_mfs -+ } -+}; -diff --git fs/aufs/whout.c fs/aufs/whout.c -new file mode 100644 -index 0000000..3aa2de0 ---- /dev/null -+++ fs/aufs/whout.c -@@ -0,0 +1,1038 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * whiteout for logical deletion and opaque directory -+ */ -+ -+#include "aufs.h" -+ -+#define WH_MASK S_IRUGO -+ -+/* -+ * If a directory contains this file, then it is opaque. We start with the -+ * .wh. flag so that it is blocked by lookup. 
-+ */ -+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ, -+ sizeof(AUFS_WH_DIROPQ) - 1); -+ -+/* -+ * generate whiteout name, which is NOT terminated by NULL. -+ * @name: original d_name.name -+ * @len: original d_name.len -+ * @wh: whiteout qstr -+ * returns zero when succeeds, otherwise error. -+ * succeeded value as wh->name should be freed by kfree(). -+ */ -+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name) -+{ -+ char *p; -+ -+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN)) -+ return -ENAMETOOLONG; -+ -+ wh->len = name->len + AUFS_WH_PFX_LEN; -+ p = kmalloc(wh->len, GFP_NOFS); -+ wh->name = p; -+ if (p) { -+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); -+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len); -+ /* smp_mb(); */ -+ return 0; -+ } -+ return -ENOMEM; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * test if the @wh_name exists under @h_parent. -+ * @try_sio specifies the necessary of super-io. -+ */ -+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio) -+{ -+ int err; -+ struct dentry *wh_dentry; -+ -+ if (!try_sio) -+ wh_dentry = vfsub_lkup_one(wh_name, h_parent); -+ else -+ wh_dentry = au_sio_lkup_one(wh_name, h_parent); -+ err = PTR_ERR(wh_dentry); -+ if (IS_ERR(wh_dentry)) -+ goto out; -+ -+ err = 0; -+ if (!wh_dentry->d_inode) -+ goto out_wh; /* success */ -+ -+ err = 1; -+ if (S_ISREG(wh_dentry->d_inode->i_mode)) -+ goto out_wh; /* success */ -+ -+ err = -EIO; -+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n", -+ wh_dentry, wh_dentry->d_inode->i_mode); -+ -+out_wh: -+ dput(wh_dentry); -+out: -+ return err; -+} -+ -+/* -+ * test if the @h_dentry sets opaque or not. 
-+ */ -+int au_diropq_test(struct dentry *h_dentry) -+{ -+ int err; -+ struct inode *h_dir; -+ -+ h_dir = h_dentry->d_inode; -+ err = au_wh_test(h_dentry, &diropq_name, -+ au_test_h_perm_sio(h_dir, MAY_EXEC)); -+ return err; -+} -+ -+/* -+ * returns a negative dentry whose name is unique and temporary. -+ */ -+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, -+ struct qstr *prefix) -+{ -+ struct dentry *dentry; -+ int i; -+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1], -+ *name, *p; -+ /* strict atomic_t is unnecessary here */ -+ static unsigned short cnt; -+ struct qstr qs; -+ -+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN); -+ -+ name = defname; -+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1; -+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) { -+ dentry = ERR_PTR(-ENAMETOOLONG); -+ if (unlikely(qs.len > NAME_MAX)) -+ goto out; -+ dentry = ERR_PTR(-ENOMEM); -+ name = kmalloc(qs.len + 1, GFP_NOFS); -+ if (unlikely(!name)) -+ goto out; -+ } -+ -+ /* doubly whiteout-ed */ -+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2); -+ p = name + AUFS_WH_PFX_LEN * 2; -+ memcpy(p, prefix->name, prefix->len); -+ p += prefix->len; -+ *p++ = '.'; -+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN); -+ -+ qs.name = name; -+ for (i = 0; i < 3; i++) { -+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++); -+ dentry = au_sio_lkup_one(&qs, h_parent); -+ if (IS_ERR(dentry) || !dentry->d_inode) -+ goto out_name; -+ dput(dentry); -+ } -+ /* pr_warn("could not get random name\n"); */ -+ dentry = ERR_PTR(-EEXIST); -+ AuDbg("%.*s\n", AuLNPair(&qs)); -+ BUG(); -+ -+out_name: -+ if (name != defname) -+ kfree(name); -+out: -+ AuTraceErrPtr(dentry); -+ return dentry; -+} -+ -+/* -+ * rename the @h_dentry on @br to the whiteouted temporary name. 
-+ */ -+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br) -+{ -+ int err; -+ struct path h_path = { -+ .mnt = au_br_mnt(br) -+ }; -+ struct inode *h_dir, *delegated; -+ struct dentry *h_parent; -+ -+ h_parent = h_dentry->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); -+ -+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name); -+ err = PTR_ERR(h_path.dentry); -+ if (IS_ERR(h_path.dentry)) -+ goto out; -+ -+ /* under the same dir, no need to lock_rename() */ -+ delegated = NULL; -+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated); -+ AuTraceErr(err); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal rename\n"); -+ iput(delegated); -+ } -+ dput(h_path.dentry); -+ -+out: -+ AuTraceErr(err); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * functions for removing a whiteout -+ */ -+ -+static int do_unlink_wh(struct inode *h_dir, struct path *h_path) -+{ -+ int err, force; -+ struct inode *delegated; -+ -+ /* -+ * forces superio when the dir has a sticky bit. -+ * this may be a violation of unix fs semantics. 
-+ */ -+ force = (h_dir->i_mode & S_ISVTX) -+ && !uid_eq(current_fsuid(), h_path->dentry->d_inode->i_uid); -+ delegated = NULL; -+ err = vfsub_unlink(h_dir, h_path, &delegated, force); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ return err; -+} -+ -+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, -+ struct dentry *dentry) -+{ -+ int err; -+ -+ err = do_unlink_wh(h_dir, h_path); -+ if (!err && dentry) -+ au_set_dbwh(dentry, -1); -+ -+ return err; -+} -+ -+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh, -+ struct au_branch *br) -+{ -+ int err; -+ struct path h_path = { -+ .mnt = au_br_mnt(br) -+ }; -+ -+ err = 0; -+ h_path.dentry = vfsub_lkup_one(wh, h_parent); -+ if (IS_ERR(h_path.dentry)) -+ err = PTR_ERR(h_path.dentry); -+ else { -+ if (h_path.dentry->d_inode -+ && S_ISREG(h_path.dentry->d_inode->i_mode)) -+ err = do_unlink_wh(h_parent->d_inode, &h_path); -+ dput(h_path.dentry); -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * initialize/clean whiteout for a branch -+ */ -+ -+static void au_wh_clean(struct inode *h_dir, struct path *whpath, -+ const int isdir) -+{ -+ int err; -+ struct inode *delegated; -+ -+ if (!whpath->dentry->d_inode) -+ return; -+ -+ if (isdir) -+ err = vfsub_rmdir(h_dir, whpath); -+ else { -+ delegated = NULL; -+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ } -+ if (unlikely(err)) -+ pr_warn("failed removing %pd (%d), ignored.\n", -+ whpath->dentry, err); -+} -+ -+static int test_linkable(struct dentry *h_root) -+{ -+ struct inode *h_dir = h_root->d_inode; -+ -+ if (h_dir->i_op->link) -+ return 0; -+ -+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n", 
-+ h_root, au_sbtype(h_root->d_sb)); -+ return -ENOSYS; -+} -+ -+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */ -+static int au_whdir(struct inode *h_dir, struct path *path) -+{ -+ int err; -+ -+ err = -EEXIST; -+ if (!path->dentry->d_inode) { -+ int mode = S_IRWXU; -+ -+ if (au_test_nfs(path->dentry->d_sb)) -+ mode |= S_IXUGO; -+ err = vfsub_mkdir(h_dir, path, mode); -+ } else if (S_ISDIR(path->dentry->d_inode->i_mode)) -+ err = 0; -+ else -+ pr_err("unknown %pd exists\n", path->dentry); -+ -+ return err; -+} -+ -+struct au_wh_base { -+ const struct qstr *name; -+ struct dentry *dentry; -+}; -+ -+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[], -+ struct path *h_path) -+{ -+ h_path->dentry = base[AuBrWh_BASE].dentry; -+ au_wh_clean(h_dir, h_path, /*isdir*/0); -+ h_path->dentry = base[AuBrWh_PLINK].dentry; -+ au_wh_clean(h_dir, h_path, /*isdir*/1); -+ h_path->dentry = base[AuBrWh_ORPH].dentry; -+ au_wh_clean(h_dir, h_path, /*isdir*/1); -+} -+ -+/* -+ * returns tri-state, -+ * minus: error, caller should print the mesage -+ * zero: succuess -+ * plus: error, caller should NOT print the mesage -+ */ -+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr, -+ int do_plink, struct au_wh_base base[], -+ struct path *h_path) -+{ -+ int err; -+ struct inode *h_dir; -+ -+ h_dir = h_root->d_inode; -+ h_path->dentry = base[AuBrWh_BASE].dentry; -+ au_wh_clean(h_dir, h_path, /*isdir*/0); -+ h_path->dentry = base[AuBrWh_PLINK].dentry; -+ if (do_plink) { -+ err = test_linkable(h_root); -+ if (unlikely(err)) { -+ err = 1; -+ goto out; -+ } -+ -+ err = au_whdir(h_dir, h_path); -+ if (unlikely(err)) -+ goto out; -+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); -+ } else -+ au_wh_clean(h_dir, h_path, /*isdir*/1); -+ h_path->dentry = base[AuBrWh_ORPH].dentry; -+ err = au_whdir(h_dir, h_path); -+ if (unlikely(err)) -+ goto out; -+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); -+ -+out: -+ return err; -+} -+ -+/* -+ * 
for the moment, aufs supports the branch filesystem which does not support -+ * link(2). testing on FAT which does not support i_op->setattr() fully either, -+ * copyup failed. finally, such filesystem will not be used as the writable -+ * branch. -+ * -+ * returns tri-state, see above. -+ */ -+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr, -+ int do_plink, struct au_wh_base base[], -+ struct path *h_path) -+{ -+ int err; -+ struct inode *h_dir; -+ -+ WbrWhMustWriteLock(wbr); -+ -+ err = test_linkable(h_root); -+ if (unlikely(err)) { -+ err = 1; -+ goto out; -+ } -+ -+ /* -+ * todo: should this create be done in /sbin/mount.aufs helper? -+ */ -+ err = -EEXIST; -+ h_dir = h_root->d_inode; -+ if (!base[AuBrWh_BASE].dentry->d_inode) { -+ h_path->dentry = base[AuBrWh_BASE].dentry; -+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true); -+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode)) -+ err = 0; -+ else -+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry); -+ if (unlikely(err)) -+ goto out; -+ -+ h_path->dentry = base[AuBrWh_PLINK].dentry; -+ if (do_plink) { -+ err = au_whdir(h_dir, h_path); -+ if (unlikely(err)) -+ goto out; -+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); -+ } else -+ au_wh_clean(h_dir, h_path, /*isdir*/1); -+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry); -+ -+ h_path->dentry = base[AuBrWh_ORPH].dentry; -+ err = au_whdir(h_dir, h_path); -+ if (unlikely(err)) -+ goto out; -+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); -+ -+out: -+ return err; -+} -+ -+/* -+ * initialize the whiteout base file/dir for @br. 
-+ */ -+int au_wh_init(struct au_branch *br, struct super_block *sb) -+{ -+ int err, i; -+ const unsigned char do_plink -+ = !!au_opt_test(au_mntflags(sb), PLINK); -+ struct inode *h_dir; -+ struct path path = br->br_path; -+ struct dentry *h_root = path.dentry; -+ struct au_wbr *wbr = br->br_wbr; -+ static const struct qstr base_name[] = { -+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME, -+ sizeof(AUFS_BASE_NAME) - 1), -+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME, -+ sizeof(AUFS_PLINKDIR_NAME) - 1), -+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME, -+ sizeof(AUFS_ORPHDIR_NAME) - 1) -+ }; -+ struct au_wh_base base[] = { -+ [AuBrWh_BASE] = { -+ .name = base_name + AuBrWh_BASE, -+ .dentry = NULL -+ }, -+ [AuBrWh_PLINK] = { -+ .name = base_name + AuBrWh_PLINK, -+ .dentry = NULL -+ }, -+ [AuBrWh_ORPH] = { -+ .name = base_name + AuBrWh_ORPH, -+ .dentry = NULL -+ } -+ }; -+ -+ if (wbr) -+ WbrWhMustWriteLock(wbr); -+ -+ for (i = 0; i < AuBrWh_Last; i++) { -+ /* doubly whiteouted */ -+ struct dentry *d; -+ -+ d = au_wh_lkup(h_root, (void *)base[i].name, br); -+ err = PTR_ERR(d); -+ if (IS_ERR(d)) -+ goto out; -+ -+ base[i].dentry = d; -+ AuDebugOn(wbr -+ && wbr->wbr_wh[i] -+ && wbr->wbr_wh[i] != base[i].dentry); -+ } -+ -+ if (wbr) -+ for (i = 0; i < AuBrWh_Last; i++) { -+ dput(wbr->wbr_wh[i]); -+ wbr->wbr_wh[i] = NULL; -+ } -+ -+ err = 0; -+ if (!au_br_writable(br->br_perm)) { -+ h_dir = h_root->d_inode; -+ au_wh_init_ro(h_dir, base, &path); -+ } else if (!au_br_wh_linkable(br->br_perm)) { -+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path); -+ if (err > 0) -+ goto out; -+ else if (err) -+ goto out_err; -+ } else { -+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path); -+ if (err > 0) -+ goto out; -+ else if (err) -+ goto out_err; -+ } -+ goto out; /* success */ -+ -+out_err: -+ pr_err("an error(%d) on the writable branch %pd(%s)\n", -+ err, h_root, au_sbtype(h_root->d_sb)); -+out: -+ for (i = 0; i < AuBrWh_Last; i++) -+ dput(base[i].dentry); -+ return 
err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+/* -+ * whiteouts are all hard-linked usually. -+ * when its link count reaches a ceiling, we create a new whiteout base -+ * asynchronously. -+ */ -+ -+struct reinit_br_wh { -+ struct super_block *sb; -+ struct au_branch *br; -+}; -+ -+static void reinit_br_wh(void *arg) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct path h_path; -+ struct reinit_br_wh *a = arg; -+ struct au_wbr *wbr; -+ struct inode *dir, *delegated; -+ struct dentry *h_root; -+ struct au_hinode *hdir; -+ -+ err = 0; -+ wbr = a->br->br_wbr; -+ /* big aufs lock */ -+ si_noflush_write_lock(a->sb); -+ if (!au_br_writable(a->br->br_perm)) -+ goto out; -+ bindex = au_br_index(a->sb, a->br->br_id); -+ if (unlikely(bindex < 0)) -+ goto out; -+ -+ di_read_lock_parent(a->sb->s_root, AuLock_IR); -+ dir = a->sb->s_root->d_inode; -+ hdir = au_hi(dir, bindex); -+ h_root = au_h_dptr(a->sb->s_root, bindex); -+ AuDebugOn(h_root != au_br_dentry(a->br)); -+ -+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); -+ wbr_wh_write_lock(wbr); -+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode, -+ h_root, a->br); -+ if (!err) { -+ h_path.dentry = wbr->wbr_whbase; -+ h_path.mnt = au_br_mnt(a->br); -+ delegated = NULL; -+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, -+ /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ } else { -+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase); -+ err = 0; -+ } -+ dput(wbr->wbr_whbase); -+ wbr->wbr_whbase = NULL; -+ if (!err) -+ err = au_wh_init(a->br, a->sb); -+ wbr_wh_write_unlock(wbr); -+ au_hn_imtx_unlock(hdir); -+ di_read_unlock(a->sb->s_root, AuLock_IR); -+ -+out: -+ if (wbr) -+ atomic_dec(&wbr->wbr_wh_running); -+ atomic_dec(&a->br->br_count); -+ si_write_unlock(a->sb); -+ au_nwt_done(&au_sbi(a->sb)->si_nowait); -+ kfree(arg); -+ if 
(unlikely(err)) -+ AuIOErr("err %d\n", err); -+} -+ -+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br) -+{ -+ int do_dec, wkq_err; -+ struct reinit_br_wh *arg; -+ -+ do_dec = 1; -+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1) -+ goto out; -+ -+ /* ignore ENOMEM */ -+ arg = kmalloc(sizeof(*arg), GFP_NOFS); -+ if (arg) { -+ /* -+ * dec(wh_running), kfree(arg) and dec(br_count) -+ * in reinit function -+ */ -+ arg->sb = sb; -+ arg->br = br; -+ atomic_inc(&br->br_count); -+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0); -+ if (unlikely(wkq_err)) { -+ atomic_dec(&br->br_wbr->wbr_wh_running); -+ atomic_dec(&br->br_count); -+ kfree(arg); -+ } -+ do_dec = 0; -+ } -+ -+out: -+ if (do_dec) -+ atomic_dec(&br->br_wbr->wbr_wh_running); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * create the whiteout @wh. -+ */ -+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex, -+ struct dentry *wh) -+{ -+ int err; -+ struct path h_path = { -+ .dentry = wh -+ }; -+ struct au_branch *br; -+ struct au_wbr *wbr; -+ struct dentry *h_parent; -+ struct inode *h_dir, *delegated; -+ -+ h_parent = wh->d_parent; /* dir inode is locked */ -+ h_dir = h_parent->d_inode; -+ IMustLock(h_dir); -+ -+ br = au_sbr(sb, bindex); -+ h_path.mnt = au_br_mnt(br); -+ wbr = br->br_wbr; -+ wbr_wh_read_lock(wbr); -+ if (wbr->wbr_whbase) { -+ delegated = NULL; -+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal link\n"); -+ iput(delegated); -+ } -+ if (!err || err != -EMLINK) -+ goto out; -+ -+ /* link count full. re-initialize br_whbase. 
*/ -+ kick_reinit_br_wh(sb, br); -+ } -+ -+ /* return this error in this context */ -+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true); -+ -+out: -+ wbr_wh_read_unlock(wbr); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * create or remove the diropq. -+ */ -+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int flags) -+{ -+ struct dentry *opq_dentry, *h_dentry; -+ struct super_block *sb; -+ struct au_branch *br; -+ int err; -+ -+ sb = dentry->d_sb; -+ br = au_sbr(sb, bindex); -+ h_dentry = au_h_dptr(dentry, bindex); -+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry); -+ if (IS_ERR(opq_dentry)) -+ goto out; -+ -+ if (au_ftest_diropq(flags, CREATE)) { -+ err = link_or_create_wh(sb, bindex, opq_dentry); -+ if (!err) { -+ au_set_dbdiropq(dentry, bindex); -+ goto out; /* success */ -+ } -+ } else { -+ struct path tmp = { -+ .dentry = opq_dentry, -+ .mnt = au_br_mnt(br) -+ }; -+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp); -+ if (!err) -+ au_set_dbdiropq(dentry, -1); -+ } -+ dput(opq_dentry); -+ opq_dentry = ERR_PTR(err); -+ -+out: -+ return opq_dentry; -+} -+ -+struct do_diropq_args { -+ struct dentry **errp; -+ struct dentry *dentry; -+ aufs_bindex_t bindex; -+ unsigned int flags; -+}; -+ -+static void call_do_diropq(void *args) -+{ -+ struct do_diropq_args *a = args; -+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags); -+} -+ -+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int flags) -+{ -+ struct dentry *diropq, *h_dentry; -+ -+ h_dentry = au_h_dptr(dentry, bindex); -+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE)) -+ diropq = do_diropq(dentry, bindex, flags); -+ else { -+ int wkq_err; -+ struct do_diropq_args args = { -+ .errp = &diropq, -+ .dentry = dentry, -+ .bindex = bindex, -+ .flags = flags -+ }; -+ -+ wkq_err = au_wkq_wait(call_do_diropq, &args); -+ if 
(unlikely(wkq_err)) -+ diropq = ERR_PTR(wkq_err); -+ } -+ -+ return diropq; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * lookup whiteout dentry. -+ * @h_parent: lower parent dentry which must exist and be locked -+ * @base_name: name of dentry which will be whiteouted -+ * returns dentry for whiteout. -+ */ -+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, -+ struct au_branch *br) -+{ -+ int err; -+ struct qstr wh_name; -+ struct dentry *wh_dentry; -+ -+ err = au_wh_name_alloc(&wh_name, base_name); -+ wh_dentry = ERR_PTR(err); -+ if (!err) { -+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent); -+ kfree(wh_name.name); -+ } -+ return wh_dentry; -+} -+ -+/* -+ * link/create a whiteout for @dentry on @bindex. -+ */ -+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent) -+{ -+ struct dentry *wh_dentry; -+ struct super_block *sb; -+ int err; -+ -+ sb = dentry->d_sb; -+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex)); -+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) { -+ err = link_or_create_wh(sb, bindex, wh_dentry); -+ if (!err) -+ au_set_dbwh(dentry, bindex); -+ else { -+ dput(wh_dentry); -+ wh_dentry = ERR_PTR(err); -+ } -+ } -+ -+ return wh_dentry; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* Delete all whiteouts in this directory on branch bindex. 
*/ -+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist, -+ aufs_bindex_t bindex, struct au_branch *br) -+{ -+ int err; -+ unsigned long ul, n; -+ struct qstr wh_name; -+ char *p; -+ struct hlist_head *head; -+ struct au_vdir_wh *pos; -+ struct au_vdir_destr *str; -+ -+ err = -ENOMEM; -+ p = (void *)__get_free_page(GFP_NOFS); -+ wh_name.name = p; -+ if (unlikely(!wh_name.name)) -+ goto out; -+ -+ err = 0; -+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); -+ p += AUFS_WH_PFX_LEN; -+ n = whlist->nh_num; -+ head = whlist->nh_head; -+ for (ul = 0; !err && ul < n; ul++, head++) { -+ hlist_for_each_entry(pos, head, wh_hash) { -+ if (pos->wh_bindex != bindex) -+ continue; -+ -+ str = &pos->wh_str; -+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { -+ memcpy(p, str->name, str->len); -+ wh_name.len = AUFS_WH_PFX_LEN + str->len; -+ err = unlink_wh_name(h_dentry, &wh_name, br); -+ if (!err) -+ continue; -+ break; -+ } -+ AuIOErr("whiteout name too long %.*s\n", -+ str->len, str->name); -+ err = -EIO; -+ break; -+ } -+ } -+ free_page((unsigned long)wh_name.name); -+ -+out: -+ return err; -+} -+ -+struct del_wh_children_args { -+ int *errp; -+ struct dentry *h_dentry; -+ struct au_nhash *whlist; -+ aufs_bindex_t bindex; -+ struct au_branch *br; -+}; -+ -+static void call_del_wh_children(void *args) -+{ -+ struct del_wh_children_args *a = args; -+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp) -+{ -+ struct au_whtmp_rmdir *whtmp; -+ int err; -+ unsigned int rdhash; -+ -+ SiMustAnyLock(sb); -+ -+ whtmp = kmalloc(sizeof(*whtmp), gfp); -+ if (unlikely(!whtmp)) { -+ whtmp = ERR_PTR(-ENOMEM); -+ goto out; -+ } -+ -+ whtmp->dir = NULL; -+ whtmp->br = NULL; -+ whtmp->wh_dentry = NULL; -+ /* no estimation for dir size */ -+ rdhash = au_sbi(sb)->si_rdhash; -+ if (!rdhash) -+ rdhash 
= AUFS_RDHASH_DEF; -+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp); -+ if (unlikely(err)) { -+ kfree(whtmp); -+ whtmp = ERR_PTR(err); -+ } -+ -+out: -+ return whtmp; -+} -+ -+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp) -+{ -+ if (whtmp->br) -+ atomic_dec(&whtmp->br->br_count); -+ dput(whtmp->wh_dentry); -+ iput(whtmp->dir); -+ au_nhash_wh_free(&whtmp->whlist); -+ kfree(whtmp); -+} -+ -+/* -+ * rmdir the whiteouted temporary named dir @h_dentry. -+ * @whlist: whiteouted children. -+ */ -+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct au_nhash *whlist) -+{ -+ int err; -+ struct path h_tmp; -+ struct inode *wh_inode, *h_dir; -+ struct au_branch *br; -+ -+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */ -+ IMustLock(h_dir); -+ -+ br = au_sbr(dir->i_sb, bindex); -+ wh_inode = wh_dentry->d_inode; -+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD); -+ -+ /* -+ * someone else might change some whiteouts while we were sleeping. -+ * it means this whlist may have an obsoleted entry. 
-+ */ -+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE)) -+ err = del_wh_children(wh_dentry, whlist, bindex, br); -+ else { -+ int wkq_err; -+ struct del_wh_children_args args = { -+ .errp = &err, -+ .h_dentry = wh_dentry, -+ .whlist = whlist, -+ .bindex = bindex, -+ .br = br -+ }; -+ -+ wkq_err = au_wkq_wait(call_del_wh_children, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ mutex_unlock(&wh_inode->i_mutex); -+ -+ if (!err) { -+ h_tmp.dentry = wh_dentry; -+ h_tmp.mnt = au_br_mnt(br); -+ err = vfsub_rmdir(h_dir, &h_tmp); -+ } -+ -+ if (!err) { -+ if (au_ibstart(dir) == bindex) { -+ /* todo: dir->i_mutex is necessary */ -+ au_cpup_attr_timesizes(dir); -+ vfsub_drop_nlink(dir); -+ } -+ return 0; /* success */ -+ } -+ -+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err); -+ return err; -+} -+ -+static void call_rmdir_whtmp(void *args) -+{ -+ int err; -+ aufs_bindex_t bindex; -+ struct au_whtmp_rmdir *a = args; -+ struct super_block *sb; -+ struct dentry *h_parent; -+ struct inode *h_dir; -+ struct au_hinode *hdir; -+ -+ /* rmdir by nfsd may cause deadlock with this i_mutex */ -+ /* mutex_lock(&a->dir->i_mutex); */ -+ err = -EROFS; -+ sb = a->dir->i_sb; -+ si_read_lock(sb, !AuLock_FLUSH); -+ if (!au_br_writable(a->br->br_perm)) -+ goto out; -+ bindex = au_br_index(sb, a->br->br_id); -+ if (unlikely(bindex < 0)) -+ goto out; -+ -+ err = -EIO; -+ ii_write_lock_parent(a->dir); -+ h_parent = dget_parent(a->wh_dentry); -+ h_dir = h_parent->d_inode; -+ hdir = au_hi(a->dir, bindex); -+ err = vfsub_mnt_want_write(au_br_mnt(a->br)); -+ if (unlikely(err)) -+ goto out_mnt; -+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT); -+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent, -+ a->br); -+ if (!err) -+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist); -+ au_hn_imtx_unlock(hdir); -+ vfsub_mnt_drop_write(au_br_mnt(a->br)); -+ -+out_mnt: -+ dput(h_parent); -+ ii_write_unlock(a->dir); -+out: -+ /* 
mutex_unlock(&a->dir->i_mutex); */ -+ au_whtmp_rmdir_free(a); -+ si_read_unlock(sb); -+ au_nwt_done(&au_sbi(sb)->si_nowait); -+ if (unlikely(err)) -+ AuIOErr("err %d\n", err); -+} -+ -+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args) -+{ -+ int wkq_err; -+ struct super_block *sb; -+ -+ IMustLock(dir); -+ -+ /* all post-process will be done in do_rmdir_whtmp(). */ -+ sb = dir->i_sb; -+ args->dir = au_igrab(dir); -+ args->br = au_sbr(sb, bindex); -+ atomic_inc(&args->br->br_count); -+ args->wh_dentry = dget(wh_dentry); -+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0); -+ if (unlikely(wkq_err)) { -+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err); -+ au_whtmp_rmdir_free(args); -+ } -+} -diff --git fs/aufs/whout.h fs/aufs/whout.h -new file mode 100644 -index 0000000..983288b ---- /dev/null -+++ fs/aufs/whout.h -@@ -0,0 +1,72 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * whiteout for logical deletion and opaque directory -+ */ -+ -+#ifndef __AUFS_WHOUT_H__ -+#define __AUFS_WHOUT_H__ -+ -+#ifdef __KERNEL__ -+ -+#include "dir.h" -+ -+/* whout.c */ -+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name); -+struct au_branch; -+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio); -+int au_diropq_test(struct dentry *h_dentry); -+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br, -+ struct qstr *prefix); -+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br); -+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path, -+ struct dentry *dentry); -+int au_wh_init(struct au_branch *br, struct super_block *sb); -+ -+/* diropq flags */ -+#define AuDiropq_CREATE 1 -+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name) -+#define au_fset_diropq(flags, name) \ -+ do { (flags) |= AuDiropq_##name; } while (0) -+#define au_fclr_diropq(flags, name) \ -+ do { (flags) &= 
~AuDiropq_##name; } while (0) -+ -+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex, -+ unsigned int flags); -+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name, -+ struct au_branch *br); -+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex, -+ struct dentry *h_parent); -+ -+/* real rmdir for the whiteout-ed dir */ -+struct au_whtmp_rmdir { -+ struct inode *dir; -+ struct au_branch *br; -+ struct dentry *wh_dentry; -+ struct au_nhash whlist; -+}; -+ -+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp); -+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp); -+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct au_nhash *whlist); -+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex, -+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline struct dentry *au_diropq_create(struct dentry *dentry, -+ aufs_bindex_t bindex) -+{ -+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE); -+} -+ -+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex) -+{ -+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE)); -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_WHOUT_H__ */ -diff --git fs/aufs/wkq.c fs/aufs/wkq.c -new file mode 100644 -index 0000000..8b04d42 ---- /dev/null -+++ fs/aufs/wkq.c -@@ -0,0 +1,200 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. 
Okajima -+ */ -+ -+/* -+ * workqueue for asynchronous/super-io operations -+ * todo: try new dredential scheme -+ */ -+ -+#include -+#include "aufs.h" -+ -+/* internal workqueue named AUFS_WKQ_NAME */ -+ -+static struct workqueue_struct *au_wkq; -+ -+struct au_wkinfo { -+ struct work_struct wk; -+ struct kobject *kobj; -+ -+ unsigned int flags; /* see wkq.h */ -+ -+ au_wkq_func_t func; -+ void *args; -+ -+ struct completion *comp; -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void wkq_func(struct work_struct *wk) -+{ -+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk); -+ -+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)); -+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY); -+ -+ wkinfo->func(wkinfo->args); -+ if (au_ftest_wkq(wkinfo->flags, WAIT)) -+ complete(wkinfo->comp); -+ else { -+ kobject_put(wkinfo->kobj); -+ module_put(THIS_MODULE); /* todo: ?? */ -+ kfree(wkinfo); -+ } -+} -+ -+/* -+ * Since struct completion is large, try allocating it dynamically. 
-+ */ -+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */ -+#define AuWkqCompDeclare(name) struct completion *comp = NULL -+ -+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp) -+{ -+ *comp = kmalloc(sizeof(**comp), GFP_NOFS); -+ if (*comp) { -+ init_completion(*comp); -+ wkinfo->comp = *comp; -+ return 0; -+ } -+ return -ENOMEM; -+} -+ -+static void au_wkq_comp_free(struct completion *comp) -+{ -+ kfree(comp); -+} -+ -+#else -+ -+/* no braces */ -+#define AuWkqCompDeclare(name) \ -+ DECLARE_COMPLETION_ONSTACK(_ ## name); \ -+ struct completion *comp = &_ ## name -+ -+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp) -+{ -+ wkinfo->comp = *comp; -+ return 0; -+} -+ -+static void au_wkq_comp_free(struct completion *comp __maybe_unused) -+{ -+ /* empty */ -+} -+#endif /* 4KSTACKS */ -+ -+static void au_wkq_run(struct au_wkinfo *wkinfo) -+{ -+ if (au_ftest_wkq(wkinfo->flags, NEST)) { -+ if (au_wkq_test()) { -+ AuWarn1("wkq from wkq, unless silly-rename on NFS," -+ " due to a dead dir by UDBA?\n"); -+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT)); -+ } -+ } else -+ au_dbg_verify_kthread(); -+ -+ if (au_ftest_wkq(wkinfo->flags, WAIT)) { -+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func); -+ queue_work(au_wkq, &wkinfo->wk); -+ } else { -+ INIT_WORK(&wkinfo->wk, wkq_func); -+ schedule_work(&wkinfo->wk); -+ } -+} -+ -+/* -+ * Be careful. It is easy to make deadlock happen. 
-+ * processA: lock, wkq and wait -+ * processB: wkq and wait, lock in wkq -+ * --> deadlock -+ */ -+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args) -+{ -+ int err; -+ AuWkqCompDeclare(comp); -+ struct au_wkinfo wkinfo = { -+ .flags = flags, -+ .func = func, -+ .args = args -+ }; -+ -+ err = au_wkq_comp_alloc(&wkinfo, &comp); -+ if (!err) { -+ au_wkq_run(&wkinfo); -+ /* no timeout, no interrupt */ -+ wait_for_completion(wkinfo.comp); -+ au_wkq_comp_free(comp); -+ destroy_work_on_stack(&wkinfo.wk); -+ } -+ -+ return err; -+ -+} -+ -+/* -+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a -+ * problem in a concurrent umounting. -+ */ -+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb, -+ unsigned int flags) -+{ -+ int err; -+ struct au_wkinfo *wkinfo; -+ -+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len); -+ -+ /* -+ * wkq_func() must free this wkinfo. -+ * it highly depends upon the implementation of workqueue. -+ */ -+ err = 0; -+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS); -+ if (wkinfo) { -+ wkinfo->kobj = &au_sbi(sb)->si_kobj; -+ wkinfo->flags = flags & ~AuWkq_WAIT; -+ wkinfo->func = func; -+ wkinfo->args = args; -+ wkinfo->comp = NULL; -+ kobject_get(wkinfo->kobj); -+ __module_get(THIS_MODULE); /* todo: ?? 
*/ -+ -+ au_wkq_run(wkinfo); -+ } else { -+ err = -ENOMEM; -+ au_nwt_done(&au_sbi(sb)->si_nowait); -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+void au_nwt_init(struct au_nowait_tasks *nwt) -+{ -+ atomic_set(&nwt->nw_len, 0); -+ /* smp_mb(); */ /* atomic_set */ -+ init_waitqueue_head(&nwt->nw_wq); -+} -+ -+void au_wkq_fin(void) -+{ -+ destroy_workqueue(au_wkq); -+} -+ -+int __init au_wkq_init(void) -+{ -+ int err; -+ -+ err = 0; -+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE); -+ if (IS_ERR(au_wkq)) -+ err = PTR_ERR(au_wkq); -+ else if (!au_wkq) -+ err = -ENOMEM; -+ -+ return err; -+} -diff --git fs/aufs/wkq.h fs/aufs/wkq.h -new file mode 100644 -index 0000000..f545c66 ---- /dev/null -+++ fs/aufs/wkq.h -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * workqueue for asynchronous/super-io operations -+ * todo: try new credentials management scheme -+ */ -+ -+#ifndef __AUFS_WKQ_H__ -+#define __AUFS_WKQ_H__ -+ -+#ifdef __KERNEL__ -+ -+struct super_block; -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue -+ */ -+struct au_nowait_tasks { -+ atomic_t nw_len; -+ wait_queue_head_t nw_wq; -+}; -+ -+/* ---------------------------------------------------------------------- */ -+ -+typedef void (*au_wkq_func_t)(void *args); -+ -+/* wkq flags */ -+#define AuWkq_WAIT 1 -+#define AuWkq_NEST (1 << 1) -+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name) -+#define au_fset_wkq(flags, name) \ -+ do { (flags) |= AuWkq_##name; } while (0) -+#define au_fclr_wkq(flags, name) \ -+ do { (flags) &= ~AuWkq_##name; } while (0) -+ -+#ifndef CONFIG_AUFS_HNOTIFY -+#undef AuWkq_NEST -+#define AuWkq_NEST 0 -+#endif -+ -+/* wkq.c */ -+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args); -+int au_wkq_nowait(au_wkq_func_t func, void 
*args, struct super_block *sb, -+ unsigned int flags); -+void au_nwt_init(struct au_nowait_tasks *nwt); -+int __init au_wkq_init(void); -+void au_wkq_fin(void); -+ -+/* ---------------------------------------------------------------------- */ -+ -+static inline int au_wkq_test(void) -+{ -+ return current->flags & PF_WQ_WORKER; -+} -+ -+static inline int au_wkq_wait(au_wkq_func_t func, void *args) -+{ -+ return au_wkq_do_wait(AuWkq_WAIT, func, args); -+} -+ -+static inline void au_nwt_done(struct au_nowait_tasks *nwt) -+{ -+ if (atomic_dec_and_test(&nwt->nw_len)) -+ wake_up_all(&nwt->nw_wq); -+} -+ -+static inline int au_nwt_flush(struct au_nowait_tasks *nwt) -+{ -+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len)); -+ return 0; -+} -+ -+#endif /* __KERNEL__ */ -+#endif /* __AUFS_WKQ_H__ */ -diff --git fs/aufs/xino.c fs/aufs/xino.c -new file mode 100644 -index 0000000..1e41e85 ---- /dev/null -+++ fs/aufs/xino.c -@@ -0,0 +1,1299 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+/* -+ * external inode number translation table and bitmap -+ */ -+ -+#include -+#include -+#include "aufs.h" -+ -+/* todo: unnecessary to support mmap_sem since kernel-space? */ -+ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size, -+ loff_t *pos) -+{ -+ ssize_t err; -+ mm_segment_t oldfs; -+ union { -+ void *k; -+ char __user *u; -+ } buf; -+ -+ buf.k = kbuf; -+ oldfs = get_fs(); -+ set_fs(KERNEL_DS); -+ do { -+ /* todo: signal_pending? 
*/ -+ err = func(file, buf.u, size, pos); -+ } while (err == -EAGAIN || err == -EINTR); -+ set_fs(oldfs); -+ -+#if 0 /* reserved for future use */ -+ if (err > 0) -+ fsnotify_access(file->f_dentry); -+#endif -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf, -+ size_t size, loff_t *pos) -+{ -+ ssize_t err; -+ mm_segment_t oldfs; -+ union { -+ void *k; -+ const char __user *u; -+ } buf; -+ -+ buf.k = kbuf; -+ oldfs = get_fs(); -+ set_fs(KERNEL_DS); -+ do { -+ /* todo: signal_pending? */ -+ err = func(file, buf.u, size, pos); -+ } while (err == -EAGAIN || err == -EINTR); -+ set_fs(oldfs); -+ -+#if 0 /* reserved for future use */ -+ if (err > 0) -+ fsnotify_modify(file->f_dentry); -+#endif -+ -+ return err; -+} -+ -+struct do_xino_fwrite_args { -+ ssize_t *errp; -+ au_writef_t func; -+ struct file *file; -+ void *buf; -+ size_t size; -+ loff_t *pos; -+}; -+ -+static void call_do_xino_fwrite(void *args) -+{ -+ struct do_xino_fwrite_args *a = args; -+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos); -+} -+ -+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size, -+ loff_t *pos) -+{ -+ ssize_t err; -+ -+ /* todo: signal block and no wkq? */ -+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) { -+ lockdep_off(); -+ err = do_xino_fwrite(func, file, buf, size, pos); -+ lockdep_on(); -+ } else { -+ /* -+ * it breaks RLIMIT_FSIZE and normal user's limit, -+ * users should care about quota and real 'filesystem full.' 
-+ */ -+ int wkq_err; -+ struct do_xino_fwrite_args args = { -+ .errp = &err, -+ .func = func, -+ .file = file, -+ .buf = buf, -+ .size = size, -+ .pos = pos -+ }; -+ -+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args); -+ if (unlikely(wkq_err)) -+ err = wkq_err; -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * create a new xinofile at the same place/path as @base_file. -+ */ -+struct file *au_xino_create2(struct file *base_file, struct file *copy_src) -+{ -+ struct file *file; -+ struct dentry *base, *parent; -+ struct inode *dir, *delegated; -+ struct qstr *name; -+ struct path path; -+ int err; -+ -+ base = base_file->f_dentry; -+ parent = base->d_parent; /* dir inode is locked */ -+ dir = parent->d_inode; -+ IMustLock(dir); -+ -+ file = ERR_PTR(-EINVAL); -+ name = &base->d_name; -+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len); -+ if (IS_ERR(path.dentry)) { -+ file = (void *)path.dentry; -+ pr_err("%pd lookup err %ld\n", -+ base, PTR_ERR(path.dentry)); -+ goto out; -+ } -+ -+ /* no need to mnt_want_write() since we call dentry_open() later */ -+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL); -+ if (unlikely(err)) { -+ file = ERR_PTR(err); -+ pr_err("%pd create err %d\n", base, err); -+ goto out_dput; -+ } -+ -+ path.mnt = base_file->f_path.mnt; -+ file = vfsub_dentry_open(&path, -+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE -+ /* | __FMODE_NONOTIFY */); -+ if (IS_ERR(file)) { -+ pr_err("%pd open err %ld\n", base, PTR_ERR(file)); -+ goto out_dput; -+ } -+ -+ delegated = NULL; -+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0); -+ if (unlikely(err == -EWOULDBLOCK)) { -+ pr_warn("cannot retry for NFSv4 delegation" -+ " for an internal unlink\n"); -+ iput(delegated); -+ } -+ if (unlikely(err)) { -+ pr_err("%pd unlink err %d\n", base, err); -+ goto out_fput; -+ } -+ -+ if (copy_src) { -+ /* no one can touch copy_src xino */ -+ err = 
au_copy_file(file, copy_src, vfsub_f_size_read(copy_src)); -+ if (unlikely(err)) { -+ pr_err("%pd copy err %d\n", base, err); -+ goto out_fput; -+ } -+ } -+ goto out_dput; /* success */ -+ -+out_fput: -+ fput(file); -+ file = ERR_PTR(err); -+out_dput: -+ dput(path.dentry); -+out: -+ return file; -+} -+ -+struct au_xino_lock_dir { -+ struct au_hinode *hdir; -+ struct dentry *parent; -+ struct mutex *mtx; -+}; -+ -+static void au_xino_lock_dir(struct super_block *sb, struct file *xino, -+ struct au_xino_lock_dir *ldir) -+{ -+ aufs_bindex_t brid, bindex; -+ -+ ldir->hdir = NULL; -+ bindex = -1; -+ brid = au_xino_brid(sb); -+ if (brid >= 0) -+ bindex = au_br_index(sb, brid); -+ if (bindex >= 0) { -+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex); -+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT); -+ } else { -+ ldir->parent = dget_parent(xino->f_dentry); -+ ldir->mtx = &ldir->parent->d_inode->i_mutex; -+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT); -+ } -+} -+ -+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir) -+{ -+ if (ldir->hdir) -+ au_hn_imtx_unlock(ldir->hdir); -+ else { -+ mutex_unlock(ldir->mtx); -+ dput(ldir->parent); -+ } -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* trucate xino files asynchronously */ -+ -+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex) -+{ -+ int err; -+ unsigned long jiffy; -+ blkcnt_t blocks; -+ aufs_bindex_t bi, bend; -+ struct kstatfs *st; -+ struct au_branch *br; -+ struct file *new_xino, *file; -+ struct super_block *h_sb; -+ struct au_xino_lock_dir ldir; -+ -+ err = -ENOMEM; -+ st = kzalloc(sizeof(*st), GFP_NOFS); -+ if (unlikely(!st)) -+ goto out; -+ -+ err = -EINVAL; -+ bend = au_sbend(sb); -+ if (unlikely(bindex < 0 || bend < bindex)) -+ goto out_st; -+ br = au_sbr(sb, bindex); -+ file = br->br_xino.xi_file; -+ if (!file) -+ goto out_st; -+ -+ err = vfs_statfs(&file->f_path, st); -+ if (unlikely(err)) -+ AuErr1("statfs err %d, ignored\n", err); -+ 
jiffy = jiffies; -+ blocks = file_inode(file)->i_blocks; -+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n", -+ bindex, (u64)blocks, st->f_bfree, st->f_blocks); -+ -+ au_xino_lock_dir(sb, file, &ldir); -+ /* mnt_want_write() is unnecessary here */ -+ new_xino = au_xino_create2(file, file); -+ au_xino_unlock_dir(&ldir); -+ err = PTR_ERR(new_xino); -+ if (IS_ERR(new_xino)) { -+ pr_err("err %d, ignored\n", err); -+ goto out_st; -+ } -+ err = 0; -+ fput(file); -+ br->br_xino.xi_file = new_xino; -+ -+ h_sb = au_br_sb(br); -+ for (bi = 0; bi <= bend; bi++) { -+ if (unlikely(bi == bindex)) -+ continue; -+ br = au_sbr(sb, bi); -+ if (au_br_sb(br) != h_sb) -+ continue; -+ -+ fput(br->br_xino.xi_file); -+ br->br_xino.xi_file = new_xino; -+ get_file(new_xino); -+ } -+ -+ err = vfs_statfs(&new_xino->f_path, st); -+ if (!err) { -+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n", -+ bindex, (u64)file_inode(new_xino)->i_blocks, -+ st->f_bfree, st->f_blocks); -+ if (file_inode(new_xino)->i_blocks < blocks) -+ au_sbi(sb)->si_xino_jiffy = jiffy; -+ } else -+ AuErr1("statfs err %d, ignored\n", err); -+ -+out_st: -+ kfree(st); -+out: -+ return err; -+} -+ -+struct xino_do_trunc_args { -+ struct super_block *sb; -+ struct au_branch *br; -+}; -+ -+static void xino_do_trunc(void *_args) -+{ -+ struct xino_do_trunc_args *args = _args; -+ struct super_block *sb; -+ struct au_branch *br; -+ struct inode *dir; -+ int err; -+ aufs_bindex_t bindex; -+ -+ err = 0; -+ sb = args->sb; -+ dir = sb->s_root->d_inode; -+ br = args->br; -+ -+ si_noflush_write_lock(sb); -+ ii_read_lock_parent(dir); -+ bindex = au_br_index(sb, br->br_id); -+ err = au_xino_trunc(sb, bindex); -+ ii_read_unlock(dir); -+ if (unlikely(err)) -+ pr_warn("err b%d, (%d)\n", bindex, err); -+ atomic_dec(&br->br_xino_running); -+ atomic_dec(&br->br_count); -+ si_write_unlock(sb); -+ au_nwt_done(&au_sbi(sb)->si_nowait); -+ kfree(args); -+} -+ -+static int xino_trunc_test(struct super_block *sb, 
struct au_branch *br) -+{ -+ int err; -+ struct kstatfs st; -+ struct au_sbinfo *sbinfo; -+ -+ /* todo: si_xino_expire and the ratio should be customizable */ -+ sbinfo = au_sbi(sb); -+ if (time_before(jiffies, -+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire)) -+ return 0; -+ -+ /* truncation border */ -+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st); -+ if (unlikely(err)) { -+ AuErr1("statfs err %d, ignored\n", err); -+ return 0; -+ } -+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC) -+ return 0; -+ -+ return 1; -+} -+ -+static void xino_try_trunc(struct super_block *sb, struct au_branch *br) -+{ -+ struct xino_do_trunc_args *args; -+ int wkq_err; -+ -+ if (!xino_trunc_test(sb, br)) -+ return; -+ -+ if (atomic_inc_return(&br->br_xino_running) > 1) -+ goto out; -+ -+ /* lock and kfree() will be called in trunc_xino() */ -+ args = kmalloc(sizeof(*args), GFP_NOFS); -+ if (unlikely(!args)) { -+ AuErr1("no memory\n"); -+ goto out_args; -+ } -+ -+ atomic_inc(&br->br_count); -+ args->sb = sb; -+ args->br = br; -+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0); -+ if (!wkq_err) -+ return; /* success */ -+ -+ pr_err("wkq %d\n", wkq_err); -+ atomic_dec(&br->br_count); -+ -+out_args: -+ kfree(args); -+out: -+ atomic_dec(&br->br_xino_running); -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static int au_xino_do_write(au_writef_t write, struct file *file, -+ ino_t h_ino, ino_t ino) -+{ -+ loff_t pos; -+ ssize_t sz; -+ -+ pos = h_ino; -+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) { -+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino); -+ return -EFBIG; -+ } -+ pos *= sizeof(ino); -+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos); -+ if (sz == sizeof(ino)) -+ return 0; /* success */ -+ -+ AuIOErr("write failed (%zd)\n", sz); -+ return -EIO; -+} -+ -+/* -+ * write @ino to the xinofile for the specified branch{@sb, @bindex} -+ * at the position of @h_ino. 
-+ * even if @ino is zero, it is written to the xinofile and means no entry. -+ * if the size of the xino file on a specific filesystem exceeds the watermark, -+ * try truncating it. -+ */ -+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ ino_t ino) -+{ -+ int err; -+ unsigned int mnt_flags; -+ struct au_branch *br; -+ -+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max) -+ || ((loff_t)-1) > 0); -+ SiMustAnyLock(sb); -+ -+ mnt_flags = au_mntflags(sb); -+ if (!au_opt_test(mnt_flags, XINO)) -+ return 0; -+ -+ br = au_sbr(sb, bindex); -+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file, -+ h_ino, ino); -+ if (!err) { -+ if (au_opt_test(mnt_flags, TRUNC_XINO) -+ && au_test_fs_trunc_xino(au_br_sb(br))) -+ xino_try_trunc(sb, br); -+ return 0; /* success */ -+ } -+ -+ AuIOErr("write failed (%d)\n", err); -+ return -EIO; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* aufs inode number bitmap */ -+ -+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE; -+static ino_t xib_calc_ino(unsigned long pindex, int bit) -+{ -+ ino_t ino; -+ -+ AuDebugOn(bit < 0 || page_bits <= bit); -+ ino = AUFS_FIRST_INO + pindex * page_bits + bit; -+ return ino; -+} -+ -+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit) -+{ -+ AuDebugOn(ino < AUFS_FIRST_INO); -+ ino -= AUFS_FIRST_INO; -+ *pindex = ino / page_bits; -+ *bit = ino % page_bits; -+} -+ -+static int xib_pindex(struct super_block *sb, unsigned long pindex) -+{ -+ int err; -+ loff_t pos; -+ ssize_t sz; -+ struct au_sbinfo *sbinfo; -+ struct file *xib; -+ unsigned long *p; -+ -+ sbinfo = au_sbi(sb); -+ MtxMustLock(&sbinfo->si_xib_mtx); -+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE -+ || !au_opt_test(sbinfo->si_mntflags, XINO)); -+ -+ if (pindex == sbinfo->si_xib_last_pindex) -+ return 0; -+ -+ xib = sbinfo->si_xib; -+ p = sbinfo->si_xib_buf; -+ pos = sbinfo->si_xib_last_pindex; -+ pos *= PAGE_SIZE; -+ sz = 
xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos); -+ if (unlikely(sz != PAGE_SIZE)) -+ goto out; -+ -+ pos = pindex; -+ pos *= PAGE_SIZE; -+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE) -+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos); -+ else { -+ memset(p, 0, PAGE_SIZE); -+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos); -+ } -+ if (sz == PAGE_SIZE) { -+ sbinfo->si_xib_last_pindex = pindex; -+ return 0; /* success */ -+ } -+ -+out: -+ AuIOErr1("write failed (%zd)\n", sz); -+ err = sz; -+ if (sz >= 0) -+ err = -EIO; -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+static void au_xib_clear_bit(struct inode *inode) -+{ -+ int err, bit; -+ unsigned long pindex; -+ struct super_block *sb; -+ struct au_sbinfo *sbinfo; -+ -+ AuDebugOn(inode->i_nlink); -+ -+ sb = inode->i_sb; -+ xib_calc_bit(inode->i_ino, &pindex, &bit); -+ AuDebugOn(page_bits <= bit); -+ sbinfo = au_sbi(sb); -+ mutex_lock(&sbinfo->si_xib_mtx); -+ err = xib_pindex(sb, pindex); -+ if (!err) { -+ clear_bit(bit, sbinfo->si_xib_buf); -+ sbinfo->si_xib_next_bit = bit; -+ } -+ mutex_unlock(&sbinfo->si_xib_mtx); -+} -+ -+/* for s_op->delete_inode() */ -+void au_xino_delete_inode(struct inode *inode, const int unlinked) -+{ -+ int err; -+ unsigned int mnt_flags; -+ aufs_bindex_t bindex, bend, bi; -+ unsigned char try_trunc; -+ struct au_iinfo *iinfo; -+ struct super_block *sb; -+ struct au_hinode *hi; -+ struct inode *h_inode; -+ struct au_branch *br; -+ au_writef_t xwrite; -+ -+ sb = inode->i_sb; -+ mnt_flags = au_mntflags(sb); -+ if (!au_opt_test(mnt_flags, XINO) -+ || inode->i_ino == AUFS_ROOT_INO) -+ return; -+ -+ if (unlinked) { -+ au_xigen_inc(inode); -+ au_xib_clear_bit(inode); -+ } -+ -+ iinfo = au_ii(inode); -+ if (!iinfo) -+ return; -+ -+ bindex = iinfo->ii_bstart; -+ if (bindex < 0) -+ return; -+ -+ xwrite = au_sbi(sb)->si_xwrite; -+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO); -+ hi = iinfo->ii_hinode + 
bindex; -+ bend = iinfo->ii_bend; -+ for (; bindex <= bend; bindex++, hi++) { -+ h_inode = hi->hi_inode; -+ if (!h_inode -+ || (!unlinked && h_inode->i_nlink)) -+ continue; -+ -+ /* inode may not be revalidated */ -+ bi = au_br_index(sb, hi->hi_id); -+ if (bi < 0) -+ continue; -+ -+ br = au_sbr(sb, bi); -+ err = au_xino_do_write(xwrite, br->br_xino.xi_file, -+ h_inode->i_ino, /*ino*/0); -+ if (!err && try_trunc -+ && au_test_fs_trunc_xino(au_br_sb(br))) -+ xino_try_trunc(sb, br); -+ } -+} -+ -+/* get an unused inode number from bitmap */ -+ino_t au_xino_new_ino(struct super_block *sb) -+{ -+ ino_t ino; -+ unsigned long *p, pindex, ul, pend; -+ struct au_sbinfo *sbinfo; -+ struct file *file; -+ int free_bit, err; -+ -+ if (!au_opt_test(au_mntflags(sb), XINO)) -+ return iunique(sb, AUFS_FIRST_INO); -+ -+ sbinfo = au_sbi(sb); -+ mutex_lock(&sbinfo->si_xib_mtx); -+ p = sbinfo->si_xib_buf; -+ free_bit = sbinfo->si_xib_next_bit; -+ if (free_bit < page_bits && !test_bit(free_bit, p)) -+ goto out; /* success */ -+ free_bit = find_first_zero_bit(p, page_bits); -+ if (free_bit < page_bits) -+ goto out; /* success */ -+ -+ pindex = sbinfo->si_xib_last_pindex; -+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) { -+ err = xib_pindex(sb, ul); -+ if (unlikely(err)) -+ goto out_err; -+ free_bit = find_first_zero_bit(p, page_bits); -+ if (free_bit < page_bits) -+ goto out; /* success */ -+ } -+ -+ file = sbinfo->si_xib; -+ pend = vfsub_f_size_read(file) / PAGE_SIZE; -+ for (ul = pindex + 1; ul <= pend; ul++) { -+ err = xib_pindex(sb, ul); -+ if (unlikely(err)) -+ goto out_err; -+ free_bit = find_first_zero_bit(p, page_bits); -+ if (free_bit < page_bits) -+ goto out; /* success */ -+ } -+ BUG(); -+ -+out: -+ set_bit(free_bit, p); -+ sbinfo->si_xib_next_bit = free_bit + 1; -+ pindex = sbinfo->si_xib_last_pindex; -+ mutex_unlock(&sbinfo->si_xib_mtx); -+ ino = xib_calc_ino(pindex, free_bit); -+ AuDbg("i%lu\n", (unsigned long)ino); -+ return ino; -+out_err: -+ 
mutex_unlock(&sbinfo->si_xib_mtx); -+ AuDbg("i0\n"); -+ return 0; -+} -+ -+/* -+ * read @ino from xinofile for the specified branch{@sb, @bindex} -+ * at the position of @h_ino. -+ * if @ino does not exist and @do_new is true, get new one. -+ */ -+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, -+ ino_t *ino) -+{ -+ int err; -+ ssize_t sz; -+ loff_t pos; -+ struct file *file; -+ struct au_sbinfo *sbinfo; -+ -+ *ino = 0; -+ if (!au_opt_test(au_mntflags(sb), XINO)) -+ return 0; /* no xino */ -+ -+ err = 0; -+ sbinfo = au_sbi(sb); -+ pos = h_ino; -+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) { -+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino); -+ return -EFBIG; -+ } -+ pos *= sizeof(*ino); -+ -+ file = au_sbr(sb, bindex)->br_xino.xi_file; -+ if (vfsub_f_size_read(file) < pos + sizeof(*ino)) -+ return 0; /* no ino */ -+ -+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos); -+ if (sz == sizeof(*ino)) -+ return 0; /* success */ -+ -+ err = sz; -+ if (unlikely(sz >= 0)) { -+ err = -EIO; -+ AuIOErr("xino read error (%zd)\n", sz); -+ } -+ -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* create and set a new xino file */ -+ -+struct file *au_xino_create(struct super_block *sb, char *fname, int silent) -+{ -+ struct file *file; -+ struct dentry *h_parent, *d; -+ struct inode *h_dir; -+ int err; -+ -+ /* -+ * at mount-time, and the xino file is the default path, -+ * hnotify is disabled so we have no notify events to ignore. -+ * when a user specified the xino, we cannot get au_hdir to be ignored. 
-+ */ -+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE -+ /* | __FMODE_NONOTIFY */, -+ S_IRUGO | S_IWUGO); -+ if (IS_ERR(file)) { -+ if (!silent) -+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file)); -+ return file; -+ } -+ -+ /* keep file count */ -+ h_parent = dget_parent(file->f_dentry); -+ h_dir = h_parent->d_inode; -+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT); -+ /* mnt_want_write() is unnecessary here */ -+ /* no delegation since it is just created */ -+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL, /*force*/0); -+ mutex_unlock(&h_dir->i_mutex); -+ dput(h_parent); -+ if (unlikely(err)) { -+ if (!silent) -+ pr_err("unlink %s(%d)\n", fname, err); -+ goto out; -+ } -+ -+ err = -EINVAL; -+ d = file->f_dentry; -+ if (unlikely(sb == d->d_sb)) { -+ if (!silent) -+ pr_err("%s must be outside\n", fname); -+ goto out; -+ } -+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) { -+ if (!silent) -+ pr_err("xino doesn't support %s(%s)\n", -+ fname, au_sbtype(d->d_sb)); -+ goto out; -+ } -+ return file; /* success */ -+ -+out: -+ fput(file); -+ file = ERR_PTR(err); -+ return file; -+} -+ -+/* -+ * find another branch who is on the same filesystem of the specified -+ * branch{@btgt}. search until @bend. -+ */ -+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt, -+ aufs_bindex_t bend) -+{ -+ aufs_bindex_t bindex; -+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt); -+ -+ for (bindex = 0; bindex < btgt; bindex++) -+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex))) -+ return bindex; -+ for (bindex++; bindex <= bend; bindex++) -+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex))) -+ return bindex; -+ return -1; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * initialize the xinofile for the specified branch @br -+ * at the place/path where @base_file indicates. -+ * test whether another branch is on the same filesystem or not, -+ * if @do_test is true. 
-+ */ -+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino, -+ struct file *base_file, int do_test) -+{ -+ int err; -+ ino_t ino; -+ aufs_bindex_t bend, bindex; -+ struct au_branch *shared_br, *b; -+ struct file *file; -+ struct super_block *tgt_sb; -+ -+ shared_br = NULL; -+ bend = au_sbend(sb); -+ if (do_test) { -+ tgt_sb = au_br_sb(br); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ b = au_sbr(sb, bindex); -+ if (tgt_sb == au_br_sb(b)) { -+ shared_br = b; -+ break; -+ } -+ } -+ } -+ -+ if (!shared_br || !shared_br->br_xino.xi_file) { -+ struct au_xino_lock_dir ldir; -+ -+ au_xino_lock_dir(sb, base_file, &ldir); -+ /* mnt_want_write() is unnecessary here */ -+ file = au_xino_create2(base_file, NULL); -+ au_xino_unlock_dir(&ldir); -+ err = PTR_ERR(file); -+ if (IS_ERR(file)) -+ goto out; -+ br->br_xino.xi_file = file; -+ } else { -+ br->br_xino.xi_file = shared_br->br_xino.xi_file; -+ get_file(br->br_xino.xi_file); -+ } -+ -+ ino = AUFS_ROOT_INO; -+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file, -+ h_ino, ino); -+ if (unlikely(err)) { -+ fput(br->br_xino.xi_file); -+ br->br_xino.xi_file = NULL; -+ } -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* trucate a xino bitmap file */ -+ -+/* todo: slow */ -+static int do_xib_restore(struct super_block *sb, struct file *file, void *page) -+{ -+ int err, bit; -+ ssize_t sz; -+ unsigned long pindex; -+ loff_t pos, pend; -+ struct au_sbinfo *sbinfo; -+ au_readf_t func; -+ ino_t *ino; -+ unsigned long *p; -+ -+ err = 0; -+ sbinfo = au_sbi(sb); -+ MtxMustLock(&sbinfo->si_xib_mtx); -+ p = sbinfo->si_xib_buf; -+ func = sbinfo->si_xread; -+ pend = vfsub_f_size_read(file); -+ pos = 0; -+ while (pos < pend) { -+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos); -+ err = sz; -+ if (unlikely(sz <= 0)) -+ goto out; -+ -+ err = 0; -+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) { -+ if (unlikely(*ino < 
AUFS_FIRST_INO)) -+ continue; -+ -+ xib_calc_bit(*ino, &pindex, &bit); -+ AuDebugOn(page_bits <= bit); -+ err = xib_pindex(sb, pindex); -+ if (!err) -+ set_bit(bit, p); -+ else -+ goto out; -+ } -+ } -+ -+out: -+ return err; -+} -+ -+static int xib_restore(struct super_block *sb) -+{ -+ int err; -+ aufs_bindex_t bindex, bend; -+ void *page; -+ -+ err = -ENOMEM; -+ page = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!page)) -+ goto out; -+ -+ err = 0; -+ bend = au_sbend(sb); -+ for (bindex = 0; !err && bindex <= bend; bindex++) -+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) -+ err = do_xib_restore -+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page); -+ else -+ AuDbg("b%d\n", bindex); -+ free_page((unsigned long)page); -+ -+out: -+ return err; -+} -+ -+int au_xib_trunc(struct super_block *sb) -+{ -+ int err; -+ ssize_t sz; -+ loff_t pos; -+ struct au_xino_lock_dir ldir; -+ struct au_sbinfo *sbinfo; -+ unsigned long *p; -+ struct file *file; -+ -+ SiMustWriteLock(sb); -+ -+ err = 0; -+ sbinfo = au_sbi(sb); -+ if (!au_opt_test(sbinfo->si_mntflags, XINO)) -+ goto out; -+ -+ file = sbinfo->si_xib; -+ if (vfsub_f_size_read(file) <= PAGE_SIZE) -+ goto out; -+ -+ au_xino_lock_dir(sb, file, &ldir); -+ /* mnt_want_write() is unnecessary here */ -+ file = au_xino_create2(sbinfo->si_xib, NULL); -+ au_xino_unlock_dir(&ldir); -+ err = PTR_ERR(file); -+ if (IS_ERR(file)) -+ goto out; -+ fput(sbinfo->si_xib); -+ sbinfo->si_xib = file; -+ -+ p = sbinfo->si_xib_buf; -+ memset(p, 0, PAGE_SIZE); -+ pos = 0; -+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos); -+ if (unlikely(sz != PAGE_SIZE)) { -+ err = sz; -+ AuIOErr("err %d\n", err); -+ if (sz >= 0) -+ err = -EIO; -+ goto out; -+ } -+ -+ mutex_lock(&sbinfo->si_xib_mtx); -+ /* mnt_want_write() is unnecessary here */ -+ err = xib_restore(sb); -+ mutex_unlock(&sbinfo->si_xib_mtx); -+ -+out: -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ 
-+/* -+ * xino mount option handlers -+ */ -+static au_readf_t find_readf(struct file *h_file) -+{ -+ const struct file_operations *fop = h_file->f_op; -+ -+ if (fop->read) -+ return fop->read; -+ if (fop->aio_read) -+ return do_sync_read; -+ return ERR_PTR(-ENOSYS); -+} -+ -+static au_writef_t find_writef(struct file *h_file) -+{ -+ const struct file_operations *fop = h_file->f_op; -+ -+ if (fop->write) -+ return fop->write; -+ if (fop->aio_write) -+ return do_sync_write; -+ return ERR_PTR(-ENOSYS); -+} -+ -+/* xino bitmap */ -+static void xino_clear_xib(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ sbinfo->si_xread = NULL; -+ sbinfo->si_xwrite = NULL; -+ if (sbinfo->si_xib) -+ fput(sbinfo->si_xib); -+ sbinfo->si_xib = NULL; -+ free_page((unsigned long)sbinfo->si_xib_buf); -+ sbinfo->si_xib_buf = NULL; -+} -+ -+static int au_xino_set_xib(struct super_block *sb, struct file *base) -+{ -+ int err; -+ loff_t pos; -+ struct au_sbinfo *sbinfo; -+ struct file *file; -+ -+ SiMustWriteLock(sb); -+ -+ sbinfo = au_sbi(sb); -+ file = au_xino_create2(base, sbinfo->si_xib); -+ err = PTR_ERR(file); -+ if (IS_ERR(file)) -+ goto out; -+ if (sbinfo->si_xib) -+ fput(sbinfo->si_xib); -+ sbinfo->si_xib = file; -+ sbinfo->si_xread = find_readf(file); -+ sbinfo->si_xwrite = find_writef(file); -+ -+ err = -ENOMEM; -+ if (!sbinfo->si_xib_buf) -+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS); -+ if (unlikely(!sbinfo->si_xib_buf)) -+ goto out_unset; -+ -+ sbinfo->si_xib_last_pindex = 0; -+ sbinfo->si_xib_next_bit = 0; -+ if (vfsub_f_size_read(file) < PAGE_SIZE) { -+ pos = 0; -+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf, -+ PAGE_SIZE, &pos); -+ if (unlikely(err != PAGE_SIZE)) -+ goto out_free; -+ } -+ err = 0; -+ goto out; /* success */ -+ -+out_free: -+ free_page((unsigned long)sbinfo->si_xib_buf); -+ sbinfo->si_xib_buf = NULL; -+ if (err >= 0) -+ err = -EIO; -+out_unset: -+ 
fput(sbinfo->si_xib); -+ sbinfo->si_xib = NULL; -+ sbinfo->si_xread = NULL; -+ sbinfo->si_xwrite = NULL; -+out: -+ return err; -+} -+ -+/* xino for each branch */ -+static void xino_clear_br(struct super_block *sb) -+{ -+ aufs_bindex_t bindex, bend; -+ struct au_branch *br; -+ -+ bend = au_sbend(sb); -+ for (bindex = 0; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (!br || !br->br_xino.xi_file) -+ continue; -+ -+ fput(br->br_xino.xi_file); -+ br->br_xino.xi_file = NULL; -+ } -+} -+ -+static int au_xino_set_br(struct super_block *sb, struct file *base) -+{ -+ int err; -+ ino_t ino; -+ aufs_bindex_t bindex, bend, bshared; -+ struct { -+ struct file *old, *new; -+ } *fpair, *p; -+ struct au_branch *br; -+ struct inode *inode; -+ au_writef_t writef; -+ -+ SiMustWriteLock(sb); -+ -+ err = -ENOMEM; -+ bend = au_sbend(sb); -+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS); -+ if (unlikely(!fpair)) -+ goto out; -+ -+ inode = sb->s_root->d_inode; -+ ino = AUFS_ROOT_INO; -+ writef = au_sbi(sb)->si_xwrite; -+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) { -+ br = au_sbr(sb, bindex); -+ bshared = is_sb_shared(sb, bindex, bindex - 1); -+ if (bshared >= 0) { -+ /* shared xino */ -+ *p = fpair[bshared]; -+ get_file(p->new); -+ } -+ -+ if (!p->new) { -+ /* new xino */ -+ p->old = br->br_xino.xi_file; -+ p->new = au_xino_create2(base, br->br_xino.xi_file); -+ err = PTR_ERR(p->new); -+ if (IS_ERR(p->new)) { -+ p->new = NULL; -+ goto out_pair; -+ } -+ } -+ -+ err = au_xino_do_write(writef, p->new, -+ au_h_iptr(inode, bindex)->i_ino, ino); -+ if (unlikely(err)) -+ goto out_pair; -+ } -+ -+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) { -+ br = au_sbr(sb, bindex); -+ if (br->br_xino.xi_file) -+ fput(br->br_xino.xi_file); -+ get_file(p->new); -+ br->br_xino.xi_file = p->new; -+ } -+ -+out_pair: -+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) -+ if (p->new) -+ fput(p->new); -+ else -+ break; -+ kfree(fpair); -+out: -+ return 
err; -+} -+ -+void au_xino_clr(struct super_block *sb) -+{ -+ struct au_sbinfo *sbinfo; -+ -+ au_xigen_clr(sb); -+ xino_clear_xib(sb); -+ xino_clear_br(sb); -+ sbinfo = au_sbi(sb); -+ /* lvalue, do not call au_mntflags() */ -+ au_opt_clr(sbinfo->si_mntflags, XINO); -+} -+ -+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount) -+{ -+ int err, skip; -+ struct dentry *parent, *cur_parent; -+ struct qstr *dname, *cur_name; -+ struct file *cur_xino; -+ struct inode *dir; -+ struct au_sbinfo *sbinfo; -+ -+ SiMustWriteLock(sb); -+ -+ err = 0; -+ sbinfo = au_sbi(sb); -+ parent = dget_parent(xino->file->f_dentry); -+ if (remount) { -+ skip = 0; -+ dname = &xino->file->f_dentry->d_name; -+ cur_xino = sbinfo->si_xib; -+ if (cur_xino) { -+ cur_parent = dget_parent(cur_xino->f_dentry); -+ cur_name = &cur_xino->f_dentry->d_name; -+ skip = (cur_parent == parent -+ && au_qstreq(dname, cur_name)); -+ dput(cur_parent); -+ } -+ if (skip) -+ goto out; -+ } -+ -+ au_opt_set(sbinfo->si_mntflags, XINO); -+ dir = parent->d_inode; -+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT); -+ /* mnt_want_write() is unnecessary here */ -+ err = au_xino_set_xib(sb, xino->file); -+ if (!err) -+ err = au_xigen_set(sb, xino->file); -+ if (!err) -+ err = au_xino_set_br(sb, xino->file); -+ mutex_unlock(&dir->i_mutex); -+ if (!err) -+ goto out; /* success */ -+ -+ /* reset all */ -+ AuIOErr("failed creating xino(%d).\n", err); -+ -+out: -+ dput(parent); -+ return err; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* -+ * create a xinofile at the default place/path. 
-+ */ -+struct file *au_xino_def(struct super_block *sb) -+{ -+ struct file *file; -+ char *page, *p; -+ struct au_branch *br; -+ struct super_block *h_sb; -+ struct path path; -+ aufs_bindex_t bend, bindex, bwr; -+ -+ br = NULL; -+ bend = au_sbend(sb); -+ bwr = -1; -+ for (bindex = 0; bindex <= bend; bindex++) { -+ br = au_sbr(sb, bindex); -+ if (au_br_writable(br->br_perm) -+ && !au_test_fs_bad_xino(au_br_sb(br))) { -+ bwr = bindex; -+ break; -+ } -+ } -+ -+ if (bwr >= 0) { -+ file = ERR_PTR(-ENOMEM); -+ page = (void *)__get_free_page(GFP_NOFS); -+ if (unlikely(!page)) -+ goto out; -+ path.mnt = au_br_mnt(br); -+ path.dentry = au_h_dptr(sb->s_root, bwr); -+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME)); -+ file = (void *)p; -+ if (!IS_ERR(p)) { -+ strcat(p, "/" AUFS_XINO_FNAME); -+ AuDbg("%s\n", p); -+ file = au_xino_create(sb, p, /*silent*/0); -+ if (!IS_ERR(file)) -+ au_xino_brid_set(sb, br->br_id); -+ } -+ free_page((unsigned long)page); -+ } else { -+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0); -+ if (IS_ERR(file)) -+ goto out; -+ h_sb = file->f_dentry->d_sb; -+ if (unlikely(au_test_fs_bad_xino(h_sb))) { -+ pr_err("xino doesn't support %s(%s)\n", -+ AUFS_XINO_DEFPATH, au_sbtype(h_sb)); -+ fput(file); -+ file = ERR_PTR(-EINVAL); -+ } -+ if (!IS_ERR(file)) -+ au_xino_brid_set(sb, -1); -+ } -+ -+out: -+ return file; -+} -+ -+/* ---------------------------------------------------------------------- */ -+ -+int au_xino_path(struct seq_file *seq, struct file *file) -+{ -+ int err; -+ -+ err = au_seq_path(seq, &file->f_path); -+ if (unlikely(err < 0)) -+ goto out; -+ -+ err = 0; -+#define Deleted "\\040(deleted)" -+ seq->count -= sizeof(Deleted) - 1; -+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted, -+ sizeof(Deleted) - 1)); -+#undef Deleted -+ -+out: -+ return err; -+} -diff --git fs/buffer.c fs/buffer.c -index 9ddb9fc..1059a0b 100644 ---- fs/buffer.c -+++ fs/buffer.c -@@ -2448,7 +2448,7 @@ int block_page_mkwrite(struct vm_area_struct 
*vma, struct vm_fault *vmf, - * Update file times before taking page lock. We may end up failing the - * fault so this update may be superfluous but who really cares... - */ -- file_update_time(vma->vm_file); -+ vma_file_update_time(vma); - - ret = __block_page_mkwrite(vma, vmf, get_block); - sb_end_pagefault(sb); -diff --git fs/inode.c fs/inode.c -index f96d2a6..2d72083 100644 ---- fs/inode.c -+++ fs/inode.c -@@ -1496,7 +1496,7 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, - * This does the actual work of updating an inodes time or version. Must have - * had called mnt_want_write() before calling this. - */ --static int update_time(struct inode *inode, struct timespec *time, int flags) -+int update_time(struct inode *inode, struct timespec *time, int flags) - { - if (inode->i_op->update_time) - return inode->i_op->update_time(inode, time, flags); -diff --git fs/proc/nommu.c fs/proc/nommu.c -index d4a3574..1397181 100644 ---- fs/proc/nommu.c -+++ fs/proc/nommu.c -@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) - file = region->vm_file; - - if (file) { -- struct inode *inode = file_inode(region->vm_file); -+ struct inode *inode; -+ -+ file = vmr_pr_or_file(region); -+ inode = file_inode(file); - dev = inode->i_sb->s_dev; - ino = inode->i_ino; - } -diff --git fs/proc/task_mmu.c fs/proc/task_mmu.c -index c4b2646..2a2f0f5 100644 ---- fs/proc/task_mmu.c -+++ fs/proc/task_mmu.c -@@ -265,7 +265,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) - const char *name = NULL; - - if (file) { -- struct inode *inode = file_inode(vma->vm_file); -+ struct inode *inode; -+ -+ file = vma_pr_or_file(vma); -+ inode = file_inode(file); - dev = inode->i_sb->s_dev; - ino = inode->i_ino; - pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; -@@ -1408,6 +1411,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) - seq_printf(m, "%08lx %s", vma->vm_start, buffer); - - if 
(file) { -+ file = vma_pr_or_file(vma); - seq_printf(m, " file="); - seq_path(m, &file->f_path, "\n\t= "); - } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { -diff --git fs/proc/task_nommu.c fs/proc/task_nommu.c -index 678455d..0ef7ef4 100644 ---- fs/proc/task_nommu.c -+++ fs/proc/task_nommu.c -@@ -141,7 +141,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, - file = vma->vm_file; - - if (file) { -- struct inode *inode = file_inode(vma->vm_file); -+ struct inode *inode; -+ -+ file = vma_pr_or_file(file); -+ inode = file_inode(file); - dev = inode->i_sb->s_dev; - ino = inode->i_ino; - pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; -diff --git fs/splice.c fs/splice.c -index e246954..4797013 100644 ---- fs/splice.c -+++ fs/splice.c -@@ -1103,8 +1103,8 @@ EXPORT_SYMBOL(generic_splice_sendpage); - /* - * Attempt to initiate a splice from pipe to file. - */ --static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, -- loff_t *ppos, size_t len, unsigned int flags) -+long do_splice_from(struct pipe_inode_info *pipe, struct file *out, -+ loff_t *ppos, size_t len, unsigned int flags) - { - ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, - loff_t *, size_t, unsigned int); -@@ -1120,9 +1120,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, - /* - * Attempt to initiate a splice from a file to a pipe. 
- */ --static long do_splice_to(struct file *in, loff_t *ppos, -- struct pipe_inode_info *pipe, size_t len, -- unsigned int flags) -+long do_splice_to(struct file *in, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags) - { - ssize_t (*splice_read)(struct file *, loff_t *, - struct pipe_inode_info *, size_t, unsigned int); -diff --git include/linux/fs.h include/linux/fs.h -index 8780312..3ff3e89 100644 ---- include/linux/fs.h -+++ include/linux/fs.h -@@ -2606,6 +2606,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); - extern int inode_newsize_ok(const struct inode *, loff_t offset); - extern void setattr_copy(struct inode *inode, const struct iattr *attr); - -+extern int update_time(struct inode *, struct timespec *, int); - extern int file_update_time(struct file *file); - - extern int generic_show_options(struct seq_file *m, struct dentry *root); -diff --git include/linux/mm.h include/linux/mm.h -index d677706..5bed2d8 100644 ---- include/linux/mm.h -+++ include/linux/mm.h -@@ -18,6 +18,9 @@ - #include - #include - #include -+#include -+#include -+#include - - struct mempolicy; - struct anon_vma; -@@ -1173,6 +1176,93 @@ static inline int fixup_user_fault(struct task_struct *tsk, - } - #endif - -+/* -+ * Mainly for aufs which mmap(2) diffrent file and wants to print different path -+ * in /proc/PID/maps. -+ */ -+/* #define AUFS_DEBUG_MMAP */ -+static inline void aufs_trace(struct file *f, struct file *pr, -+ const char func[], int line, const char func2[]) -+{ -+#ifdef AUFS_DEBUG_MMAP -+ if (pr) -+ pr_info("%s:%d: %s, %p\n", func, line, func2, -+ f ? (char *)f->f_dentry->d_name.name : "(null)"); -+#endif -+} -+ -+static inline struct file *vmr_do_pr_or_file(struct vm_region *region, -+ const char func[], int line) -+{ -+ struct file *f = region->vm_file, *pr = region->vm_prfile; -+ -+ aufs_trace(f, pr, func, line, __func__); -+ return (f && pr) ? 
pr : f; -+} -+ -+static inline void vmr_do_fput(struct vm_region *region, -+ const char func[], int line) -+{ -+ struct file *f = region->vm_file, *pr = region->vm_prfile; -+ -+ aufs_trace(f, pr, func, line, __func__); -+ fput(f); -+ if (f && pr) -+ fput(pr); -+} -+ -+static inline void vma_do_file_update_time(struct vm_area_struct *vma, -+ const char func[], int line) -+{ -+ struct file *f = vma->vm_file, *pr = vma->vm_prfile; -+ -+ aufs_trace(f, pr, func, line, __func__); -+ file_update_time(f); -+ if (f && pr) -+ file_update_time(pr); -+} -+ -+static inline struct file *vma_do_pr_or_file(struct vm_area_struct *vma, -+ const char func[], int line) -+{ -+ struct file *f = vma->vm_file, *pr = vma->vm_prfile; -+ -+ aufs_trace(f, pr, func, line, __func__); -+ return (f && pr) ? pr : f; -+} -+ -+static inline void vma_do_get_file(struct vm_area_struct *vma, -+ const char func[], int line) -+{ -+ struct file *f = vma->vm_file, *pr = vma->vm_prfile; -+ -+ aufs_trace(f, pr, func, line, __func__); -+ get_file(f); -+ if (f && pr) -+ get_file(pr); -+} -+ -+static inline void vma_do_fput(struct vm_area_struct *vma, -+ const char func[], int line) -+{ -+ struct file *f = vma->vm_file, *pr = vma->vm_prfile; -+ -+ aufs_trace(f, pr, func, line, __func__); -+ fput(f); -+ if (f && pr) -+ fput(pr); -+} -+ -+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \ -+ __LINE__) -+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__) -+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \ -+ __LINE__) -+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \ -+ __LINE__) -+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__) -+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__) -+ - extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); - extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write); -diff --git 
include/linux/mm_types.h include/linux/mm_types.h -index 8967e20..a57b589 100644 ---- include/linux/mm_types.h -+++ include/linux/mm_types.h -@@ -232,6 +232,7 @@ struct vm_region { - unsigned long vm_top; /* region allocated to here */ - unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ - struct file *vm_file; /* the backing file or NULL */ -+ struct file *vm_prfile; /* the virtual backing file or NULL */ - - int vm_usage; /* region usage count (access under nommu_region_sem) */ - bool vm_icache_flushed : 1; /* true if the icache has been flushed for -@@ -300,6 +301,7 @@ struct vm_area_struct { - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units, *not* PAGE_CACHE_SIZE */ - struct file * vm_file; /* File we map to (can be NULL). */ -+ struct file *vm_prfile; /* shadow of vm_file */ - void * vm_private_data; /* was vm_pte (shared mem) */ - - #ifndef CONFIG_MMU -diff --git include/linux/splice.h include/linux/splice.h -index 0e43906..304169e 100644 ---- include/linux/splice.h -+++ include/linux/splice.h -@@ -93,4 +93,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *); - extern void spd_release_page(struct splice_pipe_desc *, unsigned int); - - extern const struct pipe_buf_operations page_cache_pipe_buf_ops; -+ -+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out, -+ loff_t *ppos, size_t len, unsigned int flags); -+extern long do_splice_to(struct file *in, loff_t *ppos, -+ struct pipe_inode_info *pipe, size_t len, -+ unsigned int flags); - #endif -diff --git include/uapi/linux/Kbuild include/uapi/linux/Kbuild -index 6929571..351ca48 100644 ---- include/uapi/linux/Kbuild -+++ include/uapi/linux/Kbuild -@@ -56,6 +56,7 @@ header-y += atmppp.h - header-y += atmsap.h - header-y += atmsvc.h - header-y += audit.h -+header-y += aufs_type.h - header-y += auto_fs.h - header-y += auto_fs4.h - header-y += auxvec.h -diff --git include/uapi/linux/aufs_type.h include/uapi/linux/aufs_type.h -new file 
mode 100644 -index 0000000..19c29f4 ---- /dev/null -+++ include/uapi/linux/aufs_type.h -@@ -0,0 +1,271 @@ -+/* -+ * Copyright (C) 2005-2014 Junjiro R. Okajima -+ */ -+ -+#ifndef __AUFS_TYPE_H__ -+#define __AUFS_TYPE_H__ -+ -+#define AUFS_NAME "aufs" -+ -+#ifdef __KERNEL__ -+/* -+ * define it before including all other headers. -+ * sched.h may use pr_* macros before defining "current", so define the -+ * no-current version first, and re-define later. -+ */ -+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__ -+#include -+#undef pr_fmt -+#define pr_fmt(fmt) \ -+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \ -+ (int)sizeof(current->comm), current->comm, current->pid -+#else -+#include -+#include -+#endif /* __KERNEL__ */ -+ -+#include -+ -+#define AUFS_VERSION "3.15" -+ -+/* todo? move this to linux-2.6.19/include/magic.h */ -+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's') -+ -+/* ---------------------------------------------------------------------- */ -+ -+#ifdef CONFIG_AUFS_BRANCH_MAX_127 -+typedef int8_t aufs_bindex_t; -+#define AUFS_BRANCH_MAX 127 -+#else -+typedef int16_t aufs_bindex_t; -+#ifdef CONFIG_AUFS_BRANCH_MAX_511 -+#define AUFS_BRANCH_MAX 511 -+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023) -+#define AUFS_BRANCH_MAX 1023 -+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767) -+#define AUFS_BRANCH_MAX 32767 -+#endif -+#endif -+ -+#ifdef __KERNEL__ -+#ifndef AUFS_BRANCH_MAX -+#error unknown CONFIG_AUFS_BRANCH_MAX value -+#endif -+#endif /* __KERNEL__ */ -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define AUFS_FSTYPE AUFS_NAME -+ -+#define AUFS_ROOT_INO 2 -+#define AUFS_FIRST_INO 11 -+ -+#define AUFS_WH_PFX ".wh." 
-+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1) -+#define AUFS_WH_TMP_LEN 4 -+/* a limit for rmdir/rename a dir and copyup */ -+#define AUFS_MAX_NAMELEN (NAME_MAX \ -+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\ -+ - 1 /* dot */\ -+ - AUFS_WH_TMP_LEN) /* hex */ -+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino" -+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME -+#define AUFS_XINO_DEF_SEC 30 /* seconds */ -+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */ -+#define AUFS_DIRWH_DEF 3 -+#define AUFS_RDCACHE_DEF 10 /* seconds */ -+#define AUFS_RDCACHE_MAX 3600 /* seconds */ -+#define AUFS_RDBLK_DEF 512 /* bytes */ -+#define AUFS_RDHASH_DEF 32 -+#define AUFS_WKQ_NAME AUFS_NAME "d" -+#define AUFS_MFS_DEF_SEC 30 /* seconds */ -+#define AUFS_MFS_MAX_SEC 3600 /* seconds */ -+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */ -+ -+/* pseudo-link maintenace under /proc */ -+#define AUFS_PLINK_MAINT_NAME "plink_maint" -+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME -+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME -+ -+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */ -+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME -+ -+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME -+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk" -+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph" -+ -+/* doubly whiteouted */ -+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME -+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME -+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME -+ -+/* branch permissions and attributes */ -+#define AUFS_BRPERM_RW "rw" -+#define AUFS_BRPERM_RO "ro" -+#define AUFS_BRPERM_RR "rr" -+#define AUFS_BRATTR_COO_REG "coo_reg" -+#define AUFS_BRATTR_COO_ALL "coo_all" -+#define AUFS_BRATTR_UNPIN "unpin" -+#define AUFS_BRRATTR_WH "wh" -+#define AUFS_BRWATTR_NLWH "nolwh" -+#define AUFS_BRWATTR_MOO "moo" -+ -+/* ---------------------------------------------------------------------- */ -+ -+/* ioctl */ -+enum { -+ 
/* readdir in userspace */ -+ AuCtl_RDU, -+ AuCtl_RDU_INO, -+ -+ /* pathconf wrapper */ -+ AuCtl_WBR_FD, -+ -+ /* busy inode */ -+ AuCtl_IBUSY, -+ -+ /* move-down */ -+ AuCtl_MVDOWN -+}; -+ -+/* borrowed from linux/include/linux/kernel.h */ -+#ifndef ALIGN -+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) -+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) -+#endif -+ -+/* borrowed from linux/include/linux/compiler-gcc3.h */ -+#ifndef __aligned -+#define __aligned(x) __attribute__((aligned(x))) -+#endif -+ -+#ifdef __KERNEL__ -+#ifndef __packed -+#define __packed __attribute__((packed)) -+#endif -+#endif -+ -+struct au_rdu_cookie { -+ uint64_t h_pos; -+ int16_t bindex; -+ uint8_t flags; -+ uint8_t pad; -+ uint32_t generation; -+} __aligned(8); -+ -+struct au_rdu_ent { -+ uint64_t ino; -+ int16_t bindex; -+ uint8_t type; -+ uint8_t nlen; -+ uint8_t wh; -+ char name[0]; -+} __aligned(8); -+ -+static inline int au_rdu_len(int nlen) -+{ -+ /* include the terminating NULL */ -+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1, -+ sizeof(uint64_t)); -+} -+ -+union au_rdu_ent_ul { -+ struct au_rdu_ent __user *e; -+ uint64_t ul; -+}; -+ -+enum { -+ AufsCtlRduV_SZ, -+ AufsCtlRduV_End -+}; -+ -+struct aufs_rdu { -+ /* input */ -+ union { -+ uint64_t sz; /* AuCtl_RDU */ -+ uint64_t nent; /* AuCtl_RDU_INO */ -+ }; -+ union au_rdu_ent_ul ent; -+ uint16_t verify[AufsCtlRduV_End]; -+ -+ /* input/output */ -+ uint32_t blk; -+ -+ /* output */ -+ union au_rdu_ent_ul tail; -+ /* number of entries which were added in a single call */ -+ uint64_t rent; -+ uint8_t full; -+ uint8_t shwh; -+ -+ struct au_rdu_cookie cookie; -+} __aligned(8); -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct aufs_wbr_fd { -+ uint32_t oflags; -+ int16_t brid; -+} __aligned(8); -+ -+/* ---------------------------------------------------------------------- */ -+ -+struct aufs_ibusy { -+ uint64_t ino, h_ino; -+ int16_t bindex; -+} __aligned(8); -+ -+/* 
---------------------------------------------------------------------- */ -+ -+/* error code for move-down */ -+/* the actual message strings are implemented in aufs-util.git */ -+enum { -+ EAU_MVDOWN_OPAQUE = 1, -+ EAU_MVDOWN_WHITEOUT, -+ EAU_MVDOWN_UPPER, -+ EAU_MVDOWN_BOTTOM, -+ EAU_MVDOWN_NOUPPER, -+ EAU_MVDOWN_NOLOWERBR, -+ EAU_Last -+}; -+ -+/* flags for move-down */ -+#define AUFS_MVDOWN_DMSG 1 -+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */ -+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */ -+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */ -+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */ -+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */ -+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */ -+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */ -+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */ -+/* will be added more */ -+ -+enum { -+ AUFS_MVDOWN_UPPER, -+ AUFS_MVDOWN_LOWER, -+ AUFS_MVDOWN_NARRAY -+}; -+ -+struct aufs_mvdown { -+ uint32_t flags; -+ struct { -+ int16_t bindex; -+ int16_t brid; -+ } a[AUFS_MVDOWN_NARRAY]; -+ int8_t au_errno; -+ /* will be added more */ -+} __aligned(8); -+ -+/* ---------------------------------------------------------------------- */ -+ -+#define AuCtlType 'A' -+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu) -+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu) -+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \ -+ struct aufs_wbr_fd) -+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy) -+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \ -+ struct aufs_mvdown) -+ -+#endif /* __AUFS_TYPE_H__ */ -diff --git kernel/fork.c kernel/fork.c -index 54a8d26..dcf08b6 100644 ---- kernel/fork.c -+++ kernel/fork.c -@@ -416,7 +416,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) - struct inode *inode = file_inode(file); - struct address_space *mapping = 
file->f_mapping; - -- get_file(file); -+ vma_get_file(tmp); - if (tmp->vm_flags & VM_DENYWRITE) - atomic_dec(&inode->i_writecount); - mutex_lock(&mapping->i_mmap_mutex); -diff --git mm/filemap.c mm/filemap.c -index 088358c..138a88c 100644 ---- mm/filemap.c -+++ mm/filemap.c -@@ -2091,7 +2091,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) - int ret = VM_FAULT_LOCKED; - - sb_start_pagefault(inode->i_sb); -- file_update_time(vma->vm_file); -+ vma_file_update_time(vma); - lock_page(page); - if (page->mapping != inode->i_mapping) { - unlock_page(page); -diff --git mm/fremap.c mm/fremap.c -index 34feba6..5397350 100644 ---- mm/fremap.c -+++ mm/fremap.c -@@ -223,16 +223,28 @@ get_write_lock: - */ - if (mapping_cap_account_dirty(mapping)) { - unsigned long addr; -- struct file *file = get_file(vma->vm_file); -+ struct file *file = vma->vm_file, -+ *prfile = vma->vm_prfile; -+ - /* mmap_region may free vma; grab the info now */ - vm_flags = vma->vm_flags; - -+ vma_get_file(vma); - addr = mmap_region(file, start, size, vm_flags, pgoff); -- fput(file); -+ vma_fput(vma); - if (IS_ERR_VALUE(addr)) { - err = addr; - } else { - BUG_ON(addr != start); -+ if (prfile) { -+ struct vm_area_struct *new_vma; -+ -+ new_vma = find_vma(mm, addr); -+ if (!new_vma->vm_prfile) -+ new_vma->vm_prfile = prfile; -+ if (new_vma != vma) -+ get_file(prfile); -+ } - err = 0; - } - goto out_freed; -diff --git mm/madvise.c mm/madvise.c -index a402f8f..134e15d 100644 ---- mm/madvise.c -+++ mm/madvise.c -@@ -327,12 +327,12 @@ static long madvise_remove(struct vm_area_struct *vma, - * vma's reference to the file) can go away as soon as we drop - * mmap_sem. 
- */ -- get_file(f); -+ vma_get_file(vma); - up_read(¤t->mm->mmap_sem); - error = do_fallocate(f, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - offset, end - start); -- fput(f); -+ vma_fput(vma); - down_read(¤t->mm->mmap_sem); - return error; - } -diff --git mm/memory.c mm/memory.c -index 037b812..6e7d241 100644 ---- mm/memory.c -+++ mm/memory.c -@@ -2805,7 +2805,7 @@ reuse: - set_page_dirty_balance(dirty_page); - /* file_update_time outside page_lock */ - if (vma->vm_file) -- file_update_time(vma->vm_file); -+ vma_file_update_time(vma); - } - put_page(dirty_page); - if (page_mkwrite) { -diff --git mm/mmap.c mm/mmap.c -index b1202cf..40dd067 100644 ---- mm/mmap.c -+++ mm/mmap.c -@@ -250,7 +250,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) - if (vma->vm_ops && vma->vm_ops->close) - vma->vm_ops->close(vma); - if (vma->vm_file) -- fput(vma->vm_file); -+ vma_fput(vma); - mpol_put(vma_policy(vma)); - kmem_cache_free(vm_area_cachep, vma); - return next; -@@ -861,7 +861,7 @@ again: remove_next = 1 + (end > next->vm_end); - if (remove_next) { - if (file) { - uprobe_munmap(next, next->vm_start, next->vm_end); -- fput(file); -+ vma_fput(vma); - } - if (next->anon_vma) - anon_vma_merge(vma, next); -@@ -1641,8 +1641,8 @@ out: - unmap_and_free_vma: - if (vm_flags & VM_DENYWRITE) - allow_write_access(file); -+ vma_fput(vma); - vma->vm_file = NULL; -- fput(file); - - /* Undo any partial mapping done by a device driver. 
*/ - unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); -@@ -2432,7 +2432,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, - goto out_free_mpol; - - if (new->vm_file) -- get_file(new->vm_file); -+ vma_get_file(new); - - if (new->vm_ops && new->vm_ops->open) - new->vm_ops->open(new); -@@ -2451,7 +2451,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, - if (new->vm_ops && new->vm_ops->close) - new->vm_ops->close(new); - if (new->vm_file) -- fput(new->vm_file); -+ vma_fput(new); - unlink_anon_vmas(new); - out_free_mpol: - mpol_put(vma_policy(new)); -@@ -2840,7 +2840,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, - if (anon_vma_clone(new_vma, vma)) - goto out_free_mempol; - if (new_vma->vm_file) -- get_file(new_vma->vm_file); -+ vma_get_file(new_vma); - if (new_vma->vm_ops && new_vma->vm_ops->open) - new_vma->vm_ops->open(new_vma); - vma_link(mm, new_vma, prev, rb_link, rb_parent); -diff --git mm/msync.c mm/msync.c -index 632df45..02d770e 100644 ---- mm/msync.c -+++ mm/msync.c -@@ -80,10 +80,10 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) - start = vma->vm_end; - if ((flags & MS_SYNC) && file && - (vma->vm_flags & VM_SHARED)) { -- get_file(file); -+ vma_get_file(vma); - up_read(&mm->mmap_sem); - error = vfs_fsync(file, 0); -- fput(file); -+ vma_fput(vma); - if (error || start >= end) - goto out; - down_read(&mm->mmap_sem); -diff --git mm/nommu.c mm/nommu.c -index 85f8d66..9f471fa 100644 ---- mm/nommu.c -+++ mm/nommu.c -@@ -655,7 +655,7 @@ static void __put_nommu_region(struct vm_region *region) - up_write(&nommu_region_sem); - - if (region->vm_file) -- fput(region->vm_file); -+ vmr_fput(region); - - /* IO memory and memory shared directly out of the pagecache - * from ramfs/tmpfs mustn't be released here */ -@@ -820,7 +820,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) - if (vma->vm_ops && vma->vm_ops->close) - 
vma->vm_ops->close(vma); - if (vma->vm_file) -- fput(vma->vm_file); -+ vma_fput(vma); - put_nommu_region(vma->vm_region); - kmem_cache_free(vm_area_cachep, vma); - } -@@ -1382,7 +1382,7 @@ unsigned long do_mmap_pgoff(struct file *file, - goto error_just_free; - } - } -- fput(region->vm_file); -+ vmr_fput(region); - kmem_cache_free(vm_region_jar, region); - region = pregion; - result = start; -@@ -1458,10 +1458,10 @@ error_just_free: - up_write(&nommu_region_sem); - error: - if (region->vm_file) -- fput(region->vm_file); -+ vmr_fput(region); - kmem_cache_free(vm_region_jar, region); - if (vma->vm_file) -- fput(vma->vm_file); -+ vma_fput(vma); - kmem_cache_free(vm_area_cachep, vma); - kleave(" = %d", ret); - return ret; diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-i686 b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-i686 index 2b17b1e6d..3caa2bca0 100644 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-i686 +++ b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-i686 @@ -4570,8 +4570,8 @@ CONFIG_SQUASHFS=m # CONFIG_SQUASHFS_FILE_CACHE is not set CONFIG_SQUASHFS_FILE_DIRECT=y # CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y +CONFIG_SQUASHFS_DECOMP_MULTI=y +# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set # CONFIG_SQUASHFS_XATTR is not set CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZO=y diff --git a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-x86_64 b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-x86_64 index 06991febb..8600bbd5d 100644 --- a/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-x86_64 +++ b/profiles/templates/3.1/6_ac_install_patch/sys-kernel/calculate-sources/3.16/0_config-x86_64 @@ -4426,8 +4426,8 @@ 
CONFIG_SQUASHFS=m # CONFIG_SQUASHFS_FILE_CACHE is not set CONFIG_SQUASHFS_FILE_DIRECT=y # CONFIG_SQUASHFS_DECOMP_SINGLE is not set -# CONFIG_SQUASHFS_DECOMP_MULTI is not set -CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y +CONFIG_SQUASHFS_DECOMP_MULTI=y +# CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set # CONFIG_SQUASHFS_XATTR is not set CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZO=y