From 026046070802788de8244de8bde6ae5da2ba3639 Mon Sep 17 00:00:00 2001 From: Evgenii Shatokhin Date: Sun, 23 Oct 2016 22:01:06 +0300 Subject: [PATCH] Updated to version 4.8.4, the first take Besides the upstream kernel update to 4.8.x, the following changes were made: * BFQ was updated to v8r4 * AUFS was updated to version 4.8-20161010 --- .abf.yml | 4 +- ...onfig-build-bits-for-BFQ-v7r11-4.8.0.patch | 10 +- ...the-BFQ-v7r11-I-O-sched-to-be-ported.patch | 179 +- ...rly-Queue-Merge-EQM-to-BFQ-v7r11-to-.patch | 96 +- ...rn-BFQ-v7r11-into-BFQ-v8r4-for-4.8.0.patch | 2376 ++++++++++++----- README.BFQ | 1136 ++++---- ...repare-scripts-configs-in-devel-rpms.patch | 2 +- fs-aufs4.patch | 433 +-- kernel-i586.config | 108 +- kernel-nrj-desktop-i586.config | 5 + kernel-nrj-desktop-x86_64.config | 5 + kernel-nrj-laptop-i586.config | 3 + kernel-nrj-laptop-x86_64.config | 3 + kernel-x86_64.config | 113 +- kernel.spec | 12 +- linux-4.7.tar.sign | 11 - linux-4.8.tar.sign | 10 + patch-4.7.9.sign | 16 - patch-4.8.4.sign | 16 + sanitize-memory.patch | 51 +- 20 files changed, 2931 insertions(+), 1658 deletions(-) rename 0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.0.patch => 0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.8.0.patch (96%) rename 0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.0.patch => 0002-block-introduce-the-BFQ-v7r11-I-O-sched-to-be-ported.patch (98%) rename 0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch => 0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-to-.patch (92%) rename 0004-block-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for.patch => 0004-Turn-BFQ-v7r11-into-BFQ-v8r4-for-4.8.0.patch (77%) delete mode 100644 linux-4.7.tar.sign create mode 100644 linux-4.8.tar.sign delete mode 100644 patch-4.7.9.sign create mode 100644 patch-4.8.4.sign diff --git a/.abf.yml b/.abf.yml index 31e0399..4b8bd21 100644 --- a/.abf.yml +++ b/.abf.yml @@ -1,3 +1,3 @@ sources: - linux-4.7.tar.xz: 99551524779bf05382e363f4879101227664dd55 - patch-4.7.9.xz: e2a77f26009c92cd961d9b0aa938c48eea7fcb94 + linux-4.8.tar.xz: e375f93600a7b96191498af39e5a2416b6666e59 + patch-4.8.4.xz: 16fad35e375cee19b0efbde13643b7e4ab6cf6e6 diff --git a/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.0.patch b/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.8.0.patch similarity index 96% rename from 0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.0.patch rename to 0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.8.0.patch index ff75a8b..35cd1ce 100644 --- a/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.0.patch +++ b/0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.8.0.patch @@ -1,7 +1,7 @@ -From 22ee35ec82fa543b65c1b6d516a086a21f723846 Mon Sep 17 00:00:00 2001 +From f2ebe596e7d72e96e0fb2be87be90f0b96e6f1b3 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 7 Apr 2015 13:39:12 +0200 -Subject: [PATCH 1/4] block: cgroups, kconfig, build bits for BFQ-v7r11-4.7.0 +Subject: [PATCH 1/4] block: cgroups, kconfig, build bits for BFQ-v7r11-4.8.0 Update Kconfig.iosched and do the related Makefile changes to include kernel configuration options for BFQ. 
Also increase the number of @@ -86,7 +86,7 @@ index 9eda232..4a36683 100644 obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 3d9cf32..8d862a0 100644 +index e79055c..931ff1e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -45,7 +45,7 @@ struct pr_ops; @@ -96,8 +96,8 @@ index 3d9cf32..8d862a0 100644 -#define BLKCG_MAX_POLS 2 +#define BLKCG_MAX_POLS 3 - struct request; typedef void (rq_end_io_fn)(struct request *, int); + -- -1.9.1 +2.7.4 (Apple Git-66) diff --git a/0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.0.patch b/0002-block-introduce-the-BFQ-v7r11-I-O-sched-to-be-ported.patch similarity index 98% rename from 0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.0.patch rename to 0002-block-introduce-the-BFQ-v7r11-I-O-sched-to-be-ported.patch index 368a4ff..7cc8ce1 100644 --- a/0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.0.patch +++ b/0002-block-introduce-the-BFQ-v7r11-I-O-sched-to-be-ported.patch @@ -1,7 +1,8 @@ -From 2aae32be2a18a7d0da104ae42c08cb9bce9d9c7c Mon Sep 17 00:00:00 2001 +From d9af6fcc4167cbb8433b10bbf3663c8297487f52 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Thu, 9 May 2013 19:10:02 +0200 -Subject: [PATCH 2/4] block: introduce the BFQ-v7r11 I/O sched for 4.7.0 +Subject: [PATCH 2/4] block: introduce the BFQ-v7r11 I/O sched, to be ported to + 4.8.0 The general structure is borrowed from CFQ, as much of the code for handling I/O contexts. Over time, several useful features have been @@ -56,12 +57,12 @@ Signed-off-by: Paolo Valente Signed-off-by: Arianna Avanzini --- block/Kconfig.iosched | 6 +- - block/bfq-cgroup.c | 1182 ++++++++++++++++ + block/bfq-cgroup.c | 1186 ++++++++++++++++ block/bfq-ioc.c | 36 + - block/bfq-iosched.c | 3754 +++++++++++++++++++++++++++++++++++++++++++++++++ - block/bfq-sched.c | 1200 ++++++++++++++++ + block/bfq-iosched.c | 3763 +++++++++++++++++++++++++++++++++++++++++++++++++ + block/bfq-sched.c | 1199 ++++++++++++++++ block/bfq.h | 801 +++++++++++ - 6 files changed, 6975 insertions(+), 4 deletions(-) + 6 files changed, 6987 insertions(+), 4 deletions(-) create mode 100644 block/bfq-cgroup.c create mode 100644 block/bfq-ioc.c create mode 100644 block/bfq-iosched.c @@ -91,10 +92,10 @@ index 0ee5f0f..f78cd1a 100644 prompt "Default I/O scheduler" diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c new file mode 100644 -index 0000000..8610cd6 +index 0000000..8b08a57 --- /dev/null +++ b/block/bfq-cgroup.c -@@ -0,0 +1,1182 @@ +@@ -0,0 +1,1186 @@ +/* + * BFQ: CGROUPS support. 
+ * @@ -259,7 +260,9 @@ index 0000000..8610cd6 +static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) +{ + struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq); ++ + BUG_ON(!pd); ++ + return pd_to_bfqg(pd); +} + @@ -379,7 +382,8 @@ index 0000000..8610cd6 + blkg_stat_add_aux(&from->time, &from->time); + blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time); + blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); -+ blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); ++ blkg_stat_add_aux(&to->avg_queue_size_samples, ++ &from->avg_queue_size_samples); + blkg_stat_add_aux(&to->dequeue, &from->dequeue); + blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time); + blkg_stat_add_aux(&to->idle_time, &from->idle_time); @@ -471,9 +475,9 @@ index 0000000..8610cd6 +} + +static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) -+ { ++{ + return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; -+ } ++} + +static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) +{ @@ -562,8 +566,8 @@ index 0000000..8610cd6 +} + +/* to be used by recursive prfill, sums live and dead rwstats recursively */ -+static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, -+ int off) ++static struct blkg_rwstat ++bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, int off) +{ + struct blkg_rwstat a, b; + @@ -776,7 +780,6 @@ index 0000000..8610cd6 + + BUG_ON(!bfqq); + bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); -+ return; +} + +/** @@ -804,8 +807,6 @@ index 0000000..8610cd6 + if (bfqg->sched_data.in_service_entity) + bfq_reparent_leaf_entity(bfqd, + bfqg->sched_data.in_service_entity); -+ -+ return; +} + +/** @@ -930,6 +931,7 @@ index 0000000..8610cd6 + bfqgd->weight = (unsigned short)val; + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { + struct bfq_group *bfqg = blkg_to_bfqg(blkg); ++ + if (!bfqg) + continue; + /* @@ -1043,7 +1045,8 @@ index 0000000..8610cd6 + return 0; +} + -+static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) ++static struct bfq_group * ++bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) +{ + int ret; + @@ -1051,22 +1054,22 @@ index 0000000..8610cd6 + if (ret) + return NULL; + -+ return blkg_to_bfqg(bfqd->queue->root_blkg); ++ return blkg_to_bfqg(bfqd->queue->root_blkg); +} + +static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) +{ -+ struct bfq_group_data *bgd; ++ struct bfq_group_data *bgd; + -+ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL); -+ if (!bgd) -+ return NULL; -+ return &bgd->pd; ++ bgd = kzalloc(sizeof(*bgd), GFP_KERNEL); ++ if (!bgd) ++ return NULL; ++ return &bgd->pd; +} + +static void bfq_cpd_free(struct blkcg_policy_data *cpd) +{ -+ kfree(cpd_to_bfqgd(cpd)); ++ kfree(cpd_to_bfqgd(cpd)); +} + +static struct cftype bfqio_files_dfl[] = { @@ -1201,20 +1204,19 @@ index 0000000..8610cd6 +}; + +static struct blkcg_policy blkcg_policy_bfq = { -+ .dfl_cftypes = bfqio_files_dfl, -+ .legacy_cftypes = bfqio_files, ++ .dfl_cftypes = bfqio_files_dfl, ++ .legacy_cftypes = bfqio_files, + -+ .pd_alloc_fn = bfq_pd_alloc, -+ .pd_init_fn = bfq_pd_init, -+ .pd_offline_fn = bfq_pd_offline, -+ .pd_free_fn = bfq_pd_free, -+ .pd_reset_stats_fn = bfq_pd_reset_stats, -+ -+ .cpd_alloc_fn = bfq_cpd_alloc, -+ .cpd_init_fn = bfq_cpd_init, -+ .cpd_bind_fn = bfq_cpd_init, -+ .cpd_free_fn = bfq_cpd_free, ++ .pd_alloc_fn = bfq_pd_alloc, ++ .pd_init_fn = bfq_pd_init, ++ .pd_offline_fn = bfq_pd_offline, ++ 
.pd_free_fn = bfq_pd_free, ++ .pd_reset_stats_fn = bfq_pd_reset_stats, + ++ .cpd_alloc_fn = bfq_cpd_alloc, ++ .cpd_init_fn = bfq_cpd_init, ++ .cpd_bind_fn = bfq_cpd_init, ++ .cpd_free_fn = bfq_cpd_free, +}; + +#else @@ -1223,6 +1225,7 @@ index 0000000..8610cd6 + struct bfq_group *bfqg) +{ + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ + entity->weight = entity->new_weight; + entity->orig_weight = entity->new_weight; + if (bfqq) { @@ -1236,6 +1239,7 @@ index 0000000..8610cd6 +bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) +{ + struct bfq_data *bfqd = bic_to_bfqd(bic); ++ + return bfqd->root_group; +} + @@ -1257,12 +1261,13 @@ index 0000000..8610cd6 +} + +static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -+ struct blkcg *blkcg) ++ struct blkcg *blkcg) +{ + return bfqd->root_group; +} + -+static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) ++static struct bfq_group * ++bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) +{ + struct bfq_group *bfqg; + int i; @@ -1321,10 +1326,10 @@ index 0000000..fb7bb8f +} diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c new file mode 100644 -index 0000000..f9787a6 +index 0000000..85e2169 --- /dev/null +++ b/block/bfq-iosched.c -@@ -0,0 +1,3754 @@ +@@ -0,0 +1,3763 @@ +/* + * Budget Fair Queueing (BFQ) disk scheduler. + * @@ -1542,7 +1547,7 @@ index 0000000..f9787a6 + unsigned long back_max; +#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */ +#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */ -+ unsigned wrap = 0; /* bit mask: requests behind the disk head? */ ++ unsigned int wrap = 0; /* bit mask: requests behind the disk head? */ + + if (!rq1 || rq1 == rq2) + return rq2; @@ -1597,12 +1602,11 @@ index 0000000..f9787a6 + return rq1; + else if (d2 < d1) + return rq2; -+ else { -+ if (s1 >= s2) -+ return rq1; -+ else -+ return rq2; -+ } ++ ++ if (s1 >= s2) ++ return rq1; ++ else ++ return rq2; + + case BFQ_RQ2_WRAP: + return rq1; @@ -1889,7 +1893,7 @@ index 0000000..f9787a6 + */ + hlist_for_each_entry(bfqq_item, &bfqd->burst_list, + burst_list_node) -+ bfq_mark_bfqq_in_large_burst(bfqq_item); ++ bfq_mark_bfqq_in_large_burst(bfqq_item); + bfq_mark_bfqq_in_large_burst(bfqq); + + /* @@ -2288,7 +2292,7 @@ index 0000000..f9787a6 + bfqd->rq_in_driver++; + bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); + bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", -+ (long long unsigned)bfqd->last_position); ++ (unsigned long long) bfqd->last_position); +} + +static void bfq_deactivate_request(struct request_queue *q, struct request *rq) @@ -2595,6 +2599,7 @@ index 0000000..f9787a6 +{ + struct bfq_queue *bfqq = bfqd->in_service_queue; + unsigned int timeout_coeff; ++ + if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) + timeout_coeff = 1; + else @@ -2667,6 +2672,7 @@ index 0000000..f9787a6 +static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; ++ + return entity->budget - entity->service; +} + @@ -2906,6 +2912,7 @@ index 0000000..f9787a6 + if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && + update) { + int dev_type = blk_queue_nonrot(bfqd->queue); ++ + if (bfqd->bfq_user_max_budget == 0) { + bfqd->bfq_max_budget = + bfq_calc_max_budget(bfqd->peak_rate, @@ -3065,6 +3072,7 @@ index 0000000..f9787a6 + enum bfqq_expiration reason) +{ + bool slow; ++ + BUG_ON(bfqq != bfqd->in_service_queue); + + /* @@ -3098,7 +3106,7 @@ index 0000000..f9787a6 + } + + if (reason == BFQ_BFQQ_TOO_IDLE && -+ bfqq->entity.service <= 2 
* bfqq->entity.budget / 10 ) ++ bfqq->entity.service <= 2 * bfqq->entity.budget / 10) + bfq_clear_bfqq_IO_bound(bfqq); + + if (bfqd->low_latency && bfqq->wr_coeff == 1) @@ -3244,7 +3252,7 @@ index 0000000..f9787a6 + */ + idling_boosts_thr = !bfqd->hw_tag || + (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) && -+ bfq_bfqq_idle_window(bfqq)) ; ++ bfq_bfqq_idle_window(bfqq)); + + /* + * The value of the next variable, @@ -3356,7 +3364,7 @@ index 0000000..f9787a6 + * (i) each of these processes must get the same throughput as + * the others; + * (ii) all these processes have the same I/O pattern -+ (either sequential or random). ++ * (either sequential or random). + * In fact, in such a scenario, the drive will tend to treat + * the requests of each of these processes in about the same + * way as the requests of the others, and thus to provide @@ -3553,6 +3561,7 @@ index 0000000..f9787a6 +static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; ++ + if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, old coeff %u, w %d(%d)", @@ -3643,7 +3652,7 @@ index 0000000..f9787a6 + bfq_log_bfqq(bfqd, bfqq, + "dispatched %u sec req (%llu), budg left %d", + blk_rq_sectors(rq), -+ (long long unsigned)blk_rq_pos(rq), ++ (unsigned long long) blk_rq_pos(rq), + bfq_bfqq_budget_left(bfqq)); + + dispatched++; @@ -3841,7 +3850,8 @@ index 0000000..f9787a6 + * Update the entity prio values; note that the new values will not + * be used until the next (re)activation. + */ -+static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ++static void ++bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) +{ + struct task_struct *tsk = current; + int ioprio_class; @@ -3874,8 +3884,8 @@ index 0000000..f9787a6 + } + + if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) { -+ printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n", -+ bfqq->new_ioprio); ++ pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n", ++ bfqq->new_ioprio); + BUG(); + } + @@ -3999,7 +4009,7 @@ index 0000000..f9787a6 + + if (bfqq) { + bfq_init_bfqq(bfqd, bfqq, bic, current->pid, -+ is_sync); ++ is_sync); + bfq_init_entity(&bfqq->entity, bfqg); + bfq_log_bfqq(bfqd, bfqq, "allocated"); + } else { @@ -4187,7 +4197,7 @@ index 0000000..f9787a6 + bfq_log_bfqq(bfqd, bfqq, + "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", + bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), -+ (long long unsigned)bfqq->seek_mean); ++ (unsigned long long) bfqq->seek_mean); + + bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); + @@ -4738,8 +4748,7 @@ index 0000000..f9787a6 + +static void bfq_slab_kill(void) +{ -+ if (bfq_pool) -+ kmem_cache_destroy(bfq_pool); ++ kmem_cache_destroy(bfq_pool); +} + +static int __init bfq_slab_setup(void) @@ -4770,6 +4779,7 @@ index 0000000..f9787a6 +static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page) +{ + struct bfq_data *bfqd = e->elevator_data; ++ + return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ? 
+ jiffies_to_msecs(bfqd->bfq_wr_max_time) : + jiffies_to_msecs(bfq_wr_duration(bfqd))); @@ -4788,25 +4798,29 @@ index 0000000..f9787a6 + + num_char += sprintf(page + num_char, "Active:\n"); + list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ bfqq->queued[0], -+ bfqq->queued[1], -+ jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, nr_queued %d %d, ", ++ bfqq->pid, ++ bfqq->entity.weight, ++ bfqq->queued[0], ++ bfqq->queued[1]); ++ num_char += sprintf(page + num_char, ++ "dur %d/%u\n", ++ jiffies_to_msecs( ++ jiffies - ++ bfqq->last_wr_start_finish), ++ jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + + num_char += sprintf(page + num_char, "Idle:\n"); + list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) { -+ num_char += sprintf(page + num_char, -+ "pid%d: weight %hu, dur %d/%u\n", -+ bfqq->pid, -+ bfqq->entity.weight, -+ jiffies_to_msecs(jiffies - -+ bfqq->last_wr_start_finish), -+ jiffies_to_msecs(bfqq->wr_cur_max_time)); ++ num_char += sprintf(page + num_char, ++ "pid%d: weight %hu, dur %d/%u\n", ++ bfqq->pid, ++ bfqq->entity.weight, ++ jiffies_to_msecs(jiffies - ++ bfqq->last_wr_start_finish), ++ jiffies_to_msecs(bfqq->wr_cur_max_time)); + } + + spin_unlock_irq(bfqd->queue->queue_lock); @@ -5081,10 +5095,10 @@ index 0000000..f9787a6 +MODULE_LICENSE("GPL"); diff --git a/block/bfq-sched.c b/block/bfq-sched.c new file mode 100644 -index 0000000..a64fec1 +index 0000000..a5ed694 --- /dev/null +++ b/block/bfq-sched.c -@@ -0,0 +1,1200 @@ +@@ -0,0 +1,1199 @@ +/* + * BFQ: Hierarchical B-WF2Q+ scheduler. + * @@ -5715,8 +5729,7 @@ index 0000000..a64fec1 + if (entity->new_weight != entity->orig_weight) { + if (entity->new_weight < BFQ_MIN_WEIGHT || + entity->new_weight > BFQ_MAX_WEIGHT) { -+ printk(KERN_CRIT "update_weight_prio: " -+ "new_weight %d\n", ++ pr_crit("update_weight_prio: new_weight %d\n", + entity->new_weight); + BUG(); + } @@ -6287,7 +6300,7 @@ index 0000000..a64fec1 +} diff --git a/block/bfq.h b/block/bfq.h new file mode 100644 -index 0000000..485d0c9 +index 0000000..2bf54ae --- /dev/null +++ b/block/bfq.h @@ -0,0 +1,801 @@ @@ -6722,10 +6735,10 @@ index 0000000..485d0c9 + * @last_ins_in_burst. + * @burst_size: number of queues in the current burst of queue activations. + * @bfq_large_burst_thresh: maximum burst size above which the current -+ * queue-activation burst is deemed as 'large'. ++ * queue-activation burst is deemed as 'large'. + * @large_burst: true if a large queue-activation burst is in progress. + * @burst_list: head of the burst list (as for the above fields, more details -+ * in the comments to the function bfq_handle_burst). ++ * in the comments to the function bfq_handle_burst). + * @low_latency: if set to true, low-latency heuristics are enabled. + * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised + * queue is multiplied. 
@@ -7093,5 +7106,5 @@ index 0000000..485d0c9 + +#endif /* _BFQ_H */ -- -1.9.1 +2.7.4 (Apple Git-66) diff --git a/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch b/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-to-.patch similarity index 92% rename from 0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch rename to 0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-to-.patch index a9876aa..2a53175 100644 --- a/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch +++ b/0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-to-.patch @@ -1,8 +1,8 @@ -From 47de1e46ef5f462e9694e5b0607aec6ad658f1e0 Mon Sep 17 00:00:00 2001 +From 409e62551360d2802992b0175062237352793a2a Mon Sep 17 00:00:00 2001 From: Mauro Andreolini Date: Sun, 6 Sep 2015 16:09:05 +0200 -Subject: [PATCH 3/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r11 for - 4.7.0 +Subject: [PATCH 3/4] block, bfq: add Early Queue Merge (EQM) to BFQ-v7r11, to + port to 4.8.0 A set of processes may happen to perform interleaved reads, i.e.,requests whose union would give rise to a sequential read pattern. There are two @@ -35,16 +35,16 @@ Signed-off-by: Arianna Avanzini Signed-off-by: Paolo Valente Signed-off-by: Linus Walleij --- - block/bfq-cgroup.c | 4 + - block/bfq-iosched.c | 687 ++++++++++++++++++++++++++++++++++++++++++++++++++-- + block/bfq-cgroup.c | 5 + + block/bfq-iosched.c | 685 +++++++++++++++++++++++++++++++++++++++++++++++++++- block/bfq.h | 66 +++++ - 3 files changed, 743 insertions(+), 14 deletions(-) + 3 files changed, 743 insertions(+), 13 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -index 8610cd6..5ee99ec 100644 +index 8b08a57..0367996 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c -@@ -437,6 +437,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) +@@ -440,6 +440,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; @@ -52,16 +52,17 @@ index 8610cd6..5ee99ec 100644 } static void bfq_pd_free(struct blkg_policy_data *pd) -@@ -530,6 +531,8 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, +@@ -533,6 +534,9 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, return bfqg; } -+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); ++static void bfq_pos_tree_add_move(struct bfq_data *bfqd, ++ struct bfq_queue *bfqq); + /** * bfq_bfqq_move - migrate @bfqq to @bfqg. * @bfqd: queue descriptor. -@@ -577,6 +580,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -580,6 +584,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqg_get(bfqg); if (busy) { @@ -70,10 +71,10 @@ index 8610cd6..5ee99ec 100644 bfq_activate_bfqq(bfqd, bfqq); } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index f9787a6..d1f648d 100644 +index 85e2169..cf3e9b1 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c -@@ -296,6 +296,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, +@@ -295,6 +295,72 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd, } } @@ -112,7 +113,7 @@ index f9787a6..d1f648d 100644 + *rb_link = p; + + bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d", -+ (long long unsigned)sector, ++ (unsigned long long) sector, + bfqq ? bfqq->pid : 0); + + return bfqq; @@ -146,11 +147,11 @@ index f9787a6..d1f648d 100644 /* * Tell whether there are active queues or groups with differentiated weights. 
*/ -@@ -528,6 +594,57 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) +@@ -527,6 +593,57 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) return dur; } -+static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq) ++static unsigned int bfq_bfqq_cooperations(struct bfq_queue *bfqq) +{ + return bfqq->bic ? bfqq->bic->cooperations : 0; +} @@ -204,7 +205,7 @@ index f9787a6..d1f648d 100644 /* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) { -@@ -764,8 +881,14 @@ static void bfq_add_request(struct request *rq) +@@ -763,8 +880,14 @@ static void bfq_add_request(struct request *rq) BUG_ON(!next_rq); bfqq->next_rq = next_rq; @@ -220,7 +221,7 @@ index f9787a6..d1f648d 100644 idle_for_long_time = time_is_before_jiffies( bfqq->budget_timeout + bfqd->bfq_wr_min_idle_time); -@@ -793,11 +916,12 @@ static void bfq_add_request(struct request *rq) +@@ -792,11 +915,12 @@ static void bfq_add_request(struct request *rq) bfqd->last_ins_in_burst = jiffies; } @@ -236,7 +237,7 @@ index f9787a6..d1f648d 100644 entity->budget = max_t(unsigned long, bfqq->max_budget, bfq_serv_to_charge(next_rq, bfqq)); -@@ -816,6 +940,9 @@ static void bfq_add_request(struct request *rq) +@@ -815,6 +939,9 @@ static void bfq_add_request(struct request *rq) if (!bfqd->low_latency) goto add_bfqq_busy; @@ -246,7 +247,7 @@ index f9787a6..d1f648d 100644 /* * If the queue: * - is not being boosted, -@@ -840,7 +967,7 @@ static void bfq_add_request(struct request *rq) +@@ -839,7 +966,7 @@ static void bfq_add_request(struct request *rq) } else if (old_wr_coeff > 1) { if (interactive) bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); @@ -255,7 +256,7 @@ index f9787a6..d1f648d 100644 (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && !soft_rt)) { -@@ -905,6 +1032,7 @@ static void bfq_add_request(struct request *rq) +@@ -904,6 +1031,7 @@ static void bfq_add_request(struct request *rq) bfqd->bfq_wr_rt_max_time; } } @@ -263,7 +264,7 @@ index f9787a6..d1f648d 100644 if (old_wr_coeff != bfqq->wr_coeff) entity->prio_changed = 1; add_bfqq_busy: -@@ -1047,6 +1175,15 @@ static void bfq_merged_request(struct request_queue *q, struct request *req, +@@ -1046,6 +1174,15 @@ static void bfq_merged_request(struct request_queue *q, struct request *req, bfqd->last_position); BUG_ON(!next_rq); bfqq->next_rq = next_rq; @@ -279,7 +280,7 @@ index f9787a6..d1f648d 100644 } } -@@ -1129,11 +1266,346 @@ static void bfq_end_wr(struct bfq_data *bfqd) +@@ -1128,11 +1265,346 @@ static void bfq_end_wr(struct bfq_data *bfqd) spin_unlock_irq(bfqd->queue->queue_lock); } @@ -572,7 +573,7 @@ index f9787a6..d1f648d 100644 + struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +{ + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", -+ (long unsigned)new_bfqq->pid); ++ (unsigned long) new_bfqq->pid); + /* Save weight raising and idle window of the merged queues */ + bfq_bfqq_save_state(bfqq); + bfq_bfqq_save_state(new_bfqq); @@ -626,7 +627,7 @@ index f9787a6..d1f648d 100644 /* * Disallow merge of a sync bio into an async request. 
-@@ -1150,7 +1622,26 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, +@@ -1149,7 +1621,26 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, if (!bic) return 0; @@ -654,7 +655,7 @@ index f9787a6..d1f648d 100644 } static void __bfq_set_in_service_queue(struct bfq_data *bfqd, -@@ -1349,6 +1840,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -1350,6 +1841,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) __bfq_bfqd_reset_in_service(bfqd); @@ -670,7 +671,7 @@ index f9787a6..d1f648d 100644 if (RB_EMPTY_ROOT(&bfqq->sort_list)) { /* * Overloading budget_timeout field to store the time -@@ -1357,8 +1857,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -1358,8 +1858,13 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) */ bfqq->budget_timeout = jiffies; bfq_del_bfqq_busy(bfqd, bfqq, 1); @@ -685,7 +686,7 @@ index f9787a6..d1f648d 100644 } /** -@@ -2242,10 +2747,12 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -2246,10 +2751,12 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* * If the queue was activated in a burst, or * too much time has elapsed from the beginning @@ -700,7 +701,7 @@ index f9787a6..d1f648d 100644 time_is_before_jiffies(bfqq->last_wr_start_finish + bfqq->wr_cur_max_time)) { bfqq->last_wr_start_finish = jiffies; -@@ -2474,6 +2981,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq) +@@ -2478,6 +2985,25 @@ static void bfq_put_queue(struct bfq_queue *bfqq) #endif } @@ -726,7 +727,7 @@ index f9787a6..d1f648d 100644 static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { if (bfqq == bfqd->in_service_queue) { -@@ -2484,6 +3010,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -2488,6 +3014,8 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_log_bfqq(bfqd, bfqq, "exit_bfqq: %p, %d", bfqq, atomic_read(&bfqq->ref)); @@ -735,7 +736,7 @@ index f9787a6..d1f648d 100644 bfq_put_queue(bfqq); } -@@ -2492,6 +3020,25 @@ static void bfq_init_icq(struct io_cq *icq) +@@ -2496,6 +3024,25 @@ static void bfq_init_icq(struct io_cq *icq) struct bfq_io_cq *bic = icq_to_bic(icq); bic->ttime.last_end_request = jiffies; @@ -761,7 +762,7 @@ index f9787a6..d1f648d 100644 } static void bfq_exit_icq(struct io_cq *icq) -@@ -2505,6 +3052,13 @@ static void bfq_exit_icq(struct io_cq *icq) +@@ -2509,6 +3056,13 @@ static void bfq_exit_icq(struct io_cq *icq) } if (bic->bfqq[BLK_RW_SYNC]) { @@ -775,7 +776,7 @@ index f9787a6..d1f648d 100644 bfq_exit_bfqq(bfqd, bic->bfqq[BLK_RW_SYNC]); bic->bfqq[BLK_RW_SYNC] = NULL; } -@@ -2809,6 +3363,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, +@@ -2814,6 +3368,10 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, if (!bfq_bfqq_sync(bfqq) || bfq_class_idle(bfqq)) return; @@ -786,7 +787,7 @@ index f9787a6..d1f648d 100644 enable_idle = bfq_bfqq_idle_window(bfqq); if (atomic_read(&bic->icq.ioc->active_ref) == 0 || -@@ -2856,6 +3414,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2861,6 +3419,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->entity.service > bfq_max_budget(bfqd) / 8 || !BFQQ_SEEKY(bfqq)) bfq_update_idle_window(bfqd, bfqq, bic); @@ -794,7 +795,7 @@ index f9787a6..d1f648d 100644 bfq_log_bfqq(bfqd, bfqq, "rq_enqueued: idle_window=%d 
(seeky %d, mean %llu)", -@@ -2920,12 +3479,47 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2925,12 +3484,47 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, static void bfq_insert_request(struct request_queue *q, struct request *rq) { struct bfq_data *bfqd = q->elevator->elevator_data; @@ -843,7 +844,7 @@ index f9787a6..d1f648d 100644 rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &bfqq->fifo); -@@ -3094,6 +3688,32 @@ static void bfq_put_request(struct request *rq) +@@ -3099,6 +3693,32 @@ static void bfq_put_request(struct request *rq) } /* @@ -876,7 +877,7 @@ index f9787a6..d1f648d 100644 * Allocate bfq data structures associated with this request. */ static int bfq_set_request(struct request_queue *q, struct request *rq, -@@ -3105,6 +3725,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, +@@ -3110,6 +3730,7 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, const int is_sync = rq_is_sync(rq); struct bfq_queue *bfqq; unsigned long flags; @@ -884,7 +885,7 @@ index f9787a6..d1f648d 100644 might_sleep_if(gfpflags_allow_blocking(gfp_mask)); -@@ -3117,15 +3738,30 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, +@@ -3122,15 +3743,30 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, bfq_bic_update_cgroup(bic, bio); @@ -900,12 +901,11 @@ index f9787a6..d1f648d 100644 + bic->saved_in_large_burst) bfq_mark_bfqq_in_large_burst(bfqq); - else -- bfq_clear_bfqq_in_large_burst(bfqq); + else { -+ bfq_clear_bfqq_in_large_burst(bfqq); -+ if (bic->was_in_burst_list) -+ hlist_add_head(&bfqq->burst_list_node, -+ &bfqd->burst_list); + bfq_clear_bfqq_in_large_burst(bfqq); ++ if (bic->was_in_burst_list) ++ hlist_add_head(&bfqq->burst_list_node, ++ &bfqd->burst_list); + } + } + } else { @@ -919,7 +919,7 @@ index f9787a6..d1f648d 100644 } } -@@ -3137,6 +3773,26 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, +@@ -3142,6 +3778,26 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, rq->elv.priv[0] = bic; rq->elv.priv[1] = bfqq; @@ -946,7 +946,7 @@ index f9787a6..d1f648d 100644 spin_unlock_irqrestore(q->queue_lock, flags); return 0; -@@ -3290,6 +3946,7 @@ static void bfq_init_root_group(struct bfq_group *root_group, +@@ -3295,6 +3951,7 @@ static void bfq_init_root_group(struct bfq_group *root_group, root_group->my_entity = NULL; root_group->bfqd = bfqd; #endif @@ -954,7 +954,7 @@ index f9787a6..d1f648d 100644 for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; } -@@ -3370,6 +4027,8 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +@@ -3375,6 +4032,8 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->bfq_timeout[BLK_RW_ASYNC] = bfq_timeout_async; bfqd->bfq_timeout[BLK_RW_SYNC] = bfq_timeout_sync; @@ -964,7 +964,7 @@ index f9787a6..d1f648d 100644 bfqd->bfq_large_burst_thresh = 11; diff --git a/block/bfq.h b/block/bfq.h -index 485d0c9..f73c942 100644 +index 2bf54ae..fcce855 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -183,6 +183,8 @@ struct bfq_group; @@ -1097,5 +1097,5 @@ index 485d0c9..f73c942 100644 static void bfq_put_queue(struct bfq_queue *bfqq); static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); -- -1.9.1 +2.7.4 (Apple Git-66) diff --git a/0004-block-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for.patch 
b/0004-Turn-BFQ-v7r11-into-BFQ-v8r4-for-4.8.0.patch similarity index 77% rename from 0004-block-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for.patch rename to 0004-Turn-BFQ-v7r11-into-BFQ-v8r4-for-4.8.0.patch index bf56ac7..62cdd1a 100644 --- a/0004-block-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for.patch +++ b/0004-Turn-BFQ-v7r11-into-BFQ-v8r4-for-4.8.0.patch @@ -1,16 +1,16 @@ -From d384ccf796a992e27691b7359ce54534db57e74c Mon Sep 17 00:00:00 2001 +From ec8981e245dfe24bc6a80207e832ca9be18fd39d Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 17 May 2016 08:28:04 +0200 -Subject: [PATCH 4/4] block, bfq: turn BFQ-v7r11 for 4.7.0 into BFQ-v8r3 for - 4.7.0 +Subject: [PATCH 4/4] Turn BFQ-v7r11 into BFQ-v8r4 for 4.8.0 +Signed-off-by: Paolo Valente --- block/Kconfig.iosched | 2 +- - block/bfq-cgroup.c | 480 +++++---- - block/bfq-iosched.c | 2602 +++++++++++++++++++++++++++++-------------------- - block/bfq-sched.c | 441 +++++++-- - block/bfq.h | 708 +++++++------- - 5 files changed, 2484 insertions(+), 1749 deletions(-) + block/bfq-cgroup.c | 495 ++++---- + block/bfq-iosched.c | 3230 +++++++++++++++++++++++++++++++------------------ + block/bfq-sched.c | 480 ++++++-- + block/bfq.h | 747 ++++++------ + 5 files changed, 3073 insertions(+), 1881 deletions(-) diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index f78cd1a..6d92579 100644 @@ -26,33 +26,85 @@ index f78cd1a..6d92579 100644 ---help--- Enable hierarchical scheduling in BFQ, using the blkio controller. diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c -index 5ee99ec..c83d90c 100644 +index 0367996..b50ae8e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c -@@ -162,7 +162,6 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg) - static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) +@@ -7,7 +7,9 @@ + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * +- * Copyright (C) 2010 Paolo Valente ++ * Copyright (C) 2015 Paolo Valente ++ * ++ * Copyright (C) 2016 Paolo Valente + * + * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ + * file. 
+@@ -163,8 +165,6 @@ static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) { struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq); + - BUG_ON(!pd); +- return pd_to_bfqg(pd); } -@@ -224,14 +223,6 @@ static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) - blkg_rwstat_add(&bfqg->stats.merged, rw, 1); +@@ -208,59 +208,49 @@ static void bfqg_put(struct bfq_group *bfqg) + + static void bfqg_stats_update_io_add(struct bfq_group *bfqg, + struct bfq_queue *bfqq, +- int rw) ++ int op, int op_flags) + { +- blkg_rwstat_add(&bfqg->stats.queued, rw, 1); ++ blkg_rwstat_add(&bfqg->stats.queued, op, op_flags, 1); + bfqg_stats_end_empty_time(&bfqg->stats); + if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) + bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); + } + +-static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) +-{ +- blkg_rwstat_add(&bfqg->stats.queued, rw, -1); +-} +- +-static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) ++static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int op, ++ int op_flags) + { +- blkg_rwstat_add(&bfqg->stats.merged, rw, 1); ++ blkg_rwstat_add(&bfqg->stats.queued, op, op_flags, -1); } -static void bfqg_stats_update_dispatch(struct bfq_group *bfqg, - uint64_t bytes, int rw) --{ ++static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int op, ++ int op_flags) + { - blkg_stat_add(&bfqg->stats.sectors, bytes >> 9); - blkg_rwstat_add(&bfqg->stats.serviced, rw, 1); - blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes); --} -- ++ blkg_rwstat_add(&bfqg->stats.merged, op, op_flags, 1); + } + static void bfqg_stats_update_completion(struct bfq_group *bfqg, - uint64_t start_time, uint64_t io_start_time, int rw) +- uint64_t start_time, uint64_t io_start_time, int rw) ++ uint64_t start_time, uint64_t io_start_time, int op, ++ int op_flags) { -@@ -248,17 +239,11 @@ static void bfqg_stats_update_completion(struct bfq_group *bfqg, + struct bfqg_stats *stats = &bfqg->stats; + unsigned long long now = sched_clock(); + + if (time_after64(now, io_start_time)) +- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time); ++ blkg_rwstat_add(&stats->service_time, op, op_flags, ++ now - io_start_time); + if (time_after64(io_start_time, start_time)) +- blkg_rwstat_add(&stats->wait_time, rw, ++ blkg_rwstat_add(&stats->wait_time, op, op_flags, + io_start_time - start_time); + } + /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { @@ -70,7 +122,7 @@ index 5ee99ec..c83d90c 100644 blkg_stat_reset(&stats->avg_queue_size_sum); blkg_stat_reset(&stats->avg_queue_size_samples); blkg_stat_reset(&stats->dequeue); -@@ -268,21 +253,19 @@ static void bfqg_stats_reset(struct bfqg_stats *stats) +@@ -270,19 +260,16 @@ static void bfqg_stats_reset(struct bfqg_stats *stats) } /* @to += @from */ @@ -89,13 +141,9 @@ index 5ee99ec..c83d90c 100644 blkg_stat_add_aux(&from->time, &from->time); - blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time); blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); -- blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); -+ blkg_stat_add_aux(&to->avg_queue_size_samples, -+ &from->avg_queue_size_samples); - blkg_stat_add_aux(&to->dequeue, &from->dequeue); - blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time); - blkg_stat_add_aux(&to->idle_time, &from->idle_time); -@@ -308,10 +291,8 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) + 
blkg_stat_add_aux(&to->avg_queue_size_samples, + &from->avg_queue_size_samples); +@@ -311,10 +298,8 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) if (unlikely(!parent)) return; @@ -107,7 +155,7 @@ index 5ee99ec..c83d90c 100644 } static void bfq_init_entity(struct bfq_entity *entity, -@@ -332,15 +313,11 @@ static void bfq_init_entity(struct bfq_entity *entity, +@@ -335,15 +320,11 @@ static void bfq_init_entity(struct bfq_entity *entity, static void bfqg_stats_exit(struct bfqg_stats *stats) { @@ -123,7 +171,7 @@ index 5ee99ec..c83d90c 100644 blkg_stat_exit(&stats->avg_queue_size_sum); blkg_stat_exit(&stats->avg_queue_size_samples); blkg_stat_exit(&stats->dequeue); -@@ -351,15 +328,11 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) +@@ -354,15 +335,11 @@ static void bfqg_stats_exit(struct bfqg_stats *stats) static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { @@ -140,18 +188,7 @@ index 5ee99ec..c83d90c 100644 blkg_stat_init(&stats->avg_queue_size_sum, gfp) || blkg_stat_init(&stats->avg_queue_size_samples, gfp) || blkg_stat_init(&stats->dequeue, gfp) || -@@ -374,20 +347,36 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) - } - - static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) -- { -+{ - return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; -- } -+} - - static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) - { +@@ -386,11 +363,27 @@ static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); } @@ -180,7 +217,7 @@ index 5ee99ec..c83d90c 100644 } static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) -@@ -398,8 +387,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) +@@ -401,8 +394,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) if (!bfqg) return NULL; @@ -190,7 +227,7 @@ index 5ee99ec..c83d90c 100644 kfree(bfqg); return NULL; } -@@ -407,27 +395,20 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) +@@ -410,27 +402,20 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node) return &bfqg->pd; } @@ -228,7 +265,7 @@ index 5ee99ec..c83d90c 100644 entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; -@@ -445,70 +426,53 @@ static void bfq_pd_free(struct blkg_policy_data *pd) +@@ -448,70 +433,53 @@ static void bfq_pd_free(struct blkg_policy_data *pd) struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_exit(&bfqg->stats); @@ -256,23 +293,23 @@ index 5ee99ec..c83d90c 100644 } -/* to be used by recursive prfill, sums live and dead rwstats recursively */ --static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, -- int off) +-static struct blkg_rwstat +-bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, int off) +static void bfq_group_set_parent(struct bfq_group *bfqg, + struct bfq_group *parent) { - struct blkg_rwstat a, b; + struct bfq_entity *entity; -+ -+ BUG_ON(!parent); -+ BUG_ON(!bfqg); -+ BUG_ON(bfqg == parent); - a = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off); - b = blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, - off + dead_stats_off_delta); - blkg_rwstat_add_aux(&a, &b); - return a; ++ BUG_ON(!parent); ++ BUG_ON(!bfqg); ++ BUG_ON(bfqg == parent); ++ + entity = &bfqg->entity; + entity->parent = parent->my_entity; + entity->sched_data = &parent->sched_data; @@ -326,19 +363,15 @@ index 5ee99ec..c83d90c 100644 /* * 
Update chain of bfq_groups as we might be handling a leaf group -@@ -531,13 +495,18 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, - return bfqg; - } +@@ -537,11 +505,15 @@ static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, + static void bfq_pos_tree_add_move(struct bfq_data *bfqd, + struct bfq_queue *bfqq); --static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq); -+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, -+ struct bfq_queue *bfqq); -+ +static void bfq_bfqq_expire(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + bool compensate, + enum bfqq_expiration reason); - ++ /** * bfq_bfqq_move - migrate @bfqq to @bfqg. * @bfqd: queue descriptor. @@ -347,7 +380,7 @@ index 5ee99ec..c83d90c 100644 * @bfqg: the group to move to. * * Move @bfqq to @bfqg, deactivating it from its old group and reactivating -@@ -548,26 +517,40 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -552,26 +524,40 @@ static void bfq_pos_tree_add_move(struct bfq_data *bfqd, * rcu_read_lock()). */ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, @@ -368,7 +401,9 @@ index 5ee99ec..c83d90c 100644 + && entity->on_st && bfqq != bfqd->in_service_queue); + BUG_ON(!bfq_bfqq_busy(bfqq) && bfqq == bfqd->in_service_queue); -+ + +- if (busy) { +- BUG_ON(atomic_read(&bfqq->ref) < 2); + /* If bfqq is empty, then bfq_bfqq_expire also invokes + * bfq_del_bfqq_busy, thereby removing bfqq and its entity + * from data structures related to current group. Otherwise we @@ -382,9 +417,7 @@ index 5ee99ec..c83d90c 100644 + BUG_ON(entity->on_st && !bfq_bfqq_busy(bfqq) + && &bfq_entity_service_tree(entity)->idle != + entity->tree); - -- if (busy) { -- BUG_ON(atomic_read(&bfqq->ref) < 2); ++ + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_busy(bfqq)); - if (!resume) @@ -402,7 +435,7 @@ index 5ee99ec..c83d90c 100644 bfqg_put(bfqq_group(bfqq)); /* -@@ -579,14 +562,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -583,14 +569,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, entity->sched_data = &bfqg->sched_data; bfqg_get(bfqg); @@ -423,16 +456,20 @@ index 5ee99ec..c83d90c 100644 } /** -@@ -613,7 +599,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, +@@ -617,7 +606,11 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, lockdep_assert_held(bfqd->queue->queue_lock); - bfqg = bfq_find_alloc_group(bfqd, blkcg); + bfqg = bfq_find_set_group(bfqd, blkcg); ++ ++ if (unlikely(!bfqg)) ++ bfqg = bfqd->root_group; ++ if (async_bfqq) { entity = &async_bfqq->entity; -@@ -621,7 +607,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, +@@ -625,7 +618,8 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, bic_set_bfqq(bic, NULL, 0); bfq_log_bfqq(bfqd, async_bfqq, "bic_change_group: %p %d", @@ -442,7 +479,7 @@ index 5ee99ec..c83d90c 100644 bfq_put_queue(async_bfqq); } } -@@ -629,7 +616,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, +@@ -633,7 +627,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, if (sync_bfqq) { entity = &sync_bfqq->entity; if (entity->sched_data != &bfqg->sched_data) @@ -451,7 +488,7 @@ index 5ee99ec..c83d90c 100644 } return bfqg; -@@ -638,25 +625,23 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, +@@ -642,25 +636,23 @@ static struct bfq_group 
*__bfq_bic_change_cgroup(struct bfq_data *bfqd, static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); @@ -485,22 +522,16 @@ index 5ee99ec..c83d90c 100644 } /** -@@ -682,8 +667,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, +@@ -686,7 +678,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); BUG_ON(!bfqq); - bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group); -- return; + bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** -@@ -711,16 +695,15 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd, - if (bfqg->sched_data.in_service_entity) - bfq_reparent_leaf_entity(bfqd, - bfqg->sched_data.in_service_entity); -- -- return; +@@ -717,11 +709,12 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd, } /** @@ -517,7 +548,7 @@ index 5ee99ec..c83d90c 100644 */ static void bfq_pd_offline(struct blkg_policy_data *pd) { -@@ -779,6 +762,12 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) +@@ -780,6 +773,12 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) bfq_put_async_queues(bfqd, bfqg); BUG_ON(entity->tree); @@ -530,7 +561,7 @@ index 5ee99ec..c83d90c 100644 bfqg_stats_xfer_dead(bfqg); } -@@ -788,46 +777,35 @@ static void bfq_end_wr_async(struct bfq_data *bfqd) +@@ -789,46 +788,35 @@ static void bfq_end_wr_async(struct bfq_data *bfqd) list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); @@ -588,15 +619,7 @@ index 5ee99ec..c83d90c 100644 if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) return ret; -@@ -837,6 +815,7 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css, - bfqgd->weight = (unsigned short)val; - hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { - struct bfq_group *bfqg = blkg_to_bfqg(blkg); -+ - if (!bfqg) - continue; - /* -@@ -871,13 +850,18 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css, +@@ -873,13 +861,18 @@ static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css, return ret; } @@ -620,7 +643,7 @@ index 5ee99ec..c83d90c 100644 } static int bfqg_print_stat(struct seq_file *sf, void *v) -@@ -897,16 +881,17 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v) +@@ -899,16 +892,17 @@ static int bfqg_print_rwstat(struct seq_file *sf, void *v) static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { @@ -642,7 +665,7 @@ index 5ee99ec..c83d90c 100644 return __blkg_prfill_rwstat(sf, pd, &sum); } -@@ -926,6 +911,41 @@ static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) +@@ -928,6 +922,41 @@ static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) return 0; } @@ -684,37 +707,23 @@ index 5ee99ec..c83d90c 100644 static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { -@@ -950,7 +970,8 @@ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) - return 0; - } - --static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) -+static struct bfq_group * -+bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) - { - int ret; - -@@ -958,41 +979,18 @@ static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int n - if (ret) - return NULL; - -- return blkg_to_bfqg(bfqd->queue->root_blkg); -+ return blkg_to_bfqg(bfqd->queue->root_blkg); +@@ -964,38 +993,15 @@ bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) + 
return blkg_to_bfqg(bfqd->queue->root_blkg); } -static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) -{ -- struct bfq_group_data *bgd; +- struct bfq_group_data *bgd; - -- bgd = kzalloc(sizeof(*bgd), GFP_KERNEL); -- if (!bgd) -- return NULL; -- return &bgd->pd; +- bgd = kzalloc(sizeof(*bgd), GFP_KERNEL); +- if (!bgd) +- return NULL; +- return &bgd->pd; -} - -static void bfq_cpd_free(struct blkcg_policy_data *cpd) -{ -- kfree(cpd_to_bfqgd(cpd)); +- kfree(cpd_to_bfqgd(cpd)); -} - -static struct cftype bfqio_files_dfl[] = { @@ -742,7 +751,7 @@ index 5ee99ec..c83d90c 100644 { .name = "bfq.time", .private = offsetof(struct bfq_group, stats.time), -@@ -1000,18 +998,17 @@ static struct cftype bfqio_files[] = { +@@ -1003,18 +1009,17 @@ static struct cftype bfqio_files[] = { }, { .name = "bfq.sectors", @@ -766,7 +775,7 @@ index 5ee99ec..c83d90c 100644 }, { .name = "bfq.io_service_time", -@@ -1042,18 +1039,17 @@ static struct cftype bfqio_files[] = { +@@ -1045,18 +1050,17 @@ static struct cftype bfqio_files[] = { }, { .name = "bfq.sectors_recursive", @@ -790,7 +799,7 @@ index 5ee99ec..c83d90c 100644 }, { .name = "bfq.io_service_time_recursive", -@@ -1099,32 +1095,35 @@ static struct cftype bfqio_files[] = { +@@ -1102,31 +1106,39 @@ static struct cftype bfqio_files[] = { .private = offsetof(struct bfq_group, stats.dequeue), .seq_show = bfqg_print_stat, }, @@ -803,20 +812,19 @@ index 5ee99ec..c83d90c 100644 }; -static struct blkcg_policy blkcg_policy_bfq = { -- .dfl_cftypes = bfqio_files_dfl, -- .legacy_cftypes = bfqio_files, +- .dfl_cftypes = bfqio_files_dfl, +- .legacy_cftypes = bfqio_files, - -- .pd_alloc_fn = bfq_pd_alloc, -- .pd_init_fn = bfq_pd_init, -- .pd_offline_fn = bfq_pd_offline, -- .pd_free_fn = bfq_pd_free, -- .pd_reset_stats_fn = bfq_pd_reset_stats, -- -- .cpd_alloc_fn = bfq_cpd_alloc, -- .cpd_init_fn = bfq_cpd_init, -- .cpd_bind_fn = bfq_cpd_init, -- .cpd_free_fn = bfq_cpd_free, +- .pd_alloc_fn = bfq_pd_alloc, +- .pd_init_fn = bfq_pd_init, +- .pd_offline_fn = bfq_pd_offline, +- .pd_free_fn = bfq_pd_free, +- .pd_reset_stats_fn = bfq_pd_reset_stats, - +- .cpd_alloc_fn = bfq_cpd_alloc, +- .cpd_init_fn = bfq_cpd_init, +- .cpd_bind_fn = bfq_cpd_init, +- .cpd_free_fn = bfq_cpd_free, +static struct cftype bfq_blkg_files[] = { + { + .name = "bfq.weight", @@ -831,13 +839,17 @@ index 5ee99ec..c83d90c 100644 +#else /* CONFIG_BFQ_GROUP_IOSCHED */ + +static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg, -+ struct bfq_queue *bfqq, int rw) { } -+static inline void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) { } -+static inline void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) { } ++ struct bfq_queue *bfqq, int op, int op_flags) { } ++static inline void ++bfqg_stats_update_io_remove(struct bfq_group *bfqg, int op, int op_flags) { } ++static inline void ++bfqg_stats_update_io_merged(struct bfq_group *bfqg, int op, int op_flags) { } +static inline void bfqg_stats_update_completion(struct bfq_group *bfqg, -+ uint64_t start_time, uint64_t io_start_time, int rw) { } -+static inline void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, -+struct bfq_group *curr_bfqg) { } ++ uint64_t start_time, uint64_t io_start_time, int op, ++ int op_flags) { } ++static inline void ++bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, ++ struct bfq_group *curr_bfqg) { } +static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { } +static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } +static inline void 
bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } @@ -847,7 +859,7 @@ index 5ee99ec..c83d90c 100644 static void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) -@@ -1146,27 +1145,20 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) +@@ -1150,27 +1162,20 @@ bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) return bfqd->root_group; } @@ -865,31 +877,33 @@ index 5ee99ec..c83d90c 100644 -static void bfq_disconnect_groups(struct bfq_data *bfqd) +static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, -+ struct blkcg *blkcg) ++ struct blkcg *blkcg) { - bfq_put_async_queues(bfqd, bfqd->root_group); + return bfqd->root_group; } -static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd, -- struct blkcg *blkcg) +- struct blkcg *blkcg) +static struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { - return bfqd->root_group; + return bfqq->bfqd->root_group; } - static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) + static struct bfq_group * diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c -index d1f648d..3bc1f8b 100644 +index cf3e9b1..eef6ff4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c -@@ -7,25 +7,26 @@ +@@ -7,25 +7,28 @@ * Copyright (C) 2008 Fabio Checconi * Paolo Valente * - * Copyright (C) 2010 Paolo Valente -+ * Copyright (C) 2016 Paolo Valente ++ * Copyright (C) 2015 Paolo Valente ++ * ++ * Copyright (C) 2016 Paolo Valente * * Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ * file. @@ -926,7 +940,28 @@ index d1f648d..3bc1f8b 100644 * * BFQ is described in [1], where also a reference to the initial, more * theoretical paper on BFQ can be found. The interested reader can find -@@ -87,7 +88,6 @@ static const int bfq_stats_min_budgets = 194; +@@ -70,8 +73,8 @@ + #include "bfq.h" + #include "blk.h" + +-/* Expiration time of sync (0) and async (1) requests, in jiffies. */ +-static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; ++/* Expiration time of sync (0) and async (1) requests, in ns. */ ++static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; + + /* Maximum backwards seek, in KiB. */ + static const int bfq_back_max = 16 * 1024; +@@ -79,15 +82,14 @@ static const int bfq_back_max = 16 * 1024; + /* Penalty of a backwards seek, in number of sectors. */ + static const int bfq_back_penalty = 2; + +-/* Idling period duration, in jiffies. */ +-static int bfq_slice_idle = HZ / 125; ++/* Idling period duration, in ns. */ ++static u32 bfq_slice_idle = NSEC_PER_SEC / 125; + + /* Minimum number of assigned budgets for which stats are safe to compute. */ + static const int bfq_stats_min_budgets = 194; /* Default maximum budget values, in sectors and number of requests. */ static const int bfq_default_max_budget = 16 * 1024; @@ -934,7 +969,7 @@ index d1f648d..3bc1f8b 100644 /* * Async to sync throughput distribution is controlled as follows: -@@ -97,8 +97,7 @@ static const int bfq_max_budget_async_rq = 4; +@@ -97,23 +99,27 @@ static const int bfq_max_budget_async_rq = 4; static const int bfq_async_charge_factor = 10; /* Default timeout values, in jiffies, approximating CFQ defaults. */ @@ -944,19 +979,33 @@ index d1f648d..3bc1f8b 100644 struct kmem_cache *bfq_pool; -@@ -109,8 +108,9 @@ struct kmem_cache *bfq_pool; +-/* Below this threshold (in ms), we consider thinktime immediate. */ +-#define BFQ_MIN_TT 2 ++/* Below this threshold (in ns), we consider thinktime immediate. 
*/ ++#define BFQ_MIN_TT (2 * NSEC_PER_MSEC) + + /* hw_tag detection: parallel requests threshold and min samples needed. */ #define BFQ_HW_QUEUE_THRESHOLD 4 #define BFQ_HW_QUEUE_SAMPLES 32 -#define BFQQ_SEEK_THR (sector_t)(8 * 1024) -#define BFQQ_SEEKY(bfqq) ((bfqq)->seek_mean > BFQQ_SEEK_THR) -+#define BFQQ_SEEK_THR (sector_t)(8 * 100) ++#define BFQQ_SEEK_THR (sector_t)(8 * 100) +#define BFQQ_CLOSE_THR (sector_t)(8 * 1024) +#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8) - /* Min samples used for peak rate estimation (for autotuning). */ - #define BFQ_PEAK_RATE_SAMPLES 32 -@@ -141,16 +141,24 @@ struct kmem_cache *bfq_pool; +-/* Min samples used for peak rate estimation (for autotuning). */ +-#define BFQ_PEAK_RATE_SAMPLES 32 ++/* Min number of samples required to perform peak-rate update */ ++#define BFQ_RATE_MIN_SAMPLES 32 ++/* Min observation time interval required to perform a peak-rate update (ns) */ ++#define BFQ_RATE_MIN_INTERVAL 300*NSEC_PER_MSEC ++/* Target observation time interval for a peak-rate update (ns) */ ++#define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC + + /* Shift used for peak rate fixed precision calculations. */ + #define BFQ_RATE_SHIFT 16 +@@ -141,16 +147,24 @@ struct kmem_cache *bfq_pool; * The device's speed class is dynamically (re)detected in * bfq_update_peak_rate() every time the estimated peak rate is updated. * @@ -988,7 +1037,19 @@ index d1f648d..3bc1f8b 100644 /* * To improve readability, a conversion function is used to initialize the * following arrays, which entails that they can be initialized only in a -@@ -410,11 +418,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) +@@ -183,10 +197,7 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd); + */ + static int bfq_bio_sync(struct bio *bio) + { +- if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC)) +- return 1; +- +- return 0; ++ return bio_data_dir(bio) == READ || (bio->bi_opf & REQ_SYNC); + } + + /* +@@ -409,11 +420,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd) */ static bool bfq_symmetric_scenario(struct bfq_data *bfqd) { @@ -1001,7 +1062,7 @@ index d1f648d..3bc1f8b 100644 } /* -@@ -534,9 +538,19 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd, +@@ -533,9 +540,19 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd, static unsigned long bfq_serv_to_charge(struct request *rq, struct bfq_queue *bfqq) { @@ -1024,7 +1085,7 @@ index d1f648d..3bc1f8b 100644 } /** -@@ -591,12 +605,23 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) +@@ -590,12 +607,23 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd) dur = bfqd->RT_prod; do_div(dur, bfqd->peak_rate); @@ -1046,14 +1107,14 @@ index d1f648d..3bc1f8b 100644 + else if (dur < msecs_to_jiffies(3000)) + dur = msecs_to_jiffies(3000); --static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq) +-static unsigned int bfq_bfqq_cooperations(struct bfq_queue *bfqq) -{ - return bfqq->bic ? 
bfqq->bic->cooperations : 0; + return dur; } static void -@@ -606,31 +631,11 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) +@@ -605,31 +633,28 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) bfq_mark_bfqq_idle_window(bfqq); else bfq_clear_bfqq_idle_window(bfqq); @@ -1076,17 +1137,33 @@ index d1f648d..3bc1f8b 100644 - bfqq->wr_cur_max_time = bic->wr_time_left; - bfqq->last_wr_start_finish = jiffies; - bfqq->entity.prio_changed = 1; -- } ++ ++ bfqq->wr_coeff = bic->saved_wr_coeff; ++ bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; ++ BUG_ON(time_is_after_jiffies(bfqq->wr_start_at_switch_to_srt)); ++ bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; ++ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); ++ ++ if (bfqq->wr_coeff > 1 && (bfq_bfqq_in_large_burst(bfqq) || ++ time_is_before_jiffies(bfqq->last_wr_start_finish + ++ bfqq->wr_cur_max_time))) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "resume state: switching off wr"); ++ ++ bfqq->wr_coeff = 1; + } - /* - * Clear wr_time_left to prevent bfq_bfqq_save_state() from - * getting confused about the queue's need of a weight-raising - * period. - */ - bic->wr_time_left = 0; ++ /* make sure weight will be updated, however we got here */ ++ bfqq->entity.prio_changed = 1; } static int bfqq_process_refs(struct bfq_queue *bfqq) -@@ -640,7 +645,7 @@ static int bfqq_process_refs(struct bfq_queue *bfqq) +@@ -639,7 +664,7 @@ static int bfqq_process_refs(struct bfq_queue *bfqq) lockdep_assert_held(bfqq->bfqd->queue->queue_lock); io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE]; @@ -1095,7 +1172,7 @@ index d1f648d..3bc1f8b 100644 BUG_ON(process_refs < 0); return process_refs; } -@@ -655,6 +660,7 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -654,6 +679,7 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq) hlist_del_init(&item->burst_list_node); hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list); bfqd->burst_size = 1; @@ -1103,7 +1180,7 @@ index d1f648d..3bc1f8b 100644 } /* Add bfqq to the list of queues in current burst (see bfq_handle_burst) */ -@@ -663,6 +669,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -662,6 +688,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) /* Increment burst size to take into account also bfqq */ bfqd->burst_size++; @@ -1114,7 +1191,7 @@ index d1f648d..3bc1f8b 100644 if (bfqd->burst_size == bfqd->bfq_large_burst_thresh) { struct bfq_queue *pos, *bfqq_item; struct hlist_node *n; -@@ -672,15 +682,19 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -671,15 +701,19 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * other to consider this burst as large. 
*/ bfqd->large_burst = true; @@ -1127,7 +1204,7 @@ index d1f648d..3bc1f8b 100644 hlist_for_each_entry(bfqq_item, &bfqd->burst_list, - burst_list_node) + burst_list_node) { - bfq_mark_bfqq_in_large_burst(bfqq_item); + bfq_mark_bfqq_in_large_burst(bfqq_item); + bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst"); + } bfq_mark_bfqq_in_large_burst(bfqq); @@ -1135,7 +1212,7 @@ index d1f648d..3bc1f8b 100644 /* * From now on, and until the current burst finishes, any -@@ -692,67 +706,79 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -691,67 +725,79 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) hlist_for_each_entry_safe(pos, n, &bfqd->burst_list, burst_list_node) hlist_del_init(&pos->burst_list_node); @@ -1249,7 +1326,8 @@ index d1f648d..3bc1f8b 100644 + * enjoy weight raising as expected. Fortunately these false positives + * are very rare. They typically occur if some service happens to + * start doing I/O exactly when the interactive task starts. -+ * + * +- * . when the very first queue is activated, the queue is inserted into the + * Turning back to the next function, it implements all the steps + * needed to detect the occurrence of a large burst and to properly + * mark all the queues belonging to it (so that they can then be @@ -1258,13 +1336,12 @@ index d1f648d..3bc1f8b 100644 + * burst in progress. The list is then used to mark these queues as + * belonging to a large burst if the burst does become large. The main + * steps are the following. - * -- * . when the very first queue is activated, the queue is inserted into the ++ * + * . when the very first queue is created, the queue is inserted into the * list (as it could be the first queue in a possible burst) * * . if the current burst has not yet become large, and a queue Q that does -@@ -773,13 +799,13 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -772,13 +818,13 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * * . the device enters a large-burst mode * @@ -1280,7 +1357,7 @@ index d1f648d..3bc1f8b 100644 * later, i.e., not shortly after, than the last time at which a queue * either entered the burst list or was marked as belonging to the * current large burst, then the current burst is deemed as finished and: -@@ -792,52 +818,44 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -791,52 +837,44 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq) * in a possible new burst (then the burst list contains just Q * after this step). */ @@ -1357,7 +1434,7 @@ index d1f648d..3bc1f8b 100644 } /* -@@ -846,8 +864,9 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -845,8 +883,9 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, * bfqq as belonging to this large burst immediately. */ if (bfqd->large_burst) { @@ -1368,7 +1445,7 @@ index d1f648d..3bc1f8b 100644 } /* -@@ -856,25 +875,498 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -855,25 +894,491 @@ static void bfq_handle_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq, * queue. Then we add bfqq to the burst. 
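The rules just listed reduce to a little bookkeeping. Below is a minimal, compilable userspace model of that bookkeeping; the struct names, the fixed 16-entry array and the tick-based timestamps are invented stand-ins for the bfqd/bfqq fields and jiffies arithmetic, not kernel code.

#include <stdbool.h>
#include <stdio.h>

#define LARGE_BURST_THRESH 4		/* stand-in for bfqd->bfq_large_burst_thresh */

struct queue {
	int id;
	bool in_large_burst;
};

struct sched {
	struct queue *burst_list[16];	/* queues in the current burst */
	int burst_size;
	bool large_burst;
	long last_ins_in_burst;		/* time of the last insertion */
};

/* Queue created shortly after the previous one: it joins the burst. */
static void add_to_burst(struct sched *s, struct queue *q)
{
	s->burst_list[s->burst_size++] = q;
	if (s->burst_size == LARGE_BURST_THRESH) {
		/* The burst just became large: mark every member. */
		s->large_burst = true;
		for (int i = 0; i < s->burst_size; i++)
			s->burst_list[i]->in_large_burst = true;
	}
}

/* Queue created long after the previous one: start a fresh burst. */
static void reset_burst(struct sched *s, struct queue *q)
{
	s->burst_size = 1;
	s->large_burst = false;
	s->burst_list[0] = q;
}

static void handle_burst(struct sched *s, struct queue *q,
			 long now, long max_gap)
{
	if (s->large_burst)		/* burst already large: mark at once */
		q->in_large_burst = true;
	else if (now - s->last_ins_in_burst > max_gap)
		reset_burst(s, q);	/* long pause: previous burst is over */
	else
		add_to_burst(s, q);	/* close in time: the burst grows */
	s->last_ins_in_burst = now;
}

int main(void)
{
	struct sched s = { .burst_size = 0 };
	struct queue q[6] = { {0}, {1}, {2}, {3}, {4}, {5} };

	for (int i = 0; i < 6; i++)	/* six queues, one tick apart */
		handle_burst(&s, &q[i], i, 2);
	for (int i = 0; i < 6; i++)
		printf("queue %d in_large_burst=%d\n", i, q[i].in_large_burst);
	return 0;
}

With six queues created one tick apart, the fourth creation pushes the burst over the threshold and all members (plus later arrivals) get marked, which is exactly the cascade the comment above describes.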
*/ bfq_add_to_burst(bfqd, bfqq); @@ -1388,6 +1465,7 @@ index d1f648d..3bc1f8b 100644 +static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) +{ + struct bfq_entity *entity = &bfqq->entity; ++ + return entity->budget - entity->service; +} + @@ -1550,6 +1628,7 @@ index d1f648d..3bc1f8b 100644 + * operation, is reset only when bfqq is selected for + * service (see bfq_get_next_queue). + */ ++ BUG_ON(bfqq->max_budget < 0); + entity->budget = min_t(unsigned long, + bfq_bfqq_budget_left(bfqq), + bfqq->max_budget); @@ -1558,8 +1637,9 @@ index d1f648d..3bc1f8b 100644 + return true; + } + ++ BUG_ON(bfqq->max_budget < 0); + entity->budget = max_t(unsigned long, bfqq->max_budget, -+ bfq_serv_to_charge(bfqq->next_rq,bfqq)); ++ bfq_serv_to_charge(bfqq->next_rq, bfqq)); + BUG_ON(entity->budget < 0); + + bfq_clear_bfqq_non_blocking_wait_rq(bfqq); @@ -1580,6 +1660,7 @@ index d1f648d..3bc1f8b 100644 + bfqq->wr_coeff = bfqd->bfq_wr_coeff; + bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); + } else { ++ bfqq->wr_start_at_switch_to_srt = jiffies; + bfqq->wr_coeff = bfqd->bfq_wr_coeff * + BFQ_SOFTRT_WEIGHT_FACTOR; + bfqq->wr_cur_max_time = @@ -1613,32 +1694,13 @@ index d1f648d..3bc1f8b 100644 + jiffies, + jiffies_to_msecs(bfqq-> + wr_cur_max_time)); -+ } else if (time_before( -+ bfqq->last_wr_start_finish + -+ bfqq->wr_cur_max_time, -+ jiffies + -+ bfqd->bfq_wr_rt_max_time) && -+ soft_rt) { ++ } else if (soft_rt) { + /* -+ * The remaining weight-raising time is lower -+ * than bfqd->bfq_wr_rt_max_time, which means -+ * that the application is enjoying weight -+ * raising either because deemed soft-rt in -+ * the near past, or because deemed interactive -+ * a long ago. -+ * In both cases, resetting now the current -+ * remaining weight-raising time for the -+ * application to the weight-raising duration -+ * for soft rt applications would not cause any -+ * latency increase for the application (as the -+ * new duration would be higher than the -+ * remaining time). -+ * -+ * In addition, the application is now meeting -+ * the requirements for being deemed soft rt. -+ * In the end we can correctly and safely -+ * (re)charge the weight-raising duration for -+ * the application with the weight-raising ++ * The application is now or still meeting the ++ * requirements for being deemed soft rt. We ++ * can then correctly and safely (re)charge ++ * the weight-raising duration for the ++ * application with the weight-raising + * duration for soft rt applications. + * + * In particular, doing this recharge now, i.e., @@ -1662,14 +1724,22 @@ index d1f648d..3bc1f8b 100644 + * latency because the application is not + * weight-raised while they are pending. 
+ */ ++ if (bfqq->wr_cur_max_time != ++ bfqd->bfq_wr_rt_max_time) { ++ bfqq->wr_start_at_switch_to_srt = ++ bfqq->last_wr_start_finish; ++ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); ++ ++ bfqq->wr_cur_max_time = ++ bfqd->bfq_wr_rt_max_time; ++ bfqq->wr_coeff = bfqd->bfq_wr_coeff * ++ BFQ_SOFTRT_WEIGHT_FACTOR; ++ bfq_log_bfqq(bfqd, bfqq, ++ "switching to soft_rt wr"); ++ } else ++ bfq_log_bfqq(bfqd, bfqq, ++ "moving forward soft_rt wr duration"); + bfqq->last_wr_start_finish = jiffies; -+ bfqq->wr_cur_max_time = -+ bfqd->bfq_wr_rt_max_time; -+ bfqq->wr_coeff = bfqd->bfq_wr_coeff * -+ BFQ_SOFTRT_WEIGHT_FACTOR; -+ bfq_log_bfqq(bfqd, bfqq, -+ "switching to soft_rt wr, or " -+ " just moving forward duration"); + } + } +} @@ -1697,9 +1767,9 @@ index d1f648d..3bc1f8b 100644 + * bfq_bfqq_update_budg_for_activation for + * details on the usage of the next variable. + */ -+ arrived_in_time = time_is_after_jiffies( ++ arrived_in_time = ktime_get_ns() <= + RQ_BIC(rq)->ttime.last_end_request + -+ bfqd->bfq_slice_idle * 3); ++ bfqd->bfq_slice_idle * 3; + + bfq_log_bfqq(bfqd, bfqq, + "bfq_add_request non-busy: " @@ -1714,7 +1784,7 @@ index d1f648d..3bc1f8b 100644 + + BUG_ON(bfqq == bfqd->in_service_queue); + bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq, -+ rq->cmd_flags); ++ req_op(rq), rq->cmd_flags); + + /* + * bfqq deserves to be weight-raised if: @@ -1871,7 +1941,7 @@ index d1f648d..3bc1f8b 100644 */ prev = bfqq->next_rq; next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position); -@@ -887,160 +1379,10 @@ static void bfq_add_request(struct request *rq) +@@ -886,160 +1391,10 @@ static void bfq_add_request(struct request *rq) if (prev != bfqq->next_rq) bfq_pos_tree_add_move(bfqd, bfqq); @@ -2036,7 +2106,7 @@ index d1f648d..3bc1f8b 100644 if (bfqd->low_latency && old_wr_coeff == 1 && !rq_is_sync(rq) && time_is_before_jiffies( bfqq->last_wr_start_finish + -@@ -1049,16 +1391,43 @@ add_bfqq_busy: +@@ -1048,16 +1403,43 @@ add_bfqq_busy: bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); bfqd->wr_busy_queues++; @@ -2084,7 +2154,36 @@ index d1f648d..3bc1f8b 100644 if (bfqd->low_latency && (old_wr_coeff == 1 || bfqq->wr_coeff == 1 || interactive)) bfqq->last_wr_start_finish = jiffies; -@@ -1106,6 +1475,9 @@ static void bfq_remove_request(struct request *rq) +@@ -1081,14 +1463,24 @@ static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd, + return NULL; + } + ++static sector_t get_sdist(sector_t last_pos, struct request *rq) ++{ ++ sector_t sdist = 0; ++ ++ if (last_pos) { ++ if (last_pos < blk_rq_pos(rq)) ++ sdist = blk_rq_pos(rq) - last_pos; ++ else ++ sdist = last_pos - blk_rq_pos(rq); ++ } ++ ++ return sdist; ++} ++ + static void bfq_activate_request(struct request_queue *q, struct request *rq) + { + struct bfq_data *bfqd = q->elevator->elevator_data; +- + bfqd->rq_in_driver++; +- bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); +- bfq_log(bfqd, "activate_request: new bfqd->last_position %llu", +- (unsigned long long) bfqd->last_position); + } + + static void bfq_deactivate_request(struct request_queue *q, struct request *rq) +@@ -1105,6 +1497,9 @@ static void bfq_remove_request(struct request *rq) struct bfq_data *bfqd = bfqq->bfqd; const int sync = rq_is_sync(rq); @@ -2094,7 +2193,7 @@ index d1f648d..3bc1f8b 100644 if (bfqq->next_rq == rq) { bfqq->next_rq = bfq_find_next_rq(bfqd, bfqq, rq); bfq_updated_next_req(bfqd, bfqq); -@@ -1119,8 +1491,25 @@ static void bfq_remove_request(struct request *rq) +@@ -1118,8 +1513,25 @@ static void 
bfq_remove_request(struct request *rq) elv_rb_del(&bfqq->sort_list, rq); if (RB_EMPTY_ROOT(&bfqq->sort_list)) { @@ -2121,23 +2220,55 @@ index d1f648d..3bc1f8b 100644 /* * Remove queue from request-position tree as it is empty. */ -@@ -1134,9 +1523,7 @@ static void bfq_remove_request(struct request *rq) +@@ -1133,9 +1545,8 @@ static void bfq_remove_request(struct request *rq) BUG_ON(bfqq->meta_pending == 0); bfqq->meta_pending--; } -#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); +- bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags); -#endif ++ bfqg_stats_update_io_remove(bfqq_group(bfqq), req_op(rq), ++ rq->cmd_flags); } static int bfq_merge(struct request_queue *q, struct request **req, -@@ -1221,21 +1608,25 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq, +@@ -1145,7 +1556,7 @@ static int bfq_merge(struct request_queue *q, struct request **req, + struct request *__rq; + + __rq = bfq_find_rq_fmerge(bfqd, bio); +- if (__rq && elv_rq_merge_ok(__rq, bio)) { ++ if (__rq && elv_bio_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } +@@ -1190,7 +1601,8 @@ static void bfq_merged_request(struct request_queue *q, struct request *req, + static void bfq_bio_merged(struct request_queue *q, struct request *req, + struct bio *bio) + { +- bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw); ++ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio_op(bio), ++ bio->bi_opf); + } + #endif + +@@ -1210,7 +1622,7 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq, + */ + if (bfqq == next_bfqq && + !list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && +- time_before(next->fifo_time, rq->fifo_time)) { ++ next->fifo_time < rq->fifo_time) { + list_del_init(&rq->queuelist); + list_replace_init(&next->queuelist, &rq->queuelist); + rq->fifo_time = next->fifo_time; +@@ -1220,21 +1632,31 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq, bfqq->next_rq = rq; bfq_remove_request(next); -#ifdef CONFIG_BFQ_GROUP_IOSCHED - bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); +- bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags); -#endif ++ bfqg_stats_update_io_merged(bfqq_group(bfqq), req_op(next), ++ next->cmd_flags); } /* Must be called with bfqq != NULL */ @@ -2150,17 +2281,22 @@ index d1f648d..3bc1f8b 100644 bfqq->wr_coeff = 1; bfqq->wr_cur_max_time = 0; - /* Trigger a weight change on the next activation of the queue */ ++ bfqq->last_wr_start_finish = jiffies; + /* + * Trigger a weight change on the next invocation of + * __bfq_entity_update_weight_prio. 
+ */ bfqq->entity.prio_changed = 1; ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "end_wr: wrais ending at %lu, rais_max_time %u", ++ bfqq->last_wr_start_finish, ++ jiffies_to_msecs(bfqq->wr_cur_max_time)); + bfq_log_bfqq(bfqq->bfqd, bfqq, "end_wr: wr_busy %d", + bfqq->bfqd->wr_busy_queues); } static void bfq_end_wr_async_queues(struct bfq_data *bfqd, -@@ -1278,7 +1669,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request, +@@ -1277,7 +1699,7 @@ static int bfq_rq_close_to_sector(void *io_struct, bool request, sector_t sector) { return abs(bfq_io_struct_pos(io_struct, request) - sector) <= @@ -2169,7 +2305,7 @@ index d1f648d..3bc1f8b 100644 } static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd, -@@ -1400,7 +1791,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +@@ -1399,7 +1821,7 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * throughput. */ bfqq->new_bfqq = new_bfqq; @@ -2178,7 +2314,7 @@ index d1f648d..3bc1f8b 100644 return new_bfqq; } -@@ -1431,9 +1822,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, +@@ -1430,9 +1852,23 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, } /* @@ -2205,7 +2341,7 @@ index d1f648d..3bc1f8b 100644 * structure otherwise. * * The OOM queue is not allowed to participate to cooperation: in fact, since -@@ -1442,6 +1847,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, +@@ -1441,6 +1877,18 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, * handle merging with the OOM queue would be quite complex and expensive * to maintain. Besides, in such a critical condition as an out of memory, * the benefits of queue merging may be little relevant, or even negligible. @@ -2224,7 +2360,7 @@ index d1f648d..3bc1f8b 100644 */ static struct bfq_queue * bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, -@@ -1451,16 +1868,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -1450,16 +1898,32 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->new_bfqq) return bfqq->new_bfqq; @@ -2260,7 +2396,7 @@ index d1f648d..3bc1f8b 100644 unlikely(in_service_bfqq == &bfqd->oom_bfqq)) goto check_scheduled; -@@ -1482,7 +1915,15 @@ check_scheduled: +@@ -1481,7 +1945,15 @@ check_scheduled: BUG_ON(new_bfqq && bfqq->entity.parent != new_bfqq->entity.parent); @@ -2277,9 +2413,19 @@ index d1f648d..3bc1f8b 100644 bfq_may_be_close_cooperator(bfqq, new_bfqq)) return bfq_setup_merge(bfqq, new_bfqq); -@@ -1498,46 +1939,11 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) +@@ -1490,53 +1962,24 @@ check_scheduled: + + static void bfq_bfqq_save_state(struct bfq_queue *bfqq) + { ++ struct bfq_io_cq *bic = bfqq->bic; ++ + /* + * If !bfqq->bic, the queue is already shared or its requests + * have already been redirected to a shared queue; both idle window + * and weight raising state have already been saved. Do nothing. 
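The save/restore pairing this comment introduces is easier to follow side by side. A simplified C sketch, with invented struct names standing in for bfq_io_cq and bfq_queue, keeping only the fields relevant here and a single wr_max_time parameter in place of the per-queue wr_cur_max_time:

#include <stdbool.h>

struct io_cq_state {			/* what survives in the io context */
	bool saved_idle_window;
	bool saved_in_large_burst;
	unsigned int saved_wr_coeff;
	unsigned long saved_last_wr_start_finish;
	unsigned long saved_wr_start_at_switch_to_srt;
};

struct queue_state {			/* the per-queue fields */
	bool idle_window;
	bool in_large_burst;
	unsigned int wr_coeff;
	unsigned long last_wr_start_finish;
	unsigned long wr_start_at_switch_to_srt;
	int prio_changed;
};

static void save_state(const struct queue_state *q, struct io_cq_state *ic)
{
	ic->saved_idle_window = q->idle_window;
	ic->saved_in_large_burst = q->in_large_burst;
	ic->saved_wr_coeff = q->wr_coeff;
	ic->saved_last_wr_start_finish = q->last_wr_start_finish;
	ic->saved_wr_start_at_switch_to_srt = q->wr_start_at_switch_to_srt;
}

static void resume_state(struct queue_state *q, const struct io_cq_state *ic,
			 unsigned long now, unsigned long wr_max_time)
{
	q->idle_window = ic->saved_idle_window;
	q->in_large_burst = ic->saved_in_large_burst;
	q->wr_coeff = ic->saved_wr_coeff;
	q->last_wr_start_finish = ic->saved_last_wr_start_finish;
	q->wr_start_at_switch_to_srt = ic->saved_wr_start_at_switch_to_srt;

	/* Switch raising off if it expired while the state was parked. */
	if (q->wr_coeff > 1 &&
	    (q->in_large_burst || now > q->last_wr_start_finish + wr_max_time))
		q->wr_coeff = 1;

	q->prio_changed = 1;	/* force a weight update either way */
}

int main(void)
{
	struct queue_state q = { .idle_window = true, .wr_coeff = 30,
				 .last_wr_start_finish = 100,
				 .wr_start_at_switch_to_srt = 100 };
	struct io_cq_state ic;

	save_state(&q, &ic);
	resume_state(&q, &ic, /*now=*/5000, /*wr_max_time=*/2000);
	return q.wr_coeff == 1 ? 0 : 1;	/* raising expired while parked */
}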
*/ - if (!bfqq->bic) +- if (!bfqq->bic) ++ if (!bic) return; - if (bfqq->bic->wr_time_left) - /* @@ -2315,17 +2461,25 @@ index d1f648d..3bc1f8b 100644 - bfq_bfqq_end_wr(bfqq); - } else - bfqq->bic->wr_time_left = 0; -+ - bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); - bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); - bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); - bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); +- bfqq->bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); +- bfqq->bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); +- bfqq->bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); +- bfqq->bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); - bfqq->bic->cooperations++; - bfqq->bic->failed_cooperations = 0; ++ ++ bic->saved_idle_window = bfq_bfqq_idle_window(bfqq); ++ bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); ++ bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); ++ bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); ++ bic->saved_wr_coeff = bfqq->wr_coeff; ++ bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; ++ bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; ++ BUG_ON(time_is_after_jiffies(bfqq->last_wr_start_finish)); } static void bfq_get_bic_reference(struct bfq_queue *bfqq) -@@ -1562,6 +1968,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, +@@ -1561,6 +2004,40 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, if (bfq_bfqq_IO_bound(bfqq)) bfq_mark_bfqq_IO_bound(new_bfqq); bfq_clear_bfqq_IO_bound(bfqq); @@ -2343,12 +2497,12 @@ index d1f648d..3bc1f8b 100644 + new_bfqq->wr_coeff = bfqq->wr_coeff; + new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time; + new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish; ++ new_bfqq->wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; + if (bfq_bfqq_busy(new_bfqq)) -+ bfqd->wr_busy_queues++; ++ bfqd->wr_busy_queues++; + new_bfqq->entity.prio_changed = 1; + bfq_log_bfqq(bfqd, new_bfqq, -+ "wr starting after merge with %d, " -+ "rais_max_time %u", ++ "wr start after merge with %d, rais_max_time %u", + bfqq->pid, + jiffies_to_msecs(bfqq->wr_cur_max_time)); + } @@ -2366,7 +2520,7 @@ index d1f648d..3bc1f8b 100644 /* * Grab a reference to the bic, to prevent it from being destroyed * before being possibly touched by a bfq_split_bfqq(). -@@ -1588,18 +2028,6 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, +@@ -1587,20 +2064,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, bfq_put_queue(bfqq); } @@ -2382,10 +2536,32 @@ index d1f648d..3bc1f8b 100644 - } -} - - static int bfq_allow_merge(struct request_queue *q, struct request *rq, - struct bio *bio) +-static int bfq_allow_merge(struct request_queue *q, struct request *rq, +- struct bio *bio) ++static int bfq_allow_bio_merge(struct request_queue *q, struct request *rq, ++ struct bio *bio) { -@@ -1637,30 +2065,86 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, + struct bfq_data *bfqd = q->elevator->elevator_data; + struct bfq_io_cq *bic; +@@ -1610,7 +2075,7 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, + * Disallow merge of a sync bio into an async request. + */ + if (bfq_bio_sync(bio) && !rq_is_sync(rq)) +- return 0; ++ return false; + + /* + * Lookup the bfqq that this bio will be queued with. 
Allow +@@ -1619,7 +2084,7 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, + */ + bic = bfq_bic_lookup(bfqd, current->io_context); + if (!bic) +- return 0; ++ return false; + + bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio)); + /* +@@ -1636,30 +2101,107 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq, * to decide whether bio and rq can be merged. */ bfqq = new_bfqq; @@ -2397,6 +2573,12 @@ index d1f648d..3bc1f8b 100644 return bfqq == RQ_BFQQ(rq); } ++static int bfq_allow_rq_merge(struct request_queue *q, struct request *rq, ++ struct request *next) ++{ ++ return RQ_BFQQ(rq) == RQ_BFQQ(next); ++} ++ +/* + * Set the maximum time for the in-service queue to consume its + * budget. This prevents seeky processes from lowering the throughput. @@ -2407,6 +2589,7 @@ index d1f648d..3bc1f8b 100644 + struct bfq_queue *bfqq) +{ + unsigned int timeout_coeff; ++ + if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) + timeout_coeff = 1; + else @@ -2437,9 +2620,10 @@ index d1f648d..3bc1f8b 100644 + BUG_ON(bfqq == bfqd->in_service_queue); + BUG_ON(RB_EMPTY_ROOT(&bfqq->sort_list)); + -+ if (bfqq->wr_coeff > 1 && ++ if (time_is_before_jiffies(bfqq->last_wr_start_finish) && ++ bfqq->wr_coeff > 1 && + bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && -+ time_is_before_jiffies(bfqq->budget_timeout)) { ++ time_is_before_jiffies(bfqq->budget_timeout)) { + /* + * For soft real-time queues, move the start + * of the weight-raising period forward by the @@ -2465,7 +2649,20 @@ index d1f648d..3bc1f8b 100644 + * request. + */ + bfqq->last_wr_start_finish += jiffies - -+ bfqq->budget_timeout; ++ max_t(unsigned long, bfqq->last_wr_start_finish, ++ bfqq->budget_timeout); ++ if (time_is_after_jiffies(bfqq->last_wr_start_finish)) { ++ pr_crit( ++ "BFQ WARNING:last %lu budget %lu jiffies %lu", ++ bfqq->last_wr_start_finish, ++ bfqq->budget_timeout, ++ jiffies); ++ pr_crit("diff %lu", jiffies - ++ max_t(unsigned long, ++ bfqq->last_wr_start_finish, ++ bfqq->budget_timeout)); ++ bfqq->last_wr_start_finish = jiffies; ++ } + } + + bfq_set_budget_timeout(bfqd, bfqq); @@ -2478,7 +2675,7 @@ index d1f648d..3bc1f8b 100644 bfqd->in_service_queue = bfqq; } -@@ -1676,31 +2160,6 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) +@@ -1675,36 +2217,11 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd) return bfqq; } @@ -2510,58 +2707,19 @@ index d1f648d..3bc1f8b 100644 static void bfq_arm_slice_timer(struct bfq_data *bfqd) { struct bfq_queue *bfqq = bfqd->in_service_queue; -@@ -1725,62 +2184,34 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) - * being too ill-treated, grant them a small fraction of the - * assigned budget before reducing the waiting time to - * BFQ_MIN_TT. This happened to help reduce latency. -- */ -- sl = bfqd->bfq_slice_idle; -- /* -- * Unless the queue is being weight-raised or the scenario is + struct bfq_io_cq *bic; +- unsigned long sl; ++ u32 sl; + + BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); + +@@ -1728,59 +2245,343 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd) + sl = bfqd->bfq_slice_idle; + /* + * Unless the queue is being weight-raised or the scenario is - * asymmetric, grant only minimum idle time if the queue either - * has been seeky for long enough or has already proved to be - * constantly seeky. 
-- */ -- if (bfq_sample_valid(bfqq->seek_samples) && -- ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > -- bfq_max_budget(bfqq->bfqd) / 8) || -- bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 && -- bfq_symmetric_scenario(bfqd)) -- sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); -- else if (bfqq->wr_coeff > 1) -- sl = sl * 3; -- bfqd->last_idling_start = ktime_get(); -- mod_timer(&bfqd->idle_slice_timer, jiffies + sl); --#ifdef CONFIG_BFQ_GROUP_IOSCHED -- bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); --#endif -- bfq_log(bfqd, "arm idle: %u/%u ms", -- jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); --} -- --/* -- * Set the maximum time for the in-service queue to consume its -- * budget. This prevents seeky processes from lowering the disk -- * throughput (always guaranteed with a time slice scheme as in CFQ). -- */ --static void bfq_set_budget_timeout(struct bfq_data *bfqd) --{ -- struct bfq_queue *bfqq = bfqd->in_service_queue; -- unsigned int timeout_coeff; -- if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) -- timeout_coeff = 1; -- else -- timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; -- -- bfqd->last_budget_start = ktime_get(); -- -- bfq_clear_bfqq_budget_new(bfqq); -- bfqq->budget_timeout = jiffies + -- bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; -+ */ -+ sl = bfqd->bfq_slice_idle; -+ /* -+ * Unless the queue is being weight-raised or the scenario is + * asymmetric, grant only minimum idle time if the queue + * is seeky. A long idling is preserved for a weight-raised + * queue, or, more in general, in an asymemtric scenario, @@ -2569,19 +2727,357 @@ index d1f648d..3bc1f8b 100644 + * its reserved share of the throughput (in particular, it is + * needed if the queue has a higher weight than some other + * queue). -+ */ + */ +- if (bfq_sample_valid(bfqq->seek_samples) && +- ((BFQQ_SEEKY(bfqq) && bfqq->entity.service > +- bfq_max_budget(bfqq->bfqd) / 8) || +- bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 && + if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 && -+ bfq_symmetric_scenario(bfqd)) -+ sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); + bfq_symmetric_scenario(bfqd)) +- sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT)); +- else if (bfqq->wr_coeff > 1) +- sl = sl * 3; ++ sl = min_t(u32, sl, BFQ_MIN_TT); ++ + bfqd->last_idling_start = ktime_get(); +- mod_timer(&bfqd->idle_slice_timer, jiffies + sl); +-#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl), ++ HRTIMER_MODE_REL); + bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); +-#endif +- bfq_log(bfqd, "arm idle: %u/%u ms", +- jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); ++ bfq_log(bfqd, "arm idle: %ld/%ld ms", ++ sl / NSEC_PER_MSEC, bfqd->bfq_slice_idle / NSEC_PER_MSEC); + } + + /* +- * Set the maximum time for the in-service queue to consume its +- * budget. This prevents seeky processes from lowering the disk +- * throughput (always guaranteed with a time slice scheme as in CFQ). ++ * In autotuning mode, max_budget is dynamically recomputed as the ++ * amount of sectors transferred in timeout at the estimated peak ++ * rate. This enables BFQ to utilize a full timeslice with a full ++ * budget, even if the in-service queue is served at peak rate. And ++ * this maximises throughput with sequential workloads. 
+ */ +-static void bfq_set_budget_timeout(struct bfq_data *bfqd) ++static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd) + { +- struct bfq_queue *bfqq = bfqd->in_service_queue; +- unsigned int timeout_coeff; ++ return (u64)bfqd->peak_rate * USEC_PER_MSEC * ++ jiffies_to_msecs(bfqd->bfq_timeout)>>BFQ_RATE_SHIFT; ++} + +- if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time) +- timeout_coeff = 1; ++/* ++ * Update parameters related to throughput and responsiveness, as a ++ * function of the estimated peak rate. See comments on ++ * bfq_calc_max_budget(), and on T_slow and T_fast arrays. ++ */ ++void update_thr_responsiveness_params(struct bfq_data *bfqd) ++{ ++ int dev_type = blk_queue_nonrot(bfqd->queue); ++ ++ if (bfqd->bfq_user_max_budget == 0) { ++ bfqd->bfq_max_budget = ++ bfq_calc_max_budget(bfqd); ++ BUG_ON(bfqd->bfq_max_budget < 0); ++ bfq_log(bfqd, "new max_budget = %d", ++ bfqd->bfq_max_budget); ++ } ++ ++ if (bfqd->device_speed == BFQ_BFQD_FAST && ++ bfqd->peak_rate < device_speed_thresh[dev_type]) { ++ bfqd->device_speed = BFQ_BFQD_SLOW; ++ bfqd->RT_prod = R_slow[dev_type] * ++ T_slow[dev_type]; ++ } else if (bfqd->device_speed == BFQ_BFQD_SLOW && ++ bfqd->peak_rate > device_speed_thresh[dev_type]) { ++ bfqd->device_speed = BFQ_BFQD_FAST; ++ bfqd->RT_prod = R_fast[dev_type] * ++ T_fast[dev_type]; ++ } ++ ++ bfq_log(bfqd, ++"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec", ++ dev_type == 0 ? "ROT" : "NONROT", ++ bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW", ++ bfqd->device_speed == BFQ_BFQD_FAST ? ++ (USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT : ++ (USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT, ++ (USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>> ++ BFQ_RATE_SHIFT); ++} ++ ++void bfq_reset_rate_computation(struct bfq_data *bfqd, struct request *rq) ++{ ++ if (rq != NULL) { /* new rq dispatch now, reset accordingly */ ++ bfqd->last_dispatch = bfqd->first_dispatch = ktime_get_ns() ; ++ bfqd->peak_rate_samples = 1; ++ bfqd->sequential_samples = 0; ++ bfqd->tot_sectors_dispatched = bfqd->last_rq_max_size = ++ blk_rq_sectors(rq); ++ } else /* no new rq dispatched, just reset the number of samples */ ++ bfqd->peak_rate_samples = 0; /* full re-init on next disp. */ ++ ++ bfq_log(bfqd, ++ "reset_rate_computation at end, sample %u/%u tot_sects %llu", ++ bfqd->peak_rate_samples, bfqd->sequential_samples, ++ bfqd->tot_sectors_dispatched); ++} ++ ++void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq) ++{ ++ u32 rate, weight, divisor; ++ ++ /* ++ * For the convergence property to hold (see comments on ++ * bfq_update_peak_rate()) and for the assessment to be ++ * reliable, a minimum number of samples must be present, and ++ * a minimum amount of time must have elapsed. If not so, do ++ * not compute new rate. Just reset parameters, to get ready ++ * for a new evaluation attempt. ++ */ ++ if (bfqd->peak_rate_samples < BFQ_RATE_MIN_SAMPLES || ++ bfqd->delta_from_first < BFQ_RATE_MIN_INTERVAL) { ++ bfq_log(bfqd, ++ "update_rate_reset: only resetting, delta_first %lluus samples %d", ++ bfqd->delta_from_first>>10, bfqd->peak_rate_samples); ++ goto reset_computation; ++ } ++ ++ /* ++ * If a new request completion has occurred after last ++ * dispatch, then, to approximate the rate at which requests ++ * have been served by the device, it is more precise to ++ * extend the observation interval to the last completion. 
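The autotuned budget conversion shown earlier in this hunk is compact enough to check by hand. A standalone sketch of the same fixed-point arithmetic, with the shift kept at 16 as in BFQ_RATE_SHIFT but the example rate and timeout chosen locally:

#include <stdint.h>
#include <stdio.h>

#define RATE_SHIFT 16			/* same precision as BFQ_RATE_SHIFT */
#define USEC_PER_MSEC 1000ULL

/* peak_rate_fp is sectors/usec in fixed point (<< RATE_SHIFT). */
static uint64_t calc_max_budget(uint64_t peak_rate_fp, unsigned timeout_ms)
{
	/* sectors = (sectors/usec << SHIFT) * usec >> SHIFT */
	return (peak_rate_fp * USEC_PER_MSEC * timeout_ms) >> RATE_SHIFT;
}

int main(void)
{
	/* ~100 MiB/s: 204800 sectors/s = 0.2048 sectors/usec, fixed point */
	uint64_t rate_fp = (uint64_t)(0.2048 * (1 << RATE_SHIFT));

	printf("max budget = %llu sectors\n",
	       (unsigned long long)calc_max_budget(rate_fp, 125));
	return 0;	/* ~25600 sectors: the 12.5 MiB such a device moves
			   in a 125 ms timeout, so a full slice can use a
			   full budget even at peak rate */
}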
++ */ ++ bfqd->delta_from_first = ++ max_t(u64, bfqd->delta_from_first, ++ bfqd->last_completion - bfqd->first_dispatch); ++ ++ BUG_ON(bfqd->delta_from_first == 0); ++ /* ++ * Rate computed in sects/usec, and not sects/nsec, for ++ * precision issues. ++ */ ++ rate = div64_ul(bfqd->tot_sectors_dispatched<delta_from_first, NSEC_PER_USEC)); ++ ++ bfq_log(bfqd, ++"update_rate_reset: tot_sects %llu delta_first %lluus rate %llu sects/s (%d)", ++ bfqd->tot_sectors_dispatched, bfqd->delta_from_first>>10, ++ ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ++ rate > 20< 20M sectors/sec) ++ */ ++ if ((bfqd->peak_rate_samples > (3 * bfqd->sequential_samples)>>2 && ++ rate <= bfqd->peak_rate) || ++ rate > 20<peak_rate_samples, bfqd->sequential_samples, ++ ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ++ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); ++ goto reset_computation; ++ } else { ++ bfq_log(bfqd, ++ "update_rate_reset: do update, samples %u/%u rate/peak %llu/%llu", ++ bfqd->peak_rate_samples, bfqd->sequential_samples, ++ ((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT), ++ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); ++ } ++ ++ /* ++ * We have to update the peak rate, at last! To this purpose, ++ * we use a low-pass filter. We compute the smoothing constant ++ * of the filter as a function of the 'weight' of the new ++ * measured rate. ++ * ++ * As can be seen in next formulas, we define this weight as a ++ * quantity proportional to how sequential the workload is, ++ * and to how long the observation time interval is. ++ * ++ * The weight runs from 0 to 8. The maximum value of the ++ * weight, 8, yields the minimum value for the smoothing ++ * constant. At this minimum value for the smoothing constant, ++ * the measured rate contributes for half of the next value of ++ * the estimated peak rate. ++ * ++ * So, the first step is to compute the weight as a function ++ * of how sequential the workload is. Note that the weight ++ * cannot reach 9, because bfqd->sequential_samples cannot ++ * become equal to bfqd->peak_rate_samples, which, in its ++ * turn, holds true because bfqd->sequential_samples is not ++ * incremented for the first sample. ++ */ ++ weight = (9 * bfqd->sequential_samples) / bfqd->peak_rate_samples; ++ ++ /* ++ * Second step: further refine the weight as a function of the ++ * duration of the observation interval. ++ */ ++ weight = min_t(u32, 8, ++ div_u64(weight * bfqd->delta_from_first, ++ BFQ_RATE_REF_INTERVAL)); ++ ++ /* ++ * Divisor ranging from 10, for minimum weight, to 2, for ++ * maximum weight. 
++ */ ++ divisor = 10 - weight; ++ BUG_ON(divisor == 0); ++ ++ /* ++ * Finally, update peak rate: ++ * ++ * peak_rate = peak_rate * (divisor-1) / divisor + rate / divisor ++ */ ++ bfqd->peak_rate *= divisor-1; ++ bfqd->peak_rate /= divisor; ++ rate /= divisor; /* smoothing constant alpha = 1/divisor */ ++ ++ bfq_log(bfqd, ++ "update_rate_reset: divisor %d tmp_peak_rate %llu tmp_rate %u", ++ divisor, ++ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT), ++ (u32)((USEC_PER_SEC*(u64)rate)>>BFQ_RATE_SHIFT)); ++ ++ BUG_ON(bfqd->peak_rate == 0); ++ BUG_ON(bfqd->peak_rate > 20<peak_rate += rate; ++ update_thr_responsiveness_params(bfqd); ++ BUG_ON(bfqd->peak_rate > 20<peak_rate_samples == 0) { /* first dispatch */ ++ bfq_log(bfqd, ++ "update_peak_rate: goto reset, samples %d", ++ bfqd->peak_rate_samples) ; ++ bfq_reset_rate_computation(bfqd, rq); ++ goto update_last_values; /* will add one sample */ ++ } ++ ++ /* ++ * Device idle for very long: the observation interval lasting ++ * up to this dispatch cannot be a valid observation interval ++ * for computing a new peak rate (similarly to the late- ++ * completion event in bfq_completed_request()). Go to ++ * update_rate_and_reset to have the following three steps ++ * taken: ++ * - close the observation interval at the last (previous) ++ * request dispatch or completion ++ * - compute rate, if possible, for that observation interval ++ * - start a new observation interval with this dispatch ++ */ ++ if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && ++ bfqd->rq_in_driver == 0) { ++ bfq_log(bfqd, ++"update_peak_rate: jumping to updating&resetting delta_last %lluus samples %d", ++ (now_ns - bfqd->last_dispatch)>>10, ++ bfqd->peak_rate_samples) ; ++ goto update_rate_and_reset; ++ } ++ ++ /* Update sampling information */ ++ bfqd->peak_rate_samples++; ++ ++ if ((bfqd->rq_in_driver > 0 || ++ now_ns - bfqd->last_completion < BFQ_MIN_TT) ++ && get_sdist(bfqd->last_position, rq) < BFQQ_SEEK_THR) ++ bfqd->sequential_samples++; ++ ++ bfqd->tot_sectors_dispatched += blk_rq_sectors(rq); ++ ++ /* Reset max observed rq size every 32 dispatches */ ++ if (likely(bfqd->peak_rate_samples % 32)) ++ bfqd->last_rq_max_size = ++ max_t(u32, blk_rq_sectors(rq), bfqd->last_rq_max_size); + else +- timeout_coeff = bfqq->entity.weight / bfqq->entity.orig_weight; ++ bfqd->last_rq_max_size = blk_rq_sectors(rq); + +- bfqd->last_budget_start = ktime_get(); ++ bfqd->delta_from_first = now_ns - bfqd->first_dispatch; + +- bfq_clear_bfqq_budget_new(bfqq); +- bfqq->budget_timeout = jiffies + +- bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * timeout_coeff; ++ bfq_log(bfqd, ++ "update_peak_rate: added samples %u/%u tot_sects %llu delta_first %lluus", ++ bfqd->peak_rate_samples, bfqd->sequential_samples, ++ bfqd->tot_sectors_dispatched, ++ bfqd->delta_from_first>>10); - bfq_log_bfqq(bfqd, bfqq, "set budget_timeout %u", - jiffies_to_msecs(bfqd->bfq_timeout[bfq_bfqq_sync(bfqq)] * - timeout_coeff)); -+ bfqd->last_idling_start = ktime_get(); -+ mod_timer(&bfqd->idle_slice_timer, jiffies + sl); -+ bfqg_stats_set_start_idle_time(bfqq_group(bfqq)); -+ bfq_log(bfqd, "arm idle: %u/%u ms", -+ jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle)); ++ /* Target observation interval not yet reached, go on sampling */ ++ if (bfqd->delta_from_first < BFQ_RATE_REF_INTERVAL) ++ goto update_last_values; ++ ++update_rate_and_reset: ++ bfq_update_rate_reset(bfqd, rq); ++update_last_values: ++ bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); ++ bfqd->last_dispatch = now_ns; ++ ++ 
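The low-pass filter applied a few lines up can be exercised in isolation. A self-contained model, assuming a local reference-interval constant in place of BFQ_RATE_REF_INTERVAL:

#include <stdint.h>
#include <stdio.h>

#define REF_INTERVAL_NS 1000000000ULL	/* stand-in, 1s like the kernel value */

static uint64_t filter_peak_rate(uint64_t peak_rate, uint64_t measured_rate,
				 unsigned seq_samples, unsigned tot_samples,
				 uint64_t interval_ns)
{
	/* weight in 0..8; seq_samples < tot_samples keeps it below 9 */
	uint64_t weight = (9ULL * seq_samples) / tot_samples;

	/* refine by how long the observation interval was */
	weight = weight * interval_ns / REF_INTERVAL_NS;
	if (weight > 8)
		weight = 8;

	uint64_t divisor = 10 - weight;	/* 10 (heavy smoothing) .. 2 (light) */

	/* peak_rate = peak_rate * (divisor-1)/divisor + rate/divisor */
	return peak_rate * (divisor - 1) / divisor + measured_rate / divisor;
}

int main(void)
{
	/* fully sequential samples over a full interval: alpha = 1/2 */
	printf("%llu\n", (unsigned long long)
	       filter_peak_rate(1000, 2000, 31, 32, REF_INTERVAL_NS));
	/* mostly random I/O: the old estimate dominates */
	printf("%llu\n", (unsigned long long)
	       filter_peak_rate(1000, 2000, 4, 32, REF_INTERVAL_NS));
	return 0;	/* prints 1500, then 1110 */
}

With a fully sequential sample set and a full-length interval the divisor bottoms out at 2, so one observation can move the estimate by at most half of the difference; that is the minimum smoothing the comment above refers to.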
bfq_log(bfqd, ++ "update_peak_rate: delta_first %lluus last_pos %llu peak_rate %llu", ++ (now_ns - bfqd->first_dispatch)>>10, ++ (unsigned long long) bfqd->last_position, ++ ((USEC_PER_SEC*(u64)bfqd->peak_rate)>>BFQ_RATE_SHIFT)); ++ bfq_log(bfqd, ++ "update_peak_rate: samples at end %d", bfqd->peak_rate_samples); } /* @@ -2594,10 +3090,11 @@ index d1f648d..3bc1f8b 100644 struct bfq_queue *bfqq = RQ_BFQQ(rq); /* -@@ -1794,15 +2225,9 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) +@@ -1794,15 +2595,10 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq) * incrementing bfqq->dispatched. */ bfqq->dispatched++; ++ bfq_update_peak_rate(q->elevator->elevator_data, rq); + bfq_remove_request(rq); elv_dispatch_sort(q, rq); @@ -2611,12 +3108,12 @@ index d1f648d..3bc1f8b 100644 } /* -@@ -1822,18 +2247,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) +@@ -1822,19 +2618,12 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq) rq = rq_entry_fifo(bfqq->fifo.next); - if (time_before(jiffies, rq->fifo_time)) -+ if (time_is_after_jiffies(rq->fifo_time)) ++ if (ktime_get_ns() < rq->fifo_time) return NULL; return rq; @@ -2625,13 +3122,14 @@ index d1f648d..3bc1f8b 100644 -static int bfq_bfqq_budget_left(struct bfq_queue *bfqq) -{ - struct bfq_entity *entity = &bfqq->entity; +- - return entity->budget - entity->service; -} - static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) { BUG_ON(bfqq != bfqd->in_service_queue); -@@ -1850,12 +2269,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -1851,12 +2640,15 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_mark_bfqq_split_coop(bfqq); if (RB_EMPTY_ROOT(&bfqq->sort_list)) { @@ -2653,7 +3151,7 @@ index d1f648d..3bc1f8b 100644 bfq_del_bfqq_busy(bfqd, bfqq, 1); } else { bfq_activate_bfqq(bfqd, bfqq); -@@ -1882,10 +2304,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, +@@ -1883,10 +2675,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, struct request *next_rq; int budget, min_budget; @@ -2675,7 +3173,7 @@ index d1f648d..3bc1f8b 100644 bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d", bfqq->entity.budget, bfq_bfqq_budget_left(bfqq)); -@@ -1894,7 +2325,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, +@@ -1895,7 +2696,7 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d", bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue)); @@ -2684,7 +3182,7 @@ index d1f648d..3bc1f8b 100644 switch (reason) { /* * Caveat: in all the following cases we trade latency -@@ -1936,14 +2367,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, +@@ -1937,14 +2738,10 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, break; case BFQ_BFQQ_BUDGET_TIMEOUT: /* @@ -2703,7 +3201,7 @@ index d1f648d..3bc1f8b 100644 */ budget = min(budget * 2, bfqd->bfq_max_budget); break; -@@ -1960,17 +2387,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, +@@ -1961,17 +2758,49 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, budget = min(budget * 4, bfqd->bfq_max_budget); break; case BFQ_BFQQ_NO_MORE_REQUESTS: @@ -2760,7 +3258,7 @@ index d1f648d..3bc1f8b 100644 */ budget = bfqd->bfq_max_budget; -@@ -1981,65 +2440,105 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd, +@@ -1982,160 +2811,120 @@ static void __bfq_bfqq_recalc_budget(struct 
bfq_data *bfqd, bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget); /* @@ -2795,39 +3293,34 @@ index d1f648d..3bc1f8b 100644 } -static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout) -+static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd) - { +-{ - unsigned long max_budget; - - /* - * The max_budget calculated when autotuning is equal to the +- /* +- * The max_budget calculated when autotuning is equal to the - * amount of sectors transfered in timeout_sync at the -+ * amount of sectors transfered in timeout at the - * estimated peak rate. - */ +- * estimated peak rate. +- */ - max_budget = (unsigned long)(peak_rate * 1000 * - timeout >> BFQ_RATE_SHIFT); - - return max_budget; -+ return bfqd->peak_rate * 1000 * jiffies_to_msecs(bfqd->bfq_timeout) >> -+ BFQ_RATE_SHIFT; - } - +-} +- /* - * In addition to updating the peak rate, checks whether the process - * is "slow", and returns 1 if so. This slow flag is used, in addition - * to the budget timeout, to reduce the amount of service provided to - * seeky processes, and hence reduce their chances to lower the - * throughput. See the code for more details. -+ * Update the read peak rate (quantity used for auto-tuning) as a -+ * function of the rate at which bfqq has been served, and check -+ * whether the process associated with bfqq is "slow". Return true if -+ * the process is slow. The slow flag is used, in addition to the -+ * budget timeout, to reduce the amount of service provided to seeky -+ * processes, and hence reduce their chances to lower the -+ * throughput. More details in the body of the function. ++ * Return true if the process associated with bfqq is "slow". The slow ++ * flag is used, in addition to the budget timeout, to reduce the ++ * amount of service provided to seeky processes, and thus reduce ++ * their chances to lower the throughput. More details in the comments ++ * on the function bfq_bfqq_expire(). + * -+ * An important observation is in order: with devices with internal ++ * An important observation is in order: as discussed in the comments ++ * on the function bfq_update_peak_rate(), with devices with internal + * queues, it is hard if ever possible to know when and for how long + * an I/O request is processed by the device (apart from the trivial + * I/O pattern where a new request is dispatched only after the @@ -2835,29 +3328,32 @@ index d1f648d..3bc1f8b 100644 + * the real rate at which the I/O requests of each bfq_queue are + * served. In fact, for an I/O scheduler like BFQ, serving a + * bfq_queue means just dispatching its requests during its service -+ * slot, i.e., until the budget of the queue is exhausted, or the -+ * queue remains idle, or, finally, a timeout fires. But, during the -+ * service slot of a bfq_queue, the device may be still processing -+ * requests of bfq_queues served in previous service slots. On the -+ * opposite end, the requests of the in-service bfq_queue may be -+ * completed after the service slot of the queue finishes. Anyway, -+ * unless more sophisticated solutions are used (where possible), the -+ * sum of the sizes of the requests dispatched during the service slot -+ * of a bfq_queue is probably the only approximation available for -+ * the service received by the bfq_queue during its service slot. And, -+ * as written above, this sum is the quantity used in this function to -+ * evaluate the peak rate. ++ * slot (i.e., until the budget of the queue is exhausted, or the ++ * queue remains idle, or, finally, a timeout fires). 
But, during the ++ * service slot of a bfq_queue, around 100 ms at most, the device may ++ * be even still processing requests of bfq_queues served in previous ++ * service slots. On the opposite end, the requests of the in-service ++ * bfq_queue may be completed after the service slot of the queue ++ * finishes. ++ * ++ * Anyway, unless more sophisticated solutions are used ++ * (where possible), the sum of the sizes of the requests dispatched ++ * during the service slot of a bfq_queue is probably the only ++ * approximation available for the service received by the bfq_queue ++ * during its service slot. And this sum is the quantity used in this ++ * function to evaluate the I/O speed of a process. */ - static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, +-static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bool compensate, enum bfqq_expiration reason) ++static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq, + bool compensate, enum bfqq_expiration reason, + unsigned long *delta_ms) { - u64 bw, usecs, expected, timeout; - ktime_t delta; -+ u64 bw, bwdiv10, delta_usecs, delta_ms_tmp; +- int update = 0; + ktime_t delta_ktime; - int update = 0; ++ u32 delta_usecs; + bool slow = BFQQ_SEEKY(bfqq); /* if delta too short, use seekyness */ - if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq)) @@ -2866,138 +3362,98 @@ index d1f648d..3bc1f8b 100644 if (compensate) - delta = bfqd->last_idling_start; -+ delta_ktime = bfqd->last_idling_start; - else +- else - delta = ktime_get(); - delta = ktime_sub(delta, bfqd->last_budget_start); - usecs = ktime_to_us(delta); -+ delta_ktime = ktime_get(); -+ delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start); -+ delta_usecs = ktime_to_us(delta_ktime); - - /* Don't trust short/unrealistic values. */ +- +- /* Don't trust short/unrealistic values. */ - if (usecs < 100 || usecs >= LONG_MAX) - return false; -+ if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) { -+ if (blk_queue_nonrot(bfqd->queue)) -+ *delta_ms = BFQ_MIN_TT; /* give same worst-case -+ guarantees as -+ idling for seeky -+ */ -+ else /* Charge at least one seek */ -+ *delta_ms = jiffies_to_msecs(bfq_slice_idle); -+ return slow; -+ } -+ -+ delta_ms_tmp = delta_usecs; -+ do_div(delta_ms_tmp, 1000); -+ *delta_ms = delta_ms_tmp; - - /* - * Calculate the bandwidth for the last slice. We use a 64 bit -@@ -2048,32 +2547,51 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - * and to avoid overflows. - */ - bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; +- +- /* +- * Calculate the bandwidth for the last slice. We use a 64 bit +- * value to store the peak rate, in sectors per usec in fixed +- * point math. We do so to have enough precision in the estimate +- * and to avoid overflows. +- */ +- bw = (u64)bfqq->entity.service << BFQ_RATE_SHIFT; - do_div(bw, (unsigned long)usecs); - - timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); -+ do_div(bw, (unsigned long)delta_usecs); - -+ bfq_log(bfqd, "measured bw = %llu sects/sec", -+ (1000000*bw)>>BFQ_RATE_SHIFT); - /* - * Use only long (> 20ms) intervals to filter out spikes for - * the peak rate estimation. - */ +- +- /* +- * Use only long (> 20ms) intervals to filter out spikes for +- * the peak rate estimation. 
+- */ - if (usecs > 20000) { -+ if (delta_usecs > 20000) { -+ bool fully_sequential = bfqq->seek_history == 0; -+ /* -+ * Soft real-time queues are not good candidates for -+ * evaluating bw, as they are likely to be slow even -+ * if sequential. -+ */ -+ bool non_soft_rt = bfqq->wr_coeff == 1 || -+ bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time; -+ bool consumed_large_budget = -+ reason == BFQ_BFQQ_BUDGET_EXHAUSTED && -+ bfqq->entity.budget >= bfqd->bfq_max_budget * 2 / 3; -+ bool served_for_long_time = -+ reason == BFQ_BFQQ_BUDGET_TIMEOUT || -+ consumed_large_budget; -+ -+ BUG_ON(bfqq->seek_history == 0 && -+ hweight32(bfqq->seek_history) != 0); -+ - if (bw > bfqd->peak_rate || +- if (bw > bfqd->peak_rate || - (!BFQQ_SEEKY(bfqq) && - reason == BFQ_BFQQ_BUDGET_TIMEOUT)) { - bfq_log(bfqd, "measured bw =%llu", bw); -+ (bfq_bfqq_sync(bfqq) && fully_sequential && non_soft_rt && -+ served_for_long_time)) { - /* - * To smooth oscillations use a low-pass filter with +- /* +- * To smooth oscillations use a low-pass filter with - * alpha=7/8, i.e., - * new_rate = (7/8) * old_rate + (1/8) * bw -+ * alpha=9/10, i.e., -+ * new_rate = (9/10) * old_rate + (1/10) * bw - */ +- */ - do_div(bw, 8); - if (bw == 0) - return 0; - bfqd->peak_rate *= 7; - do_div(bfqd->peak_rate, 8); - bfqd->peak_rate += bw; -+ bwdiv10 = bw; -+ do_div(bwdiv10, 10); -+ if (bwdiv10 == 0) -+ return false; /* bw too low to be used */ -+ bfqd->peak_rate *= 9; -+ do_div(bfqd->peak_rate, 10); -+ bfqd->peak_rate += bwdiv10; - update = 1; +- update = 1; - bfq_log(bfqd, "new peak_rate=%llu", bfqd->peak_rate); -+ bfq_log(bfqd, "new peak_rate = %llu sects/sec", -+ (1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT); - } - - update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; -@@ -2086,9 +2604,8 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - int dev_type = blk_queue_nonrot(bfqd->queue); - if (bfqd->bfq_user_max_budget == 0) { - bfqd->bfq_max_budget = +- } +- +- update |= bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES - 1; +- +- if (bfqd->peak_rate_samples < BFQ_PEAK_RATE_SAMPLES) +- bfqd->peak_rate_samples++; +- +- if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES && +- update) { +- int dev_type = blk_queue_nonrot(bfqd->queue); +- +- if (bfqd->bfq_user_max_budget == 0) { +- bfqd->bfq_max_budget = - bfq_calc_max_budget(bfqd->peak_rate, - timeout); - bfq_log(bfqd, "new max_budget=%d", -+ bfq_calc_max_budget(bfqd); -+ bfq_log(bfqd, "new max_budget = %d", - bfqd->bfq_max_budget); - } - if (bfqd->device_speed == BFQ_BFQD_FAST && -@@ -2102,38 +2619,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, - bfqd->RT_prod = R_fast[dev_type] * - T_fast[dev_type]; - } -+ bfq_log(bfqd, "dev_speed_class = %d (%d sects/sec), " -+ "thresh %d setcs/sec", -+ bfqd->device_speed, -+ bfqd->device_speed == BFQ_BFQD_FAST ? -+ (1000000*R_fast[dev_type])>>BFQ_RATE_SHIFT : -+ (1000000*R_slow[dev_type])>>BFQ_RATE_SHIFT, -+ (1000000*device_speed_thresh[dev_type])>> -+ BFQ_RATE_SHIFT); - } -+ /* -+ * Caveat: processes doing IO in the slower disk zones -+ * tend to be slow(er) even if not seeky. In this -+ * respect, the estimated peak rate is likely to be an -+ * average over the disk surface. Accordingly, to not -+ * be too harsh with unlucky processes, a process is -+ * deemed slow only if its bw has been lower than half -+ * of the estimated peak rate. 
-+ */ -+ slow = bw < bfqd->peak_rate / 2; +- bfqd->bfq_max_budget); +- } +- if (bfqd->device_speed == BFQ_BFQD_FAST && +- bfqd->peak_rate < device_speed_thresh[dev_type]) { +- bfqd->device_speed = BFQ_BFQD_SLOW; +- bfqd->RT_prod = R_slow[dev_type] * +- T_slow[dev_type]; +- } else if (bfqd->device_speed == BFQ_BFQD_SLOW && +- bfqd->peak_rate > device_speed_thresh[dev_type]) { +- bfqd->device_speed = BFQ_BFQD_FAST; +- bfqd->RT_prod = R_fast[dev_type] * +- T_fast[dev_type]; +- } +- } ++ delta_ktime = bfqd->last_idling_start; ++ else ++ delta_ktime = ktime_get(); ++ delta_ktime = ktime_sub(delta_ktime, bfqd->last_budget_start); ++ delta_usecs = ktime_to_us(delta_ktime); ++ ++ /* don't trust short/unrealistic values. */ ++ if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) { ++ if (blk_queue_nonrot(bfqd->queue)) ++ /* ++ * give same worst-case guarantees as idling ++ * for seeky ++ */ ++ *delta_ms = BFQ_MIN_TT / NSEC_PER_MSEC; ++ else /* charge at least one seek */ ++ *delta_ms = bfq_slice_idle / NSEC_PER_MSEC; ++ ++ bfq_log(bfqd, "bfq_bfqq_is_slow: unrealistic %u", delta_usecs); ++ ++ return slow; } - /* @@ -3009,22 +3465,34 @@ index d1f648d..3bc1f8b 100644 - */ - if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8) - return false; -- -- /* ++ *delta_ms = delta_usecs / USEC_PER_MSEC; + + /* - * A process is considered ``slow'' (i.e., seeky, so that we - * cannot treat it fairly in the service domain, as it would - * slow down too much the other processes) if, when a slice - * ends for whatever reason, it has received service at a - * rate that would not be high enough to complete the budget - * before the budget timeout expiration. -- */ ++ * Use only long (> 20ms) intervals to filter out excessive ++ * spikes in service rate estimation. + */ - expected = bw * 1000 * timeout >> BFQ_RATE_SHIFT; -+ bfq_log_bfqq(bfqd, bfqq, -+ "update_peak_rate: bw %llu sect/s, peak rate %llu, " -+ "slow %d", -+ (1000000*bw)>>BFQ_RATE_SHIFT, -+ (1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT, -+ bw < bfqd->peak_rate / 2); ++ if (delta_usecs > 20000) { ++ /* ++ * Caveat for rotational devices: processes doing I/O ++ * in the slower disk zones tend to be slow(er) even ++ * if not seeky. In this respect, the estimated peak ++ * rate is likely to be an average over the disk ++ * surface. Accordingly, to not be too harsh with ++ * unlucky processes, a process is deemed slow only if ++ * its rate has been lower than half of the estimated ++ * peak rate. ++ */ ++ slow = bfqq->entity.service < bfqd->bfq_max_budget / 2; ++ bfq_log(bfqd, "bfq_bfqq_is_slow: relative rate %d/%d", ++ bfqq->entity.service, bfqd->bfq_max_budget); ++ } - /* - * Caveat: processes doing IO in the slower disk zones will @@ -3035,18 +3503,18 @@ index d1f648d..3bc1f8b 100644 - * process slow. 
- */ - return expected > (4 * bfqq->entity.budget) / 3; ++ bfq_log_bfqq(bfqd, bfqq, "bfq_bfqq_is_slow: slow %d", slow); ++ + return slow; } /* -@@ -2191,6 +2705,15 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -2193,20 +2982,35 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq, static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + bfq_log_bfqq(bfqd, bfqq, -+ "softrt_next_start: service_blkg %lu " -+ "soft_rate %u sects/sec" -+ "interval %u", ++"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u", + bfqq->service_from_backlogged, + bfqd->bfq_wr_max_softrt_rate, + jiffies_to_msecs(HZ * bfqq->service_from_backlogged / @@ -3055,22 +3523,23 @@ index d1f648d..3bc1f8b 100644 return max(bfqq->last_idle_bklogged + HZ * bfqq->service_from_backlogged / bfqd->bfq_wr_max_softrt_rate, -@@ -2198,13 +2721,21 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd, - } - - /* -- * Return the largest-possible time instant such that, for as long as possible, -- * the current time will be lower than this time instant according to the macro -- * time_is_before_jiffies(). +- jiffies + bfqq->bfqd->bfq_slice_idle + 4); ++ jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4); ++} ++ ++/* + * Return the farthest future time instant according to jiffies + * macros. + */ +static unsigned long bfq_greatest_from_now(void) +{ + return jiffies + MAX_JIFFY_OFFSET; -+} -+ -+/* + } + + /* +- * Return the largest-possible time instant such that, for as long as possible, +- * the current time will be lower than this time instant according to the macro +- * time_is_before_jiffies(). + * Return the farthest past time instant according to jiffies + * macros. */ @@ -3082,7 +3551,7 @@ index d1f648d..3bc1f8b 100644 } /** -@@ -2214,28 +2745,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now) +@@ -2216,28 +3020,24 @@ static unsigned long bfq_infinity_from_now(unsigned long now) * @compensate: if true, compensate for the time spent idling. * @reason: the reason causing the expiration. * @@ -3128,22 +3597,22 @@ index d1f648d..3bc1f8b 100644 */ static void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, -@@ -2243,40 +2770,53 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, +@@ -2245,41 +3045,52 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, enum bfqq_expiration reason) { bool slow; + unsigned long delta = 0; + struct bfq_entity *entity = &bfqq->entity; -+ + BUG_ON(bfqq != bfqd->in_service_queue); /* - * Update disk peak rate for autotuning and check whether the -+ * Update device peak rate for autotuning and check whether the - * process is slow (see bfq_update_peak_rate). +- * process is slow (see bfq_update_peak_rate). ++ * Check whether the process is slow (see bfq_bfqq_is_slow). 
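Backing up to bfq_bfqq_softrt_next_start() in this same hunk: its max() bound can be modelled without the jiffies macros. HZ and the rate ceiling below are example values, not kernel defaults:

#include <stdio.h>

/* All times in ticks; the rate is in sectors per second. */
static unsigned long softrt_next_start(unsigned long last_idle_bklogged,
				       unsigned long service_backlogged,
				       unsigned long max_softrt_rate,
				       unsigned long now,
				       unsigned long slice_idle_ticks,
				       unsigned long hz)
{
	/* time the backlogged service would take at the soft-rt ceiling */
	unsigned long budget_bound = last_idle_bklogged +
		hz * service_backlogged / max_softrt_rate;
	/* and never sooner than a few ticks past the idle slice */
	unsigned long idle_bound = now + slice_idle_ticks + 4;

	return budget_bound > idle_bound ? budget_bound : idle_bound;
}

int main(void)
{
	/* HZ=250; 2500 sectors backlogged against a 7000 sectors/s ceiling */
	printf("%lu\n", softrt_next_start(1000, 2500, 7000, 1000, 2, 250));
	return 0;	/* prints 1089: the budget bound wins here */
}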
*/ - slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason); -+ slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason, &delta); ++ slow = bfq_bfqq_is_slow(bfqd, bfqq, compensate, reason, &delta); /* - * As above explained, 'punish' slow (i.e., seeky), timed-out @@ -3199,12 +3668,12 @@ index d1f648d..3bc1f8b 100644 + BUG_ON(bfqq->entity.budget < bfqq->entity.service); if (reason == BFQ_BFQQ_TOO_IDLE && -- bfqq->entity.service <= 2 * bfqq->entity.budget / 10 ) -+ entity->service <= 2 * entity->budget / 10 ) +- bfqq->entity.service <= 2 * bfqq->entity.budget / 10) ++ entity->service <= 2 * entity->budget / 10) bfq_clear_bfqq_IO_bound(bfqq); if (bfqd->low_latency && bfqq->wr_coeff == 1) -@@ -2285,19 +2825,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, +@@ -2288,19 +3099,23 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, if (bfqd->low_latency && bfqd->bfq_wr_max_softrt_rate > 0 && RB_EMPTY_ROOT(&bfqq->sort_list)) { /* @@ -3236,7 +3705,7 @@ index d1f648d..3bc1f8b 100644 /* * The application is still waiting for the * completion of one or more requests: -@@ -2314,7 +2858,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, +@@ -2317,7 +3132,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, * happened to be in the past. */ bfqq->soft_rt_next_start = @@ -3245,7 +3714,7 @@ index d1f648d..3bc1f8b 100644 /* * Schedule an update of soft_rt_next_start to when * the task may be discovered to be isochronous. -@@ -2324,15 +2868,27 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, +@@ -2327,15 +3142,27 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, } bfq_log_bfqq(bfqd, bfqq, @@ -3275,7 +3744,7 @@ index d1f648d..3bc1f8b 100644 } /* -@@ -2342,20 +2898,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, +@@ -2345,20 +3172,17 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd, */ static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq) { @@ -3304,7 +3773,7 @@ index d1f648d..3bc1f8b 100644 static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq) { bfq_log_bfqq(bfqq->bfqd, bfqq, -@@ -2397,10 +2950,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2400,10 +3224,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) { struct bfq_data *bfqd = bfqq->bfqd; bool idling_boosts_thr, idling_boosts_thr_without_issues, @@ -3318,16 +3787,7 @@ index d1f648d..3bc1f8b 100644 /* * The next variable takes into account the cases where idling * boosts the throughput. -@@ -2422,7 +2977,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) - */ - idling_boosts_thr = !bfqd->hw_tag || - (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) && -- bfq_bfqq_idle_window(bfqq)) ; -+ bfq_bfqq_idle_window(bfqq)); - - /* - * The value of the next variable, -@@ -2463,74 +3018,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2466,74 +3292,27 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) bfqd->wr_busy_queues == 0; /* @@ -3421,7 +3881,7 @@ index d1f648d..3bc1f8b 100644 * (i) each of these processes must get the same throughput as * the others; * (ii) all these processes have the same I/O pattern -@@ -2552,26 +3060,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2555,26 +3334,53 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * words, only if sub-condition (i) holds, then idling is * allowed, and the device tends to be prevented from queueing * many requests, possibly of several processes. 
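
Several tests reworked in these hunks, the budget-timeout check among them, lean on the kernel's wraparound-safe jiffies macros rather than raw comparisons. A userspace model of the underlying idiom (names are illustrative, not the kernel macros themselves):

    #include <stdbool.h>
    #include <stdio.h>

    /* "a" is in the past (or is now) iff the signed distance from
     * "a" to "now" is non-negative; this stays correct when the
     * counter wraps around, unlike a plain "a <= now". */
    static bool time_is_before_eq(unsigned long now, unsigned long a)
    {
            return (long)(now - a) >= 0;
    }

    int main(void)
    {
            /* timestamp taken just before the counter wrapped */
            printf("%d\n", time_is_before_eq(5UL, (unsigned long)-3L));
            return 0;
    }
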
The reason @@ -3495,7 +3955,7 @@ index d1f648d..3bc1f8b 100644 * * According to the above considerations, the next variable is * true (only) if sub-condition (i) holds. To compute the -@@ -2579,7 +3114,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2582,7 +3388,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * the function bfq_symmetric_scenario(), but also check * whether bfqq is being weight-raised, because * bfq_symmetric_scenario() does not take into account also @@ -3504,7 +3964,7 @@ index d1f648d..3bc1f8b 100644 * bfq_weights_tree_add()). * * As a side note, it is worth considering that the above -@@ -2601,17 +3136,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2604,17 +3410,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * bfqq. Such a case is when bfqq became active in a burst of * queue activations. Queues that became active during a large * burst benefit only from throughput, as discussed in the @@ -3527,13 +3987,15 @@ index d1f648d..3bc1f8b 100644 /* * We have now all the components we need to compute the return -@@ -2621,6 +3155,14 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2624,6 +3429,16 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * 2) idling either boosts the throughput (without issues), or * is necessary to preserve service guarantees. */ -+ bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d " -+ "wr_busy %d boosts %d IO-bound %d guar %d", -+ bfq_bfqq_sync(bfqq), idling_boosts_thr, ++ bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d", ++ bfq_bfqq_sync(bfqq), idling_boosts_thr); ++ ++ bfq_log_bfqq(bfqd, bfqq, ++ "may_idle: wr_busy %d boosts %d IO-bound %d guar %d", + bfqd->wr_busy_queues, + idling_boosts_thr_without_issues, + bfq_bfqq_IO_bound(bfqq), @@ -3542,7 +4004,7 @@ index d1f648d..3bc1f8b 100644 return bfq_bfqq_sync(bfqq) && (idling_boosts_thr_without_issues || idling_needed_for_service_guarantees); -@@ -2632,7 +3174,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) +@@ -2635,7 +3450,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * 1) the queue must remain in service and cannot be expired, and * 2) the device must be idled to wait for the possible arrival of a new * request for the queue. @@ -3551,17 +4013,57 @@ index d1f648d..3bc1f8b 100644 * why performing device idling is the best choice to boost the throughput * and preserve service guarantees when bfq_bfqq_may_idle itself * returns true. -@@ -2698,9 +3240,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) +@@ -2665,7 +3480,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue"); + + if (bfq_may_expire_for_budg_timeout(bfqq) && +- !timer_pending(&bfqd->idle_slice_timer) && ++ !hrtimer_active(&bfqd->idle_slice_timer) && + !bfq_bfqq_must_idle(bfqq)) + goto expire; + +@@ -2685,7 +3500,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + * not disable disk idling even when a new request + * arrives. + */ +- if (timer_pending(&bfqd->idle_slice_timer)) { ++ if (bfq_bfqq_wait_request(bfqq)) { ++ BUG_ON(!hrtimer_active(&bfqd->idle_slice_timer)); + /* + * If we get here: 1) at least a new request + * has arrived but we have not disabled the +@@ -2700,10 +3516,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + * So we disable idling. 
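
The decision recorded by the two log lines above reduces to a single boolean expression; a condensed model of just that logic (a sketch, not the kernel function):

    #include <stdbool.h>
    #include <stdio.h>

    /* Idle only for sync queues, and only if idling either boosts
     * throughput without side effects or is needed to preserve
     * service guarantees. */
    static bool may_idle(bool sync, bool boosts_thr_without_issues,
                         bool needed_for_guarantees)
    {
            return sync &&
                   (boosts_thr_without_issues || needed_for_guarantees);
    }

    int main(void)
    {
            printf("%d\n", may_idle(true, false, true));
            return 0;
    }
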
*/ bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); +- del_timer(&bfqd->idle_slice_timer); -#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ hrtimer_try_to_cancel(&bfqd->idle_slice_timer); bfqg_stats_update_idle_time(bfqq_group(bfqq)); -#endif } goto keep_queue; } -@@ -2745,14 +3285,11 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -2714,7 +3528,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) + * for a new request, or has requests waiting for a completion and + * may idle after their completion, then keep it anyway. + */ +- if (timer_pending(&bfqd->idle_slice_timer) || ++ if (hrtimer_active(&bfqd->idle_slice_timer) || + (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) { + bfqq = NULL; + goto keep_queue; +@@ -2736,6 +3550,9 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) + struct bfq_entity *entity = &bfqq->entity; + + if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */ ++ BUG_ON(bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && ++ time_is_after_jiffies(bfqq->last_wr_start_finish)); ++ + bfq_log_bfqq(bfqd, bfqq, + "raising period dur %u/%u msec, old coeff %u, w %d(%d)", + jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish), +@@ -2749,22 +3566,30 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change"); /* @@ -3574,12 +4076,39 @@ index d1f648d..3bc1f8b 100644 + * time has elapsed from the beginning of this + * weight-raising period, then end weight raising. */ - if (bfq_bfqq_in_large_burst(bfqq) || +- if (bfq_bfqq_in_large_burst(bfqq) || - bfq_bfqq_cooperations(bfqq) >= bfqd->bfq_coop_thresh || - time_is_before_jiffies(bfqq->last_wr_start_finish + - bfqq->wr_cur_max_time)) { - bfqq->last_wr_start_finish = jiffies; -@@ -2811,13 +3348,29 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, +- time_is_before_jiffies(bfqq->last_wr_start_finish + +- bfqq->wr_cur_max_time)) { +- bfqq->last_wr_start_finish = jiffies; +- bfq_log_bfqq(bfqd, bfqq, +- "wrais ending at %lu, rais_max_time %u", +- bfqq->last_wr_start_finish, +- jiffies_to_msecs(bfqq->wr_cur_max_time)); ++ if (bfq_bfqq_in_large_burst(bfqq)) + bfq_bfqq_end_wr(bfqq); ++ else if (time_is_before_jiffies(bfqq->last_wr_start_finish + ++ bfqq->wr_cur_max_time)) { ++ if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time || ++ time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt + ++ bfq_wr_duration(bfqd))) ++ bfq_bfqq_end_wr(bfqq); ++ else { ++ /* switch back to interactive wr */ ++ bfqq->wr_coeff = bfqd->bfq_wr_coeff; ++ bfqq->wr_cur_max_time = bfq_wr_duration(bfqd); ++ bfqq->last_wr_start_finish = ++ bfqq->wr_start_at_switch_to_srt; ++ BUG_ON(time_is_after_jiffies( ++ bfqq->last_wr_start_finish)); ++ bfqq->entity.prio_changed = 1; ++ bfq_log_bfqq(bfqd, bfqq, ++ "back to interactive wr"); ++ } + } + } + /* Update weight both if it must be raised and if it must be lowered */ +@@ -2815,13 +3640,29 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, */ if (!bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); @@ -3609,7 +4138,7 @@ index d1f648d..3bc1f8b 100644 bfq_update_wr_data(bfqd, bfqq); bfq_log_bfqq(bfqd, bfqq, -@@ -2833,9 +3386,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, +@@ -2837,9 +3678,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd, bfqd->in_service_bic = RQ_BIC(rq); } @@ -3620,7 +4149,7 @@ index d1f648d..3bc1f8b 100644 goto expire; return dispatched; -@@ -2881,8 +3432,8 @@ static int 
bfq_forced_dispatch(struct bfq_data *bfqd) +@@ -2885,8 +3724,8 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd) st = bfq_entity_service_tree(&bfqq->entity); dispatched += __bfq_forced_dispatch_bfqq(bfqq); @@ -3630,7 +4159,7 @@ index d1f648d..3bc1f8b 100644 bfq_forget_idle(st); } -@@ -2895,9 +3446,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) +@@ -2899,37 +3738,37 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) { struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_queue *bfqq; @@ -3641,7 +4170,25 @@ index d1f648d..3bc1f8b 100644 if (bfqd->busy_queues == 0) return 0; -@@ -2908,21 +3459,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) + if (unlikely(force)) + return bfq_forced_dispatch(bfqd); + ++ /* ++ * Force device to serve one request at a time if ++ * strict_guarantees is true. Forcing this service scheme is ++ * currently the ONLY way to guarantee that the request ++ * service order enforced by the scheduler is respected by a ++ * queueing device. Otherwise the device is free even to make ++ * some unlucky request wait for as long as the device ++ * wishes. ++ * ++ * Of course, serving one request at at time may cause loss of ++ * throughput. ++ */ ++ if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0) ++ return 0; ++ + bfqq = bfq_select_queue(bfqd); if (!bfqq) return 0; @@ -3662,9 +4209,13 @@ index d1f648d..3bc1f8b 100644 - return 0; + BUG_ON(bfqq->entity.budget < bfqq->entity.service); - bfq_clear_bfqq_wait_request(bfqq); - BUG_ON(timer_pending(&bfqd->idle_slice_timer)); -@@ -2933,6 +3470,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) +- bfq_clear_bfqq_wait_request(bfqq); +- BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ BUG_ON(bfq_bfqq_wait_request(bfqq)); + + if (!bfq_dispatch_request(bfqd, bfqq)) + return 0; +@@ -2937,6 +3776,8 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) bfq_log_bfqq(bfqd, bfqq, "dispatched %s request", bfq_bfqq_sync(bfqq) ? "sync" : "async"); @@ -3673,7 +4224,7 @@ index d1f648d..3bc1f8b 100644 return 1; } -@@ -2944,23 +3483,22 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) +@@ -2948,23 +3789,22 @@ static int bfq_dispatch_requests(struct request_queue *q, int force) */ static void bfq_put_queue(struct bfq_queue *bfqq) { @@ -3702,7 +4253,7 @@ index d1f648d..3bc1f8b 100644 if (bfq_bfqq_sync(bfqq)) /* -@@ -2973,7 +3511,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) +@@ -2977,7 +3817,7 @@ static void bfq_put_queue(struct bfq_queue *bfqq) */ hlist_del_init(&bfqq->burst_list_node); @@ -3711,7 +4262,7 @@ index d1f648d..3bc1f8b 100644 kmem_cache_free(bfq_pool, bfqq); #ifdef CONFIG_BFQ_GROUP_IOSCHED -@@ -3007,8 +3545,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -3011,8 +3851,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_schedule_dispatch(bfqd); } @@ -3721,10 +4272,12 @@ index d1f648d..3bc1f8b 100644 bfq_put_cooperator(bfqq); -@@ -3019,26 +3556,7 @@ static void bfq_init_icq(struct io_cq *icq) - { - struct bfq_io_cq *bic = icq_to_bic(icq); +@@ -3021,28 +3860,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) + static void bfq_init_icq(struct io_cq *icq) + { +- struct bfq_io_cq *bic = icq_to_bic(icq); +- - bic->ttime.last_end_request = jiffies; - /* - * A newly created bic indicates that the process has just @@ -3745,11 +4298,11 @@ index d1f648d..3bc1f8b 100644 - * as needing weight raising. 
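
The strict_guarantees gate added in the dispatch hunk above is small enough to restate as a pure function; a sketch with assumed names:

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the new gate in bfq_dispatch_requests(): with the knob
     * set, a request is handed to the device only when the device
     * holds none, so the service order enforced by the scheduler
     * cannot be reshuffled inside the device's own queue. */
    static bool may_dispatch(bool strict_guarantees, int rq_in_driver)
    {
            return !(strict_guarantees && rq_in_driver > 0);
    }

    int main(void)
    {
            printf("%d %d\n", may_dispatch(true, 1), may_dispatch(true, 0));
            return 0;
    }
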
- */ - bic->wr_time_left = 1; -+ bic->ttime.last_end_request = bfq_smallest_from_now(); ++ icq_to_bic(icq)->ttime.last_end_request = ktime_get_ns() - (1ULL<<32); } static void bfq_exit_icq(struct io_cq *icq) -@@ -3046,21 +3564,21 @@ static void bfq_exit_icq(struct io_cq *icq) +@@ -3050,21 +3868,21 @@ static void bfq_exit_icq(struct io_cq *icq) struct bfq_io_cq *bic = icq_to_bic(icq); struct bfq_data *bfqd = bic_to_bfqd(bic); @@ -3778,26 +4331,27 @@ index d1f648d..3bc1f8b 100644 } } -@@ -3068,7 +3586,8 @@ static void bfq_exit_icq(struct io_cq *icq) +@@ -3072,8 +3890,8 @@ static void bfq_exit_icq(struct io_cq *icq) * Update the entity prio values; note that the new values will not * be used until the next (re)activation. */ --static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) +-static void +-bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) +static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, + struct bfq_io_cq *bic) { struct task_struct *tsk = current; int ioprio_class; -@@ -3100,7 +3619,7 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b +@@ -3105,7 +3923,7 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) break; } - if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) { + if (bfqq->new_ioprio >= IOPRIO_BE_NR) { - printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n", - bfqq->new_ioprio); + pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n", + bfqq->new_ioprio); BUG(); -@@ -3108,45 +3627,40 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b +@@ -3113,45 +3931,40 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); bfqq->entity.prio_changed = 1; @@ -3857,7 +4411,7 @@ index d1f648d..3bc1f8b 100644 } static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, -@@ -3155,8 +3669,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3160,8 +3973,9 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, RB_CLEAR_NODE(&bfqq->entity.rb_node); INIT_LIST_HEAD(&bfqq->fifo); INIT_HLIST_NODE(&bfqq->burst_list_node); @@ -3868,7 +4422,7 @@ index d1f648d..3bc1f8b 100644 bfqq->bfqd = bfqd; if (bic) -@@ -3166,6 +3681,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3171,6 +3985,7 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfq_class_idle(bfqq)) bfq_mark_bfqq_idle_window(bfqq); bfq_mark_bfqq_sync(bfqq); @@ -3876,14 +4430,16 @@ index d1f648d..3bc1f8b 100644 } else bfq_clear_bfqq_sync(bfqq); bfq_mark_bfqq_IO_bound(bfqq); -@@ -3175,72 +3691,17 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3180,72 +3995,19 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfqq->pid = pid; bfqq->wr_coeff = 1; - bfqq->last_wr_start_finish = 0; -+ bfqq->last_wr_start_finish = bfq_smallest_from_now(); ++ bfqq->last_wr_start_finish = jiffies; ++ bfqq->wr_start_at_switch_to_srt = bfq_smallest_from_now(); + bfqq->budget_timeout = bfq_smallest_from_now(); + bfqq->split_time = bfq_smallest_from_now(); ++ /* * Set to the value for which bfqq will not be deemed as * soft rt when it becomes backlogged. 
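
Two related changes appear around here: bfq_init_icq() above now seeds last_end_request 2^32 ns (about 4.29 s) in the past, so a brand-new process starts with a large think time instead of an artificially small one, and bfq_update_io_thinktime() in a later hunk keeps the average in nanoseconds with fixed-point arithmetic. A self-contained model of both (the struct layout and names are assumptions; the kernel refreshes last_end_request on every completion):

    #include <stdio.h>
    #include <stdint.h>

    struct ttime { uint64_t last_end_ns, samples, total, mean; };

    /* Seed the last completion 2^32 ns in the past, as the new
     * bfq_init_icq() does. */
    static void ttime_init(struct ttime *t, uint64_t now_ns)
    {
            t->last_end_ns = now_ns - (1ULL << 32);
            t->samples = t->total = t->mean = 0;
    }

    /* Fixed-point EWMA from bfq_update_io_thinktime(): old values
     * decay by 7/8 per event, 256 is the scale, 128 rounds the
     * final division. */
    static void ttime_update(struct ttime *t, uint64_t now_ns,
                             uint64_t slice_idle_ns)
    {
            uint64_t elapsed = now_ns - t->last_end_ns;

            if (elapsed > 2 * slice_idle_ns)
                    elapsed = 2 * slice_idle_ns;    /* clamp outliers */
            t->samples = (7 * t->samples + 256) / 8;
            t->total   = (7 * t->total + 256 * elapsed) / 8;
            t->mean    = (t->total + 128) / t->samples;
    }

    int main(void)
    {
            struct ttime t;

            ttime_init(&t, 5000000000ULL);                  /* t = 5 s */
            ttime_update(&t, 5001000000ULL, 8000000ULL);    /* 1 ms later */
            printf("mean ~ %llu ns\n", (unsigned long long)t.mean);
            return 0;
    }
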
@@ -3934,7 +4490,7 @@ index d1f648d..3bc1f8b 100644 - - if (bfqq) { - bfq_init_bfqq(bfqd, bfqq, bic, current->pid, -- is_sync); +- is_sync); - bfq_init_entity(&bfqq->entity, bfqg); - bfq_log_bfqq(bfqd, bfqq, "allocated"); - } else { @@ -3955,7 +4511,7 @@ index d1f648d..3bc1f8b 100644 } static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, -@@ -3263,44 +3724,60 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, +@@ -3268,90 +4030,84 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, } static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, @@ -3976,7 +4532,7 @@ index d1f648d..3bc1f8b 100644 - struct bfq_group *bfqg; + rcu_read_lock(); + -+ bfqg = bfq_find_set_group(bfqd,bio_blkcg(bio)); ++ bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); + if (!bfqg) { + bfqq = &bfqd->oom_bfqq; + goto out; @@ -4034,14 +4590,33 @@ index d1f648d..3bc1f8b 100644 return bfqq; } -@@ -3316,37 +3793,21 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd, - bic->ttime.ttime_samples; + static void bfq_update_io_thinktime(struct bfq_data *bfqd, + struct bfq_io_cq *bic) + { +- unsigned long elapsed = jiffies - bic->ttime.last_end_request; +- unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle); ++ struct bfq_ttime *ttime = &bic->ttime; ++ u64 elapsed = ktime_get_ns() - bic->ttime.last_end_request; ++ ++ elapsed = min_t(u64, elapsed, 2 * bfqd->bfq_slice_idle); + +- bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; +- bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8; +- bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) / +- bic->ttime.ttime_samples; ++ ttime->ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8; ++ ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); ++ ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, ++ ttime->ttime_samples); } -static void bfq_update_io_seektime(struct bfq_data *bfqd, - struct bfq_queue *bfqq, - struct request *rq) --{ ++static void ++bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, ++ struct request *rq) + { - sector_t sdist; - u64 total; - @@ -4060,32 +4635,22 @@ index d1f648d..3bc1f8b 100644 - sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*1024); - else - sdist = min(sdist, (bfqq->seek_mean * 4) + 2*1024*64); - +- - bfqq->seek_samples = (7*bfqq->seek_samples + 256) / 8; - bfqq->seek_total = (7*bfqq->seek_total + (u64)256*sdist) / 8; - total = bfqq->seek_total + (bfqq->seek_samples/2); - do_div(total, bfqq->seek_samples); - bfqq->seek_mean = (sector_t)total; -+static void -+bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, -+ struct request *rq) -+{ -+ sector_t sdist = 0; -+ if (bfqq->last_request_pos) { -+ if (bfqq->last_request_pos < blk_rq_pos(rq)) -+ sdist = blk_rq_pos(rq) - bfqq->last_request_pos; -+ else -+ sdist = bfqq->last_request_pos - blk_rq_pos(rq); -+ } - +- - bfq_log_bfqq(bfqd, bfqq, "dist=%llu mean=%llu", (u64)sdist, - (u64)bfqq->seek_mean); + bfqq->seek_history <<= 1; -+ bfqq->seek_history |= (sdist > BFQQ_SEEK_THR); ++ bfqq->seek_history |= ++ get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR; } /* -@@ -3364,7 +3825,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, +@@ -3369,7 +4125,8 @@ static void bfq_update_idle_window(struct bfq_data *bfqd, return; /* Idle window just restored, statistics are meaningless. 
*/ @@ -4095,7 +4660,7 @@ index d1f648d..3bc1f8b 100644 return; enable_idle = bfq_bfqq_idle_window(bfqq); -@@ -3404,22 +3866,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3409,22 +4166,13 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_update_io_thinktime(bfqd, bic); bfq_update_io_seektime(bfqd, bfqq, rq); @@ -4114,13 +4679,13 @@ index d1f648d..3bc1f8b 100644 bfq_log_bfqq(bfqd, bfqq, - "rq_enqueued: idle_window=%d (seeky %d, mean %llu)", - bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq), -- (long long unsigned)bfqq->seek_mean); +- (unsigned long long) bfqq->seek_mean); + "rq_enqueued: idle_window=%d (seeky %d)", + bfq_bfqq_idle_window(bfqq), BFQQ_SEEKY(bfqq)); bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); -@@ -3433,14 +3886,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3438,14 +4186,15 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, * is small and the queue is not to be expired, then * just exit. * @@ -4144,17 +4709,19 @@ index d1f648d..3bc1f8b 100644 */ if (small_req && !budget_timeout) return; -@@ -3453,9 +3907,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -3457,10 +4206,8 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, + * timer. */ bfq_clear_bfqq_wait_request(bfqq); - del_timer(&bfqd->idle_slice_timer); +- del_timer(&bfqd->idle_slice_timer); -#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ hrtimer_try_to_cancel(&bfqd->idle_slice_timer); bfqg_stats_update_idle_time(bfqq_group(bfqq)); -#endif /* * The queue is not empty, because a new request just -@@ -3499,27 +3951,19 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) +@@ -3504,28 +4251,21 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) */ new_bfqq->allocated[rq_data_dir(rq)]++; bfqq->allocated[rq_data_dir(rq)]--; @@ -4182,10 +4749,13 @@ index d1f648d..3bc1f8b 100644 - */ - if (bfqq->bic) - bfqq->bic->wr_time_left = 0; - rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; +- rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)]; ++ rq->fifo_time = ktime_get_ns() + ++ jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &bfqq->fifo); -@@ -3528,8 +3972,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) + bfq_rq_enqueued(bfqd, bfqq, rq); +@@ -3533,8 +4273,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq) static void bfq_update_hw_tag(struct bfq_data *bfqd) { @@ -4196,11 +4766,13 @@ index d1f648d..3bc1f8b 100644 if (bfqd->hw_tag == 1) return; -@@ -3555,48 +3999,45 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) +@@ -3560,48 +4300,85 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq); struct bfq_data *bfqd = bfqq->bfqd; - bool sync = bfq_bfqq_sync(bfqq); ++ u64 now_ns; ++ u32 delta_us; - bfq_log_bfqq(bfqd, bfqq, "completed one req with %u sects left (%d)", - blk_rq_sectors(rq), sync); @@ -4217,8 +4789,10 @@ index d1f648d..3bc1f8b 100644 -#ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_stats_update_completion(bfqq_group(bfqq), rq_start_time_ns(rq), - rq_io_start_time_ns(rq), rq->cmd_flags); +- rq_io_start_time_ns(rq), rq->cmd_flags); -#endif ++ rq_io_start_time_ns(rq), req_op(rq), ++ rq->cmd_flags); if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { + 
 	BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
++		/*
++		 * Set budget_timeout (which we overload to store the
++		 * time at which the queue remains with no backlog and
++		 * no outstanding request; used by the weight-raising
++		 * mechanism).
++		 */
++		bfqq->budget_timeout = jiffies;
++
 		bfq_weights_tree_remove(bfqd, &bfqq->entity,
 					&bfqd->queue_weights_tree);
-		if (!blk_queue_nonrot(bfqd->queue)) {
-			BUG_ON(!bfqd->busy_in_flight_queues);
-			bfqd->busy_in_flight_queues--;
-			if (bfq_bfqq_constantly_seeky(bfqq)) {
-				BUG_ON(!bfqd->
-				       const_seeky_busy_in_flight_queues);
-				bfqd->const_seeky_busy_in_flight_queues--;
-			}
-		}
 	}
 
-	if (sync) {
-		bfqd->sync_flight--;
-		RQ_BIC(rq)->ttime.last_end_request = jiffies;
-	}
++	now_ns = ktime_get_ns();
++
++	RQ_BIC(rq)->ttime.last_end_request = now_ns;
++
++	/*
++	 * Using us instead of ns, to get a reasonable precision in
++	 * computing rate in next check.
++	 */
++	delta_us = div_u64(now_ns - bfqd->last_completion, NSEC_PER_USEC);
++
++	bfq_log(bfqd, "rq_completed: delta %uus/%luus max_size %u rate %llu/%llu",
++		delta_us, BFQ_MIN_TT/NSEC_PER_USEC, bfqd->last_rq_max_size,
++		(USEC_PER_SEC*
++		(u64)((bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us))
++			>>BFQ_RATE_SHIFT,
++		(USEC_PER_SEC*(u64)(1UL<<(BFQ_RATE_SHIFT-10)))>>BFQ_RATE_SHIFT);
++
++	/*
++	 * If the request took rather long to complete, and, according
++	 * to the maximum request size recorded, this completion latency
++	 * implies that the request was certainly served at a very low
++	 * rate (less than 1M sectors/sec), then the whole observation
++	 * interval that lasts up to this time instant cannot be a
++	 * valid time interval for computing a new peak rate. Invoke
++	 * bfq_update_rate_reset to have the following three steps
++	 * taken:
++	 * - close the observation interval at the last (previous)
++	 *   request dispatch or completion
++	 * - compute rate, if possible, for that observation interval
++	 * - reset to zero samples, which will trigger a proper
++	 *   re-initialization of the observation interval on next
++	 *   dispatch
++	 */
++	if (delta_us > BFQ_MIN_TT/NSEC_PER_USEC &&
++	   (bfqd->last_rq_max_size<<BFQ_RATE_SHIFT)/delta_us <
++			1UL<<(BFQ_RATE_SHIFT - 10))
++		bfq_update_rate_reset(bfqd, NULL);
++
++	bfqd->last_completion = now_ns;
 
 	/*
-	 * If we are waiting to discover whether the request pattern of the
-	 * task associated with the queue is actually isochronous, and
-	 * both requisites for this condition to hold are satisfied, then
-	 * compute soft_rt_next_start (see the comments to the function
-	 * bfq_bfqq_softrt_next_start()).
+	 * If we are waiting to discover whether the request pattern
+	 * of the task associated with the queue is actually
+	 * isochronous, and both requisites for this condition to
+	 * hold are now satisfied, then compute soft_rt_next_start
+	 * (see the comments on the function
+	 * bfq_bfqq_softrt_next_start()).
 	 */
 	if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
 	    RB_EMPTY_ROOT(&bfqq->sort_list))
@@ -3613,10 +4390,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
 	 * or if we want to idle in case it has no pending requests.
*/ if (bfqd->in_service_queue == bfqq) { @@ -4277,7 +4888,25 @@ index d1f648d..3bc1f8b 100644 bfq_arm_slice_timer(bfqd); goto out; } else if (bfq_may_expire_for_budg_timeout(bfqq)) -@@ -3682,14 +4120,14 @@ static void bfq_put_request(struct request *rq) +@@ -3646,7 +4420,7 @@ static int __bfq_may_queue(struct bfq_queue *bfqq) + return ELV_MQUEUE_MAY; + } + +-static int bfq_may_queue(struct request_queue *q, int rw) ++static int bfq_may_queue(struct request_queue *q, int op, int op_flags) + { + struct bfq_data *bfqd = q->elevator->elevator_data; + struct task_struct *tsk = current; +@@ -3663,7 +4437,7 @@ static int bfq_may_queue(struct request_queue *q, int rw) + if (!bic) + return ELV_MQUEUE_MAY; + +- bfqq = bic_to_bfqq(bic, rw_is_sync(rw)); ++ bfqq = bic_to_bfqq(bic, rw_is_sync(op, op_flags)); + if (bfqq) + return __bfq_may_queue(bfqq); + +@@ -3687,14 +4461,14 @@ static void bfq_put_request(struct request *rq) rq->elv.priv[1] = NULL; bfq_log_bfqq(bfqq->bfqd, bfqq, "put_request %p, %d", @@ -4294,7 +4923,7 @@ index d1f648d..3bc1f8b 100644 */ static struct bfq_queue * bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq) -@@ -3727,11 +4165,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, +@@ -3732,11 +4506,8 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, unsigned long flags; bool split = false; @@ -4307,7 +4936,7 @@ index d1f648d..3bc1f8b 100644 if (!bic) goto queue_fail; -@@ -3741,23 +4176,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, +@@ -3746,23 +4517,47 @@ static int bfq_set_request(struct request_queue *q, struct request *rq, new_queue: bfqq = bic_to_bfqq(bic, is_sync); if (!bfqq || bfqq == &bfqd->oom_bfqq) { @@ -4335,18 +4964,14 @@ index d1f648d..3bc1f8b 100644 + "large burst"); bfq_mark_bfqq_in_large_burst(bfqq); - else { -- bfq_clear_bfqq_in_large_burst(bfqq); -- if (bic->was_in_burst_list) -- hlist_add_head(&bfqq->burst_list_node, -- &bfqd->burst_list); + } else { + bfq_log_bfqq(bfqd, bfqq, + "set_request: clearing in " + "large burst"); -+ bfq_clear_bfqq_in_large_burst(bfqq); -+ if (bic->was_in_burst_list) -+ hlist_add_head(&bfqq->burst_list_node, -+ &bfqd->burst_list); + bfq_clear_bfqq_in_large_burst(bfqq); + if (bic->was_in_burst_list) + hlist_add_head(&bfqq->burst_list_node, + &bfqd->burst_list); } + bfqq->split_time = jiffies; } @@ -4362,7 +4987,7 @@ index d1f648d..3bc1f8b 100644 bfqq = bfq_split_bfqq(bic, bfqq); split = true; if (!bfqq) -@@ -3766,9 +4225,8 @@ new_queue: +@@ -3771,9 +4566,8 @@ new_queue: } bfqq->allocated[rw]++; @@ -4374,7 +4999,7 @@ index d1f648d..3bc1f8b 100644 rq->elv.priv[0] = bic; rq->elv.priv[1] = bfqq; -@@ -3783,7 +4241,6 @@ new_queue: +@@ -3788,7 +4582,6 @@ new_queue: if (likely(bfqq != &bfqd->oom_bfqq) && bfqq_process_refs(bfqq) == 1) { bfqq->bic = bic; if (split) { @@ -4382,7 +5007,7 @@ index d1f648d..3bc1f8b 100644 /* * If the queue has just been split from a shared * queue, restore the idle window and the possible -@@ -3793,6 +4250,9 @@ new_queue: +@@ -3798,6 +4591,9 @@ new_queue: } } @@ -4392,7 +5017,39 @@ index d1f648d..3bc1f8b 100644 spin_unlock_irqrestore(q->queue_lock, flags); return 0; -@@ -3872,6 +4332,7 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) +@@ -3824,9 +4620,10 @@ static void bfq_kick_queue(struct work_struct *work) + * Handler of the expiration of the timer running if the in-service queue + * is idling inside its time slice. 
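
The handler that follows is the core of the switch from a timer_list to an hrtimer: the owning bfq_data is recovered with container_of() instead of a data cookie, and the callback returns HRTIMER_NORESTART. A userspace demonstration of the recovery idiom (the struct types here are stand-ins, not kernel structures):

    #include <stdio.h>
    #include <stddef.h>

    /* Same pointer arithmetic the kernel macro performs: step back
     * from a member's address to the enclosing object. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct timer { int dummy; };
    struct sched_data { int id; struct timer idle_timer; };

    static int handler(struct timer *t)
    {
            struct sched_data *sd =
                    container_of(t, struct sched_data, idle_timer);

            return sd->id;  /* owner recovered without a cookie */
    }

    int main(void)
    {
            struct sched_data sd = { .id = 42 };

            printf("%d\n", handler(&sd.idle_timer));
            return 0;
    }
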
+ */ +-static void bfq_idle_slice_timer(unsigned long data) ++static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) + { +- struct bfq_data *bfqd = (struct bfq_data *)data; ++ struct bfq_data *bfqd = container_of(timer, struct bfq_data, ++ idle_slice_timer); + struct bfq_queue *bfqq; + unsigned long flags; + enum bfqq_expiration reason; +@@ -3844,6 +4641,8 @@ static void bfq_idle_slice_timer(unsigned long data) + */ + if (bfqq) { + bfq_log_bfqq(bfqd, bfqq, "slice_timer expired"); ++ bfq_clear_bfqq_wait_request(bfqq); ++ + if (bfq_bfqq_budget_timeout(bfqq)) + /* + * Also here the queue can be safely expired +@@ -3869,14 +4668,16 @@ schedule_dispatch: + bfq_schedule_dispatch(bfqd); + + spin_unlock_irqrestore(bfqd->queue->queue_lock, flags); ++ return HRTIMER_NORESTART; + } + + static void bfq_shutdown_timer_wq(struct bfq_data *bfqd) + { +- del_timer_sync(&bfqd->idle_slice_timer); ++ hrtimer_cancel(&bfqd->idle_slice_timer); cancel_work_sync(&bfqd->unplug_work); } @@ -4400,7 +5057,7 @@ index d1f648d..3bc1f8b 100644 static void __bfq_put_async_bfqq(struct bfq_data *bfqd, struct bfq_queue **bfqq_ptr) { -@@ -3880,9 +4341,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, +@@ -3885,9 +4686,9 @@ static void __bfq_put_async_bfqq(struct bfq_data *bfqd, bfq_log(bfqd, "put_async_bfqq: %p", bfqq); if (bfqq) { @@ -4412,7 +5069,7 @@ index d1f648d..3bc1f8b 100644 bfq_put_queue(bfqq); *bfqq_ptr = NULL; } -@@ -3904,6 +4365,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) +@@ -3909,6 +4710,7 @@ static void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg) __bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq); } @@ -4420,16 +5077,18 @@ index d1f648d..3bc1f8b 100644 static void bfq_exit_queue(struct elevator_queue *e) { -@@ -3923,8 +4385,6 @@ static void bfq_exit_queue(struct elevator_queue *e) +@@ -3928,9 +4730,7 @@ static void bfq_exit_queue(struct elevator_queue *e) bfq_shutdown_timer_wq(bfqd); - synchronize_rcu(); - - BUG_ON(timer_pending(&bfqd->idle_slice_timer)); +- BUG_ON(timer_pending(&bfqd->idle_slice_timer)); ++ BUG_ON(hrtimer_active(&bfqd->idle_slice_timer)); #ifdef CONFIG_BFQ_GROUP_IOSCHED -@@ -3973,11 +4433,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) + blkcg_deactivate_policy(q, &blkcg_policy_bfq); +@@ -3978,11 +4778,14 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) * will not attempt to free it. */ bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0); @@ -4445,7 +5104,7 @@ index d1f648d..3bc1f8b 100644 /* * Trigger weight initialization, according to ioprio, at the * oom_bfqq's first activation. 
The oom_bfqq's ioprio and ioprio -@@ -3996,9 +4459,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +@@ -4001,13 +4804,10 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) goto out_free; bfq_init_root_group(bfqd->root_group, bfqd); bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); @@ -4453,9 +5112,15 @@ index d1f648d..3bc1f8b 100644 - bfqd->active_numerous_groups = 0; -#endif - init_timer(&bfqd->idle_slice_timer); +- init_timer(&bfqd->idle_slice_timer); ++ hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC, ++ HRTIMER_MODE_REL); bfqd->idle_slice_timer.function = bfq_idle_slice_timer; -@@ -4023,20 +4483,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +- bfqd->idle_slice_timer.data = (unsigned long)bfqd; + + bfqd->queue_weights_tree = RB_ROOT; + bfqd->group_weights_tree = RB_ROOT; +@@ -4028,20 +4828,19 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->bfq_back_penalty = bfq_back_penalty; bfqd->bfq_slice_idle = bfq_slice_idle; bfqd->bfq_class_idle_last_service = 0; @@ -4483,7 +5148,7 @@ index d1f648d..3bc1f8b 100644 bfqd->bfq_wr_rt_max_time = msecs_to_jiffies(300); bfqd->bfq_wr_max_time = 0; bfqd->bfq_wr_min_idle_time = msecs_to_jiffies(2000); -@@ -4048,16 +4507,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) +@@ -4053,16 +4852,15 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) * video. */ bfqd->wr_busy_queues = 0; @@ -4504,9 +5169,36 @@ index d1f648d..3bc1f8b 100644 bfqd->device_speed = BFQ_BFQD_FAST; return 0; -@@ -4161,10 +4619,8 @@ SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); +@@ -4088,7 +4886,7 @@ static int __init bfq_slab_setup(void) + + static ssize_t bfq_var_show(unsigned int var, char *page) + { +- return sprintf(page, "%d\n", var); ++ return sprintf(page, "%u\n", var); + } + + static ssize_t bfq_var_store(unsigned long *var, const char *page, +@@ -4159,21 +4957,21 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page) + static ssize_t __FUNC(struct elevator_queue *e, char *page) \ + { \ + struct bfq_data *bfqd = e->elevator_data; \ +- unsigned int __data = __VAR; \ +- if (__CONV) \ ++ u64 __data = __VAR; \ ++ if (__CONV == 1) \ + __data = jiffies_to_msecs(__data); \ ++ else if (__CONV == 2) \ ++ __data = div_u64(__data, NSEC_PER_MSEC); \ + return bfq_var_show(__data, (page)); \ + } +-SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1); +-SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1); ++SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2); ++SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2); + SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0); SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0); - SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); +-SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1); ++SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2); SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0); -SHOW_FUNCTION(bfq_max_budget_async_rq_show, - bfqd->bfq_max_budget_async_rq, 0); @@ -4517,52 +5209,129 @@ index d1f648d..3bc1f8b 100644 SHOW_FUNCTION(bfq_low_latency_show, bfqd->low_latency, 0); SHOW_FUNCTION(bfq_wr_coeff_show, bfqd->bfq_wr_coeff, 0); SHOW_FUNCTION(bfq_wr_rt_max_time_show, bfqd->bfq_wr_rt_max_time, 1); -@@ -4199,10 +4655,6 @@ STORE_FUNCTION(bfq_back_seek_max_store, 
&bfqd->bfq_back_max, 0, INT_MAX, 0); +@@ -4183,6 +4981,17 @@ SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async, + SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0); + #undef SHOW_FUNCTION + ++#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \ ++static ssize_t __FUNC(struct elevator_queue *e, char *page) \ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ u64 __data = __VAR; \ ++ __data = div_u64(__data, NSEC_PER_USEC); \ ++ return bfq_var_show(__data, (page)); \ ++} ++USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle); ++#undef USEC_SHOW_FUNCTION ++ + #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ + static ssize_t \ + __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +@@ -4194,24 +5003,22 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ +- if (__CONV) \ ++ if (__CONV == 1) \ + *(__PTR) = msecs_to_jiffies(__data); \ ++ else if (__CONV == 2) \ ++ *(__PTR) = (u64)__data * NSEC_PER_MSEC; \ + else \ + *(__PTR) = __data; \ + return ret; \ + } + STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1, +- INT_MAX, 1); ++ INT_MAX, 2); + STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1, +- INT_MAX, 1); ++ INT_MAX, 2); + STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0); STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1, INT_MAX, 0); - STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); +-STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1); -STORE_FUNCTION(bfq_max_budget_async_rq_store, &bfqd->bfq_max_budget_async_rq, - 1, INT_MAX, 0); -STORE_FUNCTION(bfq_timeout_async_store, &bfqd->bfq_timeout[BLK_RW_ASYNC], 0, - INT_MAX, 1); ++STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2); STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0); STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1); STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX, -@@ -4224,10 +4676,8 @@ static ssize_t bfq_weights_store(struct elevator_queue *e, +@@ -4224,6 +5031,23 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0, + INT_MAX, 0); + #undef STORE_FUNCTION - static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) - { ++#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ ++static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\ ++{ \ ++ struct bfq_data *bfqd = e->elevator_data; \ ++ unsigned long __data; \ ++ int ret = bfq_var_store(&__data, (page), count); \ ++ if (__data < (MIN)) \ ++ __data = (MIN); \ ++ else if (__data > (MAX)) \ ++ __data = (MAX); \ ++ *(__PTR) = (u64)__data * NSEC_PER_USEC; \ ++ return ret; \ ++} ++USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0, ++ UINT_MAX); ++#undef USEC_STORE_FUNCTION ++ + /* do nothing for the moment */ + static ssize_t bfq_weights_store(struct elevator_queue *e, + const char *page, size_t count) +@@ -4231,16 +5055,6 @@ static ssize_t bfq_weights_store(struct elevator_queue *e, + return count; + } + +-static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd) +-{ - u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]); - - if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) +- if (bfqd->peak_rate_samples >= BFQ_PEAK_RATE_SAMPLES) - return 
bfq_calc_max_budget(bfqd->peak_rate, timeout); -+ return bfq_calc_max_budget(bfqd); - else - return bfq_default_max_budget; - } -@@ -4252,6 +4702,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e, +- else +- return bfq_default_max_budget; +-} +- + static ssize_t bfq_max_budget_store(struct elevator_queue *e, + const char *page, size_t count) + { +@@ -4249,7 +5063,7 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e, + int ret = bfq_var_store(&__data, (page), count); + + if (__data == 0) +- bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd); + else { + if (__data > INT_MAX) + __data = INT_MAX; +@@ -4261,6 +5075,10 @@ static ssize_t bfq_max_budget_store(struct elevator_queue *e, return ret; } -+/* ++/* + * Leaving this name to preserve name compatibility with cfq + * parameters, but this timeout is used for both sync and async. + */ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, const char *page, size_t count) { -@@ -4264,13 +4718,31 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, +@@ -4273,9 +5091,27 @@ static ssize_t bfq_timeout_sync_store(struct elevator_queue *e, else if (__data > INT_MAX) __data = INT_MAX; - bfqd->bfq_timeout[BLK_RW_SYNC] = msecs_to_jiffies(__data); + bfqd->bfq_timeout = msecs_to_jiffies(__data); if (bfqd->bfq_user_max_budget == 0) - bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); - - return ret; - } - +- bfqd->bfq_max_budget = bfq_estimated_max_budget(bfqd); ++ bfqd->bfq_max_budget = bfq_calc_max_budget(bfqd); ++ ++ return ret; ++} ++ +static ssize_t bfq_strict_guarantees_store(struct elevator_queue *e, + const char *page, size_t count) +{ @@ -4573,20 +5342,18 @@ index d1f648d..3bc1f8b 100644 + if (__data > 1) + __data = 1; + if (!bfqd->strict_guarantees && __data == 1 -+ && bfqd->bfq_slice_idle < msecs_to_jiffies(8)) -+ bfqd->bfq_slice_idle = msecs_to_jiffies(8); ++ && bfqd->bfq_slice_idle < 8 * NSEC_PER_MSEC) ++ bfqd->bfq_slice_idle = 8 * NSEC_PER_MSEC; + + bfqd->strict_guarantees = __data; -+ -+ return ret; -+} -+ - static ssize_t bfq_low_latency_store(struct elevator_queue *e, - const char *page, size_t count) - { -@@ -4297,9 +4769,8 @@ static struct elv_fs_entry bfq_attrs[] = { + + return ret; + } +@@ -4305,10 +5141,10 @@ static struct elv_fs_entry bfq_attrs[] = { + BFQ_ATTR(back_seek_max), BFQ_ATTR(back_seek_penalty), BFQ_ATTR(slice_idle), ++ BFQ_ATTR(slice_idle_us), BFQ_ATTR(max_budget), - BFQ_ATTR(max_budget_async_rq), BFQ_ATTR(timeout_sync), @@ -4595,7 +5362,17 @@ index d1f648d..3bc1f8b 100644 BFQ_ATTR(low_latency), BFQ_ATTR(wr_coeff), BFQ_ATTR(wr_max_time), -@@ -4342,9 +4813,28 @@ static struct elevator_type iosched_bfq = { +@@ -4328,7 +5164,8 @@ static struct elevator_type iosched_bfq = { + #ifdef CONFIG_BFQ_GROUP_IOSCHED + .elevator_bio_merged_fn = bfq_bio_merged, + #endif +- .elevator_allow_merge_fn = bfq_allow_merge, ++ .elevator_allow_bio_merge_fn = bfq_allow_bio_merge, ++ .elevator_allow_rq_merge_fn = bfq_allow_rq_merge, + .elevator_dispatch_fn = bfq_dispatch_requests, + .elevator_add_req_fn = bfq_insert_request, + .elevator_activate_req_fn = bfq_activate_request, +@@ -4351,18 +5188,28 @@ static struct elevator_type iosched_bfq = { .elevator_owner = THIS_MODULE, }; @@ -4620,32 +5397,46 @@ index d1f648d..3bc1f8b 100644 static int __init bfq_init(void) { int ret; -+ char msg[50] = "BFQ I/O-scheduler: v8r3"; - - /* - * Can be 0 on HZ < 1000 setups. 
-@@ -4352,9 +4842,6 @@ static int __init bfq_init(void) - if (bfq_slice_idle == 0) - bfq_slice_idle = 1; - +- +- /* +- * Can be 0 on HZ < 1000 setups. +- */ +- if (bfq_slice_idle == 0) +- bfq_slice_idle = 1; +- - if (bfq_timeout_async == 0) - bfq_timeout_async = 1; -- ++ char msg[50] = "BFQ I/O-scheduler: v8r4"; + #ifdef CONFIG_BFQ_GROUP_IOSCHED ret = blkcg_policy_register(&blkcg_policy_bfq); - if (ret) -@@ -4370,23 +4857,34 @@ static int __init bfq_init(void) - * installed on the reference devices (see the comments before the - * definitions of the two arrays). +@@ -4375,27 +5222,46 @@ static int __init bfq_init(void) + goto err_pol_unreg; + + /* +- * Times to load large popular applications for the typical systems +- * installed on the reference devices (see the comments before the +- * definitions of the two arrays). ++ * Times to load large popular applications for the typical ++ * systems installed on the reference devices (see the ++ * comments before the definitions of the next two ++ * arrays). Actually, we use slightly slower values, as the ++ * estimated peak rate tends to be smaller than the actual ++ * peak rate. The reason for this last fact is that estimates ++ * are computed over much shorter time intervals than the long ++ * intervals typically used for benchmarking. Why? First, to ++ * adapt more quickly to variations. Second, because an I/O ++ * scheduler cannot rely on a peak-rate-evaluation workload to ++ * be run for a long time. */ - T_slow[0] = msecs_to_jiffies(2600); - T_slow[1] = msecs_to_jiffies(1000); - T_fast[0] = msecs_to_jiffies(5500); - T_fast[1] = msecs_to_jiffies(2000); -+ T_slow[0] = msecs_to_jiffies(3500); -+ T_slow[1] = msecs_to_jiffies(1500); -+ T_fast[0] = msecs_to_jiffies(8000); -+ T_fast[1] = msecs_to_jiffies(3000); ++ T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */ ++ T_slow[1] = msecs_to_jiffies(1000); /* actually 1.5 sec */ ++ T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */ ++ T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */ /* - * Thresholds that determine the switch between speed classes (see @@ -4679,15 +5470,17 @@ index d1f648d..3bc1f8b 100644 return 0; diff --git a/block/bfq-sched.c b/block/bfq-sched.c -index a64fec1..7d73b9d 100644 +index a5ed694..45d63d3 100644 --- a/block/bfq-sched.c +++ b/block/bfq-sched.c -@@ -7,9 +7,11 @@ +@@ -7,9 +7,13 @@ * Copyright (C) 2008 Fabio Checconi * Paolo Valente * - * Copyright (C) 2010 Paolo Valente -+ * Copyright (C) 2016 Paolo Valente ++ * Copyright (C) 2015 Paolo Valente ++ * ++ * Copyright (C) 2016 Paolo Valente */ +static struct bfq_group *bfqq_group(struct bfq_queue *bfqq); @@ -4695,7 +5488,7 @@ index a64fec1..7d73b9d 100644 #ifdef CONFIG_BFQ_GROUP_IOSCHED #define for_each_entity(entity) \ for (; entity ; entity = entity->parent) -@@ -22,8 +24,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, +@@ -22,8 +26,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, int extract, struct bfq_data *bfqd); @@ -4704,7 +5497,7 @@ index a64fec1..7d73b9d 100644 static void bfq_update_budget(struct bfq_entity *next_in_service) { struct bfq_entity *bfqg_entity; -@@ -48,6 +48,7 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) +@@ -48,6 +50,7 @@ static void bfq_update_budget(struct bfq_entity *next_in_service) static int bfq_update_next_in_service(struct bfq_sched_data *sd) { struct bfq_entity *next_in_service; @@ -4712,13 +5505,13 @@ index a64fec1..7d73b9d 100644 if (sd->in_service_entity) /* will update/requeue at 
the end of service */ -@@ -65,14 +66,29 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd) +@@ -65,14 +68,29 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd) if (next_in_service) bfq_update_budget(next_in_service); + else + goto exit; - ++ + bfqq = bfq_entity_to_bfqq(next_in_service); + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, @@ -4727,7 +5520,7 @@ index a64fec1..7d73b9d 100644 + struct bfq_group *bfqg = + container_of(next_in_service, + struct bfq_group, entity); -+ + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "update_next_in_service: chosen this entity"); + } @@ -4743,12 +5536,12 @@ index a64fec1..7d73b9d 100644 } #else #define for_each_entity(entity) \ -@@ -151,20 +167,35 @@ static u64 bfq_delta(unsigned long service, unsigned long weight) +@@ -151,20 +169,36 @@ static u64 bfq_delta(unsigned long service, unsigned long weight) static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); -- -+ unsigned long long start, finish, delta ; ++ unsigned long long start, finish, delta; + BUG_ON(entity->weight == 0); entity->finish = entity->start + @@ -4782,7 +5575,34 @@ index a64fec1..7d73b9d 100644 } } -@@ -386,8 +417,6 @@ static void bfq_active_insert(struct bfq_service_tree *st, +@@ -293,10 +327,26 @@ static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node) + static void bfq_update_active_node(struct rb_node *node) + { + struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node); ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + + entity->min_start = entity->start; + bfq_update_min(entity, node->rb_right); + bfq_update_min(entity, node->rb_left); ++ ++ if (bfqq) { ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "update_active_node: new min_start %llu", ++ ((entity->min_start>>10)*1000)>>12); ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ } else { ++ struct bfq_group *bfqg = ++ container_of(entity, struct bfq_group, entity); ++ ++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, ++ "update_active_node: new min_start %llu", ++ ((entity->min_start>>10)*1000)>>12); ++#endif ++ } + } + + /** +@@ -386,8 +436,6 @@ static void bfq_active_insert(struct bfq_service_tree *st, BUG_ON(!bfqg); BUG_ON(!bfqd); bfqg->active_entities++; @@ -4791,16 +5611,16 @@ index a64fec1..7d73b9d 100644 } #endif } -@@ -399,7 +428,7 @@ static void bfq_active_insert(struct bfq_service_tree *st, +@@ -399,7 +447,7 @@ static void bfq_active_insert(struct bfq_service_tree *st, static unsigned short bfq_ioprio_to_weight(int ioprio) { BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR); - return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio; -+ return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF ; ++ return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF; } /** -@@ -422,9 +451,9 @@ static void bfq_get_entity(struct bfq_entity *entity) +@@ -422,9 +470,9 @@ static void bfq_get_entity(struct bfq_entity *entity) struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); if (bfqq) { @@ -4812,7 +5632,7 @@ index a64fec1..7d73b9d 100644 } } -@@ -499,10 +528,6 @@ static void bfq_active_extract(struct bfq_service_tree *st, +@@ -499,10 +547,6 @@ static void bfq_active_extract(struct bfq_service_tree *st, BUG_ON(!bfqd); BUG_ON(!bfqg->active_entities); bfqg->active_entities--; @@ -4823,7 +5643,7 @@ index a64fec1..7d73b9d 100644 } #endif } -@@ -552,7 +577,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st, +@@ -552,7 +596,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st, if 
(bfqq) { sd = entity->sched_data; bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d", @@ -4832,7 +5652,7 @@ index a64fec1..7d73b9d 100644 bfq_put_queue(bfqq); } } -@@ -602,7 +627,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, +@@ -602,7 +646,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, if (entity->prio_changed) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); @@ -4841,13 +5661,9 @@ index a64fec1..7d73b9d 100644 struct bfq_data *bfqd = NULL; struct rb_root *root; #ifdef CONFIG_BFQ_GROUP_IOSCHED -@@ -628,12 +653,14 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, - if (entity->new_weight != entity->orig_weight) { - if (entity->new_weight < BFQ_MIN_WEIGHT || +@@ -630,7 +674,10 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, entity->new_weight > BFQ_MAX_WEIGHT) { -- printk(KERN_CRIT "update_weight_prio: " -- "new_weight %d\n", -+ pr_crit("update_weight_prio: new_weight %d\n", + pr_crit("update_weight_prio: new_weight %d\n", entity->new_weight); - BUG(); + if (entity->new_weight < BFQ_MIN_WEIGHT) @@ -4855,12 +5671,9 @@ index a64fec1..7d73b9d 100644 + else + entity->new_weight = BFQ_MAX_WEIGHT; } -- entity->orig_weight = entity->new_weight; -+ entity->orig_weight = entity->new_weight; + entity->orig_weight = entity->new_weight; if (bfqq) - bfqq->ioprio = - bfq_weight_to_ioprio(entity->orig_weight); -@@ -662,6 +689,13 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, +@@ -661,6 +708,13 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st, * associated with its new weight. */ if (prev_weight != new_weight) { @@ -4874,7 +5687,7 @@ index a64fec1..7d73b9d 100644 root = bfqq ? &bfqd->queue_weights_tree : &bfqd->group_weights_tree; bfq_weights_tree_remove(bfqd, entity, root); -@@ -708,7 +742,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) +@@ -707,7 +761,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) st = bfq_entity_service_tree(entity); entity->service += served; @@ -4883,7 +5696,7 @@ index a64fec1..7d73b9d 100644 BUG_ON(st->wsum == 0); st->vtime += bfq_delta(served, st->wsum); -@@ -717,31 +751,69 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) +@@ -716,31 +770,69 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, int served) #ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_stats_set_start_empty_time(bfqq_group(bfqq)); #endif @@ -4901,7 +5714,12 @@ index a64fec1..7d73b9d 100644 + * @bfqd: the device * @bfqq: the queue that needs a service update. + * @time_ms: the amount of time during which the queue has received service -+ * + * +- * When it's not possible to be fair in the service domain, because +- * a queue is not consuming its budget fast enough (the meaning of +- * fast depends on the timeout parameter), we charge it a full +- * budget. In this way we should obtain a sort of time-domain +- * fairness among all the seeky/slow queues. + * If a queue does not consume its budget fast enough, then providing + * the queue with service fairness may impair throughput, more or less + * severely. For this reason, queues that consume their budget slowly @@ -4912,12 +5730,7 @@ index a64fec1..7d73b9d 100644 + * to the amount of service that they would have received during their + * service slot if they had been fast, i.e., if their requests had + * been dispatched at a rate equal to the estimated peak rate. 
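
The charging scheme this comment describes is proportional: a queue whose slot ended after time_ms of a timeout_ms-long slot is billed the service a fast queue would have received in that fraction of the slot, never less than what it actually consumed. A sketch of the arithmetic (the signature is illustrative):

    #include <stdio.h>

    /* Model of bfq_bfqq_charge_time()'s time-domain charge. */
    static int time_charge(int max_budget, long time_ms, long timeout_ms,
                           int service_received)
    {
            int charge = max_budget; /* full budget once the slot is over */

            if (time_ms > 0 && time_ms < timeout_ms)
                    charge = (int)((long long)max_budget * time_ms /
                                   timeout_ms);
            return charge > service_received ? charge : service_received;
    }

    int main(void)
    {
            /* 30 ms used of a 125 ms slot, 16384-sector max budget */
            printf("%d\n", time_charge(16384, 30, 125, 100));
            return 0;
    }
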
- * -- * When it's not possible to be fair in the service domain, because -- * a queue is not consuming its budget fast enough (the meaning of -- * fast depends on the timeout parameter), we charge it a full -- * budget. In this way we should obtain a sort of time-domain -- * fairness among all the seeky/slow queues. ++ * + * It is worth noting that time fairness can cause important + * distortions in terms of bandwidth distribution, on devices with + * internal queueing. The reason is that I/O requests dispatched @@ -4937,17 +5750,17 @@ index a64fec1..7d73b9d 100644 + if (time_ms > 0 && time_ms < timeout_ms) + tot_serv_to_charge = + (bfqd->bfq_max_budget * time_ms) / timeout_ms; -+ + +- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); + if (tot_serv_to_charge < entity->service) + tot_serv_to_charge = entity->service; -- bfq_log_bfqq(bfqq->bfqd, bfqq, "charge_full_budget"); +- bfq_bfqq_served(bfqq, entity->budget - entity->service); + bfq_log_bfqq(bfqq->bfqd, bfqq, + "charge_time: %lu/%u ms, %d/%d/%d sectors", + time_ms, timeout_ms, entity->service, + tot_serv_to_charge, entity->budget); - -- bfq_bfqq_served(bfqq, entity->budget - entity->service); ++ + /* Increase budget to avoid inconsistencies */ + if (tot_serv_to_charge > entity->budget) + entity->budget = tot_serv_to_charge; @@ -4963,7 +5776,7 @@ index a64fec1..7d73b9d 100644 * * Called whenever an entity is activated, i.e., it is not active and one * of its children receives a new request, or has to be reactivated due to -@@ -749,11 +821,16 @@ static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) +@@ -748,11 +840,16 @@ static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq) * service received if @entity is active) of the queue to calculate its * timestamps. */ @@ -4981,7 +5794,7 @@ index a64fec1..7d73b9d 100644 if (entity == sd->in_service_entity) { BUG_ON(entity->tree); /* -@@ -771,45 +848,133 @@ static void __bfq_activate_entity(struct bfq_entity *entity) +@@ -770,45 +867,133 @@ static void __bfq_activate_entity(struct bfq_entity *entity) * old start time. 
*/ bfq_active_extract(st, entity); @@ -5003,9 +5816,7 @@ index a64fec1..7d73b9d 100644 - st->wsum += entity->weight; - bfq_get_entity(entity); + unsigned long long min_vstart; - -- BUG_ON(entity->on_st); -- entity->on_st = 1; ++ + /* See comments on bfq_fqq_update_budg_for_activation */ + if (non_blocking_wait_rq && bfq_gt(st->vtime, entity->finish)) { + backshifted = true; @@ -5030,7 +5841,9 @@ index a64fec1..7d73b9d 100644 + entity->start = min_vstart; + st->wsum += entity->weight; + bfq_get_entity(entity); -+ + +- BUG_ON(entity->on_st); +- entity->on_st = 1; + BUG_ON(entity->on_st); + entity->on_st = 1; + } @@ -5135,7 +5948,7 @@ index a64fec1..7d73b9d 100644 sd = entity->sched_data; if (!bfq_update_next_in_service(sd)) -@@ -890,23 +1055,24 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +@@ -889,23 +1074,24 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) if (!__bfq_deactivate_entity(entity, requeue)) /* @@ -5168,7 +5981,7 @@ index a64fec1..7d73b9d 100644 */ requeue = 1; } -@@ -916,9 +1082,23 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) +@@ -915,9 +1101,23 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue) update: entity = parent; for_each_entity(entity) { @@ -5193,7 +6006,31 @@ index a64fec1..7d73b9d 100644 if (!bfq_update_next_in_service(sd)) break; } -@@ -997,10 +1177,11 @@ left: +@@ -943,7 +1143,23 @@ static void bfq_update_vtime(struct bfq_service_tree *st) + + entry = rb_entry(node, struct bfq_entity, rb_node); + if (bfq_gt(entry->min_start, st->vtime)) { ++ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entry); + st->vtime = entry->min_start; ++ ++ if (bfqq) ++ bfq_log_bfqq(bfqq->bfqd, bfqq, ++ "update_vtime: new vtime %llu %p", ++ ((st->vtime>>10)*1000)>>12, st); ++#ifdef CONFIG_BFQ_GROUP_IOSCHED ++ else { ++ struct bfq_group *bfqg = ++ container_of(entry, struct bfq_group, entity); ++ ++ bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, ++ "update_vtime: new vtime %llu %p", ++ ((st->vtime>>10)*1000)>>12, st); ++ } ++#endif + bfq_forget_idle(st); + } + } +@@ -996,10 +1212,11 @@ left: * Update the virtual time in @st and return the first eligible entity * it contains. */ @@ -5207,7 +6044,7 @@ index a64fec1..7d73b9d 100644 if (RB_EMPTY_ROOT(&st->active)) return NULL; -@@ -1009,6 +1190,24 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, +@@ -1008,6 +1225,24 @@ static struct bfq_entity *__bfq_lookup_next_entity(struct bfq_service_tree *st, entity = bfq_first_active_entity(st); BUG_ON(bfq_gt(entity->start, st->vtime)); @@ -5232,9 +6069,16 @@ index a64fec1..7d73b9d 100644 /* * If the chosen entity does not match with the sched_data's * next_in_service and we are forcedly serving the IDLE priority -@@ -1045,10 +1244,28 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, +@@ -1043,11 +1278,36 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, + BUG_ON(sd->in_service_entity); ++ /* ++ * Choose from idle class, if needed to guarantee a minimum ++ * bandwidth to this class. This should also mitigate ++ * priority-inversion problems in case a low priority task is ++ * holding file system resources. 
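
The guard below is a plain elapsed-time check; modeled in isolation (names assumed for the example):

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of the idle-class anti-starvation check in
     * bfq_lookup_next_entity(): once the idle class has waited longer
     * than the timeout, it is served regardless of the higher classes. */
    static bool must_serve_idle_class(unsigned long now_jiffies,
                                      unsigned long idle_last_service,
                                      unsigned long idle_timeout)
    {
            return now_jiffies - idle_last_service > idle_timeout;
    }

    int main(void)
    {
            printf("%d\n", must_serve_idle_class(1000, 500, 400));
            return 0;
    }
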
++ */ if (bfqd && - jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) { + jiffies - bfqd->bfq_class_idle_last_service > @@ -5243,11 +6087,12 @@ index a64fec1..7d73b9d 100644 true); if (entity) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, + "idle chosen from st %p %d", + st + BFQ_IOPRIO_CLASSES - 1, -+ BFQ_IOPRIO_CLASSES - 1) ; ++ BFQ_IOPRIO_CLASSES - 1); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = @@ -5256,22 +6101,23 @@ index a64fec1..7d73b9d 100644 + bfq_log_bfqg(bfqd, bfqg, + "idle chosen from st %p %d", + st + BFQ_IOPRIO_CLASSES - 1, -+ BFQ_IOPRIO_CLASSES - 1) ; ++ BFQ_IOPRIO_CLASSES - 1); + } +#endif i = BFQ_IOPRIO_CLASSES - 1; bfqd->bfq_class_idle_last_service = jiffies; sd->next_in_service = entity; -@@ -1057,6 +1274,24 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, +@@ -1056,6 +1316,25 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, for (; i < BFQ_IOPRIO_CLASSES; i++) { entity = __bfq_lookup_next_entity(st + i, false); if (entity) { + if (bfqd != NULL) { + struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); ++ + if (bfqq) + bfq_log_bfqq(bfqd, bfqq, + "chosen from st %p %d", -+ st + i, i) ; ++ st + i, i); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = @@ -5279,7 +6125,7 @@ index a64fec1..7d73b9d 100644 + + bfq_log_bfqg(bfqd, bfqg, + "chosen from st %p %d", -+ st + i, i) ; ++ st + i, i); + } +#endif + } @@ -5287,7 +6133,7 @@ index a64fec1..7d73b9d 100644 if (extract) { bfq_check_next_in_service(sd, entity); bfq_active_extract(st + i, entity); -@@ -1070,6 +1305,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, +@@ -1069,6 +1348,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd, return entity; } @@ -5301,7 +6147,7 @@ index a64fec1..7d73b9d 100644 /* * Get next queue for service. 
*/ -@@ -1086,7 +1328,36 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) +@@ -1085,7 +1371,36 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) sd = &bfqd->root_group->sched_data; for (; sd ; sd = entity->my_sched_data) { @@ -5338,7 +6184,18 @@ index a64fec1..7d73b9d 100644 BUG_ON(!entity); entity->service = 0; } -@@ -1113,9 +1384,7 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -1103,8 +1418,9 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) + bfqd->in_service_bic = NULL; + } + ++ bfq_clear_bfqq_wait_request(bfqd->in_service_queue); ++ hrtimer_try_to_cancel(&bfqd->idle_slice_timer); + bfqd->in_service_queue = NULL; +- del_timer(&bfqd->idle_slice_timer); + } + + static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -1112,9 +1428,7 @@ static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; @@ -5349,7 +6206,7 @@ index a64fec1..7d73b9d 100644 bfq_deactivate_entity(entity, requeue); } -@@ -1123,12 +1392,11 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -1122,12 +1436,11 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_entity *entity = &bfqq->entity; @@ -5364,7 +6221,7 @@ index a64fec1..7d73b9d 100644 /* * Called when the bfqq no longer has requests pending, remove it from -@@ -1139,6 +1407,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -1138,6 +1451,7 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, { BUG_ON(!bfq_bfqq_busy(bfqq)); BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list)); @@ -5372,7 +6229,7 @@ index a64fec1..7d73b9d 100644 bfq_log_bfqq(bfqd, bfqq, "del from busy"); -@@ -1147,27 +1416,20 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, +@@ -1146,27 +1460,20 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq, BUG_ON(bfqd->busy_queues == 0); bfqd->busy_queues--; @@ -5397,16 +6254,16 @@ index a64fec1..7d73b9d 100644 -#ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_stats_update_dequeue(bfqq_group(bfqq)); -#endif - ++ + BUG_ON(bfqq->entity.budget < 0); -+ + bfq_deactivate_bfqq(bfqd, bfqq, requeue); + + BUG_ON(bfqq->entity.budget < 0); } /* -@@ -1185,16 +1447,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) +@@ -1184,16 +1491,11 @@ static void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_mark_bfqq_busy(bfqq); bfqd->busy_queues++; @@ -5426,17 +6283,28 @@ index a64fec1..7d73b9d 100644 bfqd->wr_busy_queues++; } diff --git a/block/bfq.h b/block/bfq.h -index f73c942..49d28b9 100644 +index fcce855..ea1e7d8 100644 --- a/block/bfq.h +++ b/block/bfq.h @@ -1,5 +1,5 @@ /* - * BFQ-v7r11 for 4.5.0: data structures and common functions prototypes. -+ * BFQ-v8r3 for 4.7.0: data structures and common functions prototypes. ++ * BFQ-v8r4 for 4.8.0: data structures and common functions prototypes. * * Based on ideas and code from CFQ: * Copyright (C) 2003 Jens Axboe -@@ -28,20 +28,21 @@ +@@ -7,7 +7,9 @@ + * Copyright (C) 2008 Fabio Checconi + * Paolo Valente + * +- * Copyright (C) 2010 Paolo Valente ++ * Copyright (C) 2015 Paolo Valente ++ * ++ * Copyright (C) 2016 Paolo Valente + */ + + #ifndef _BFQ_H +@@ -28,20 +30,21 @@ #define BFQ_DEFAULT_QUEUE_IOPRIO 4 @@ -5465,7 +6333,7 @@ index f73c942..49d28b9 100644 * * Each service tree represents a B-WF2Q+ scheduler on its own. 
Each * ioprio_class has its own independent scheduler, and so its own -@@ -49,27 +50,28 @@ struct bfq_entity; +@@ -49,27 +52,28 @@ struct bfq_entity; * of the containing bfqd. */ struct bfq_service_tree { @@ -5476,8 +6344,8 @@ index f73c942..49d28b9 100644 - struct bfq_entity *first_idle; - struct bfq_entity *last_idle; -+ struct bfq_entity *first_idle; /* idle entity with minimum F_i */ -+ struct bfq_entity *last_idle; /* idle entity with maximum F_i */ ++ struct bfq_entity *first_idle; /* idle entity with minimum F_i */ ++ struct bfq_entity *last_idle; /* idle entity with maximum F_i */ - u64 vtime; + u64 vtime; /* scheduler virtual time */ @@ -5504,7 +6372,7 @@ index f73c942..49d28b9 100644 * * The supported ioprio_classes are the same as in CFQ, in descending * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. -@@ -79,48 +81,29 @@ struct bfq_service_tree { +@@ -79,48 +83,29 @@ struct bfq_service_tree { * All the fields are protected by the queue lock of the containing bfqd. */ struct bfq_sched_data { @@ -5562,7 +6430,7 @@ index f73c942..49d28b9 100644 * * A bfq_entity is used to represent either a bfq_queue (leaf node in the * cgroup hierarchy) or a bfq_group into the upper level scheduler. Each -@@ -147,27 +130,52 @@ struct bfq_weight_counter { +@@ -147,27 +132,52 @@ struct bfq_weight_counter { * containing bfqd. */ struct bfq_entity { @@ -5600,7 +6468,7 @@ index f73c942..49d28b9 100644 + /* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */ + int budget; + -+ unsigned int weight; /* weight of the queue */ ++ unsigned int weight; /* weight of the queue */ + unsigned int new_weight; /* next weight if a change is in progress */ + + /* original weight, used to implement weight boosting */ @@ -5621,7 +6489,7 @@ index f73c942..49d28b9 100644 int prio_changed; }; -@@ -175,56 +183,6 @@ struct bfq_group; +@@ -175,56 +185,6 @@ struct bfq_group; /** * struct bfq_queue - leaf schedulable entity. @@ -5678,7 +6546,7 @@ index f73c942..49d28b9 100644 * * A bfq_queue is a leaf request queue; it can be associated with an * io_context or more, if it is async or shared between cooperating -@@ -235,117 +193,163 @@ struct bfq_group; +@@ -235,117 +195,174 @@ struct bfq_group; * All the fields are protected by the queue lock of the containing bfqd. */ struct bfq_queue { @@ -5794,6 +6662,10 @@ index f73c942..49d28b9 100644 + * last transition from idle to backlogged. + */ unsigned long service_from_backlogged; ++ /* ++ * Value of wr start time when switching to soft rt ++ */ ++ unsigned long wr_start_at_switch_to_srt; + + unsigned long split_time; /* time of last split */ }; @@ -5806,11 +6678,11 @@ index f73c942..49d28b9 100644 */ struct bfq_ttime { - unsigned long last_end_request; -+ unsigned long last_end_request; /* completion time of last request */ ++ u64 last_end_request; /* completion time of last request */ + -+ unsigned long ttime_total; /* total process thinktime */ ++ u64 ttime_total; /* total process thinktime */ + unsigned long ttime_samples; /* number of thinktime samples */ -+ unsigned long ttime_mean; /* average process thinktime */ ++ u64 ttime_mean; /* average process thinktime */ - unsigned long ttime_total; - unsigned long ttime_samples; @@ -5883,13 +6755,19 @@ index f73c942..49d28b9 100644 + * with another cooperating queue. + */ bool was_in_burst_list; -- + - unsigned int cooperations; - unsigned int failed_cooperations; ++ /* ++ * Similar to previous fields: save wr information. 
++ */ ++ unsigned long saved_wr_coeff; ++ unsigned long saved_last_wr_start_finish; ++ unsigned long saved_wr_start_at_switch_to_srt; }; enum bfq_device_speed { -@@ -354,224 +358,216 @@ enum bfq_device_speed { +@@ -354,224 +371,234 @@ enum bfq_device_speed { }; /** @@ -6000,10 +6878,10 @@ index f73c942..49d28b9 100644 - * @last_ins_in_burst. - * @burst_size: number of queues in the current burst of queue activations. - * @bfq_large_burst_thresh: maximum burst size above which the current -- * queue-activation burst is deemed as 'large'. +- * queue-activation burst is deemed as 'large'. - * @large_burst: true if a large queue-activation burst is in progress. - * @burst_list: head of the burst list (as for the above fields, more details -- * in the comments to the function bfq_handle_burst). +- * in the comments to the function bfq_handle_burst). - * @low_latency: if set to true, low-latency heuristics are enabled. - * @bfq_wr_coeff: maximum factor by which the weight of a weight-raised - * queue is multiplied. @@ -6083,11 +6961,12 @@ index f73c942..49d28b9 100644 + /* number of budgets assigned */ int budgets_assigned; +- struct timer_list idle_slice_timer; + /* + * Timer set when idling (waiting) for the next request from + * the queue in service. + */ - struct timer_list idle_slice_timer; ++ struct hrtimer idle_slice_timer; + /* delayed work to restart dispatching on the request queue */ struct work_struct unplug_work; @@ -6099,14 +6978,33 @@ index f73c942..49d28b9 100644 + /* on-disk position of the last served request */ sector_t last_position; ++ /* time of last request completion (ns) */ ++ u64 last_completion; ++ ++ /* time of first rq dispatch in current observation interval (ns) */ ++ u64 first_dispatch; ++ /* time of last rq dispatch in current observation interval (ns) */ ++ u64 last_dispatch; ++ + /* beginning of the last budget */ ktime_t last_budget_start; + /* beginning of the last idle slice */ ktime_t last_idling_start; -+ /* number of samples used to calculate @peak_rate */ ++ ++ /* number of samples in current observation interval */ int peak_rate_samples; -+ /* peak transfer rate observed for a budget */ - u64 peak_rate; +- u64 peak_rate; ++ /* num of samples of seq dispatches in current observation interval */ ++ u32 sequential_samples; ++ /* total num of sectors transferred in current observation interval */ ++ u64 tot_sectors_dispatched; ++ /* max rq size seen during current observation interval (sectors) */ ++ u32 last_rq_max_size; ++ /* time elapsed from first dispatch in current observ. interval (us) */ ++ u64 delta_from_first; ++ /* current estimate of device peak rate */ ++ u32 peak_rate; ++ + /* maximum budget allotted to a bfq_queue before rescheduling */ int bfq_max_budget; @@ -6115,17 +7013,19 @@ index f73c942..49d28b9 100644 + /* list of all the bfq_queues idle on the device */ struct list_head idle_list; +- unsigned int bfq_fifo_expire[2]; + /* + * Timeout for async/sync requests; when it fires, requests + * are served in fifo order. 
+ */ - unsigned int bfq_fifo_expire[2]; ++ u64 bfq_fifo_expire[2]; + /* weight of backward seeks wrt forward ones */ unsigned int bfq_back_penalty; + /* maximum allowed backward seek */ unsigned int bfq_back_max; +- unsigned int bfq_slice_idle; + /* maximum idling time */ - unsigned int bfq_slice_idle; ++ u32 bfq_slice_idle; + /* last time CLASS_IDLE was served */ u64 bfq_class_idle_last_service; @@ -6250,7 +7150,7 @@ index f73c942..49d28b9 100644 BFQ_BFQQ_FLAG_IO_bound, /* * bfqq has timed-out at least once * having consumed at most 2/10 of -@@ -581,17 +577,12 @@ enum bfqq_state_flags { +@@ -581,17 +608,12 @@ enum bfqq_state_flags { * bfqq activated in a large burst, * see comments to bfq_handle_burst. */ @@ -6269,7 +7169,7 @@ index f73c942..49d28b9 100644 }; #define BFQ_BFQQ_FNS(name) \ -@@ -608,25 +599,53 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ +@@ -608,25 +630,53 @@ static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \ return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \ } @@ -6328,7 +7228,7 @@ index f73c942..49d28b9 100644 #define bfq_log(bfqd, fmt, args...) \ blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args) -@@ -640,15 +659,12 @@ enum bfqq_expiration { +@@ -640,15 +690,12 @@ enum bfqq_expiration { BFQ_BFQQ_BUDGET_TIMEOUT, /* budget took too long to be used */ BFQ_BFQQ_BUDGET_EXHAUSTED, /* budget consumed */ BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */ @@ -6346,7 +7246,7 @@ index f73c942..49d28b9 100644 /* number of ios merged */ struct blkg_rwstat merged; /* total time spent on device in ns, may not be accurate w/ queueing */ -@@ -657,12 +673,8 @@ struct bfqg_stats { +@@ -657,12 +704,8 @@ struct bfqg_stats { struct blkg_rwstat wait_time; /* number of IOs queued up */ struct blkg_rwstat queued; @@ -6359,7 +7259,7 @@ index f73c942..49d28b9 100644 /* sum of number of ios queued across all samples */ struct blkg_stat avg_queue_size_sum; /* count of samples taken for average */ -@@ -680,8 +692,10 @@ struct bfqg_stats { +@@ -680,8 +723,10 @@ struct bfqg_stats { uint64_t start_idle_time; uint64_t start_empty_time; uint16_t flags; @@ -6370,7 +7270,7 @@ index f73c942..49d28b9 100644 /* * struct bfq_group_data - per-blkcg storage for the blkio subsystem. * -@@ -692,7 +706,7 @@ struct bfq_group_data { +@@ -692,7 +737,7 @@ struct bfq_group_data { /* must be the first member */ struct blkcg_policy_data pd; @@ -6379,7 +7279,7 @@ index f73c942..49d28b9 100644 }; /** -@@ -712,7 +726,7 @@ struct bfq_group_data { +@@ -712,7 +757,7 @@ struct bfq_group_data { * unused for the root group. Used to know whether there * are groups with more than one active @bfq_entity * (see the comments to the function @@ -6388,7 +7288,7 @@ index f73c942..49d28b9 100644 * @rq_pos_tree: rbtree sorted by next_request position, used when * determining if two or more queues have interleaving * requests (see bfq_find_close_cooperator()). -@@ -745,7 +759,6 @@ struct bfq_group { +@@ -745,7 +790,6 @@ struct bfq_group { struct rb_root rq_pos_tree; struct bfqg_stats stats; @@ -6396,7 +7296,7 @@ index f73c942..49d28b9 100644 }; #else -@@ -767,11 +780,25 @@ bfq_entity_service_tree(struct bfq_entity *entity) +@@ -767,11 +811,25 @@ bfq_entity_service_tree(struct bfq_entity *entity) struct bfq_sched_data *sched_data = entity->sched_data; struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); unsigned int idx = bfqq ? 
bfqq->ioprio_class - 1 : @@ -6409,7 +7309,7 @@ index f73c942..49d28b9 100644 + if (bfqq) + bfq_log_bfqq(bfqq->bfqd, bfqq, + "entity_service_tree %p %d", -+ sched_data->service_tree + idx, idx) ; ++ sched_data->service_tree + idx, idx); +#ifdef CONFIG_BFQ_GROUP_IOSCHED + else { + struct bfq_group *bfqg = @@ -6417,13 +7317,13 @@ index f73c942..49d28b9 100644 + + bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg, + "entity_service_tree %p %d", -+ sched_data->service_tree + idx, idx) ; ++ sched_data->service_tree + idx, idx); + } +#endif return sched_data->service_tree + idx; } -@@ -791,47 +818,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) +@@ -791,47 +849,6 @@ static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) return bic->icq.q->elevator->elevator_data; } @@ -6471,7 +7371,7 @@ index f73c942..49d28b9 100644 #ifdef CONFIG_BFQ_GROUP_IOSCHED static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq) -@@ -857,11 +843,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio); +@@ -857,11 +874,13 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio); static void bfq_put_queue(struct bfq_queue *bfqq); static void bfq_dispatch_insert(struct request_queue *q, struct request *rq); static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, @@ -6488,5 +7388,5 @@ index f73c942..49d28b9 100644 #endif /* _BFQ_H */ -- -1.9.1 +2.7.4 (Apple Git-66) diff --git a/README.BFQ b/README.BFQ index 4e0c72d..7787759 100644 --- a/README.BFQ +++ b/README.BFQ @@ -1,544 +1,592 @@ -Budget Fair Queueing I/O Scheduler -================================== - -This patchset introduces BFQ-v8r3 into Linux 4.7.0. -For further information: http://algogroup.unimore.it/people/paolo/disk_sched/. - -The overall diffstat is the following: - - block/Kconfig.iosched | 30 + - block/Makefile | 1 + - block/bfq-cgroup.c | 1178 +++++++++++++++++++++ - block/bfq-ioc.c | 36 + - block/bfq-iosched.c | 4895 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - block/bfq-sched.c | 1450 ++++++++++++++++++++++++++ - block/bfq.h | 848 +++++++++++++++ - include/linux/blkdev.h | 2 +- - 8 files changed, 8439 insertions(+), 1 deletion(-) - -CHANGELOG - -v8r3 - -. BUGFIX Update weight-raising coefficient when switching from - interactive to soft real-time. - -v8r2 - -. BUGFIX Removed variables that are not used if tracing is - disabled. Reported by Lee Tibbert - -. IMPROVEMENT Ported commit ae11889636: turned blkg_lookup_create into - blkg_lookup. As a side benefit, this finally enables BFQ to be used - as a module even with full hierarchical support. - -v8r1 - -. BUGFIX Fixed incorrect invariant check - -. IMPROVEMENT Privileged soft real-time applications against - interactive ones, to guarantee a lower and more stable latency to - the former - -v8 - -. BUGFIX: Fixed incorrect rcu locking in bfq_bic_update_cgroup - -. BUGFIX Fixed a few cgroups-related bugs, causing sporadic crashes - -. BUGFIX Fixed wrong computation of queue weights as a function of ioprios - -. BUGFIX Fixed wrong Kconfig.iosched dependency for BFQ_GROUP_IOSCHED - -. IMPROVEMENT Preemption-based, idle-less service guarantees. If - several processes are competing for the device at the same time, but - all processes and groups have the same weight, then the mechanism - introduced by this improvement enables BFQ to guarantee the expected - throughput distribution without ever idling the device. Throughput - is then much higher in this common scenario. - -. 
IMPROVEMENT Made burst handling more robust - -. IMPROVEMENT Reduced false positives in EQM - -. IMPROVEMENT Let queues preserve weight-raising also when shared - -. IMPROVEMENT Improved peak-rate estimation and autotuning of the - parameters related to the device rate - -. IMPROVEMENT Improved the weight-raising mechanism so as to further - reduce latency and to increase robustness - -. IMPROVEMENT Added a strict-guarantees tunable. If this tunable is - set, then device-idling is forced whenever needed to provide - accurate service guarantees. CAVEAT: idling unconditionally may even - increase latencies, in case of processes that did stop doing I/O. - -. IMPROVEMENT Improved handling of async (write) I/O requests - -. IMPROVEMENT Ported several good CFQ commits - -. CHANGE Changed default group weight to 100 - -. CODE IMPROVEMENT Refactored I/O-request-insertion code - -v7r11: -. BUGFIX Remove the group_list data structure, which ended up in an - inconsistent state if BFQ happened to be activated for some device - when some blkio groups already existed (these groups where not added - to the list). The blkg list for the request queue is now used where - the removed group_list was used. - -. BUGFIX Init and reset also dead_stats. - -. BUGFIX Added, in __bfq_deactivate_entity, the correct handling of the - case where the entity to deactivate has not yet been activated at all. - -. BUGFIX Added missing free of the root group for the case where full - hierarchical support is not activated. - -. IMPROVEMENT Removed the now useless bfq_disconnect_groups - function. The same functionality is achieved through multiple - invocations of bfq_pd_offline (which are in their turn guaranteed to - be executed, when needed, by the blk-cgroups code). - -v7r10 : -. BUGFIX: Fixed wrong check on whether cooperating processes belong - to the same cgroup. - -v7r9: -. IMPROVEMENT: Changed BFQ to use the blkio controller instead of its - own controller. BFQ now registers itself as a policy to the blkio - controller and implements its hierarchical scheduling support using - data structures that already exist in blk-cgroup. The bfqio - controller's code is completely removed. - -. CODE IMPROVEMENTS: Applied all suggestions from Tejun Heo, received - on the last submission to lkml: https://lkml.org/lkml/2014/5/27/314. - -v7r8: -. BUGFIX: Let weight-related fields of a bfq_entity be correctly initialized - (also) when the I/O priority of the entity is changed before the first - request is inserted into the bfq_queue associated to the entity. -. BUGFIX: When merging requests belonging to different bfq_queues, avoid - repositioning the surviving request. In fact, in this case the repositioning - may result in the surviving request being moved across bfq_queues, which - would ultimately cause bfq_queues' data structures to become inconsistent. -. BUGFIX: When merging requests belonging to the same bfq_queue, reposition - the surviving request so that it gets in the correct position, namely the - position of the dropped request, instead of always being moved to the head - of the FIFO of the bfq_queue (which means to let the request be considered - the eldest one). -. BUGFIX: Reduce the idling slice for seeky queues only if the scenario is - symmetric. This guarantees that also processes associated to seeky queues - do receive their reserved share of the throughput. - Contributed by Riccardo Pizzetti and Samuele Zecchini. -. 
IMPROVEMENT: Always perform device idling if the scenario is asymmetric in - terms of throughput distribution among processes. - This extends throughput-distribution guarantees to any process, regardless - of the properties of its request pattern and of the request patterns of the - other processes, and regardless of whether the device is NCQ-capable. -. IMPROVEMENT: Remove the current limitation on the maximum number of in-flight - requests allowed for a sync queue (limitation set in place for fairness - issues in CFQ, inherited by the first version of BFQ, but made unnecessary - by the latest accurate fairness strategies added to BFQ). Removing this - limitation enables devices with long internal queues to fill their queues - as much as they deem appropriate, also with sync requests. This avoids - throughput losses on these devices, because, to achieve a high throughput, - they often need to have a high number of requests queued internally. -. CODE IMPROVEMENT: Simplify I/O priority change logic by turning it into a - single-step procedure instead of a two-step one; improve readability by - rethinking the names of the functions involved in changing the I/O priority - of a bfq_queue. - -v7r7: -. BUGFIX: Prevent the OOM queue from being involved in the queue - cooperation mechanism. In fact, since the requests temporarily - redirected to the OOM queue could be redirected again to dedicated - queues at any time, the state needed to correctly handle merging - with the OOM queue would be quite complex and expensive to - maintain. Besides, in such a critical condition as an out of - memory, the benefits of queue merging may be little relevant, or - even negligible. -. IMPROVEMENT: Let the OOM queue be initialized only once. Previously, - the OOM queue was reinitialized, at each request enqueue, with the - parameters related to the process that issued that request. - Depending on the parameters of the processes doing I/O, this could - easily cause the OOM queue to be moved continuously across service - trees, or even across groups. It also caused the parameters of the - OOM queue to be continuously reset in any case. -. CODE IMPROVEMENT. Performed some minor code cleanups, and added some - BUG_ON()s that, if the weight of an entity becomes inconsistent, - should better help understand why. - -v7r6: -. IMPROVEMENT: Introduced a new mechanism that helps get the job done - more quickly with services and applications that create or reactivate - many parallel I/O-bound processes. This is the case, for example, with - systemd at boot, or with commands like git grep. -. CODE IMPROVEMENTS: Small code cleanups and improvements. - -v7r5: -. IMPROVEMENT: Improve throughput boosting by idling the device - only for processes that, in addition to perform sequential I/O, - are I/O-bound (apart from weight-raised queues, for which idling - is always performed to guarantee them a low latency). -. IMPROVEMENT: Improve throughput boosting by depriving processes - that cooperate often of weight-raising. -. CODE IMPROVEMENT: Pass of improvement of the readability of both - comments and actual code. - -v7r4: -. BUGFIX. Modified the code so as to be robust against late detection of - NCQ support for a rotational device. -. BUGFIX. Removed a bug that hindered the correct throughput distribution - on flash-based devices when not every process had to receive the same - fraction of the throughput. 
This fix entailed also a little efficiency - improvement, because it implied the removal of a short function executed - in a hot path. -. CODESTYLE IMPROVEMENT: removed quoted strings split across lines. - -v7r3: -. IMPROVEMENT: Improved throughput boosting with NCQ-capable HDDs and - random workloads. The mechanism that further boosts throghput with - these devices and workloads is activated only in the cases where it - does not cause any violation of throughput-distribution and latency - guarantees. -. IMPROVEMENT: Generalized the computation of the parameters of the - low-latency heuristic for interactive applications, so as to fit also - slower storage devices. The purpose of this improvement is to preserve - low-latency guarantees for interactive applications also on slower - devices, such as portable hard disks, multimedia and SD cards. -. BUGFIX: Re-added MODULE_LICENSE macro. -. CODE IMPROVEMENTS: Small code cleanups; introduced a coherent naming - scheme for all identifiers related to weight raising; refactored and - optimized a few hot paths. - -v7r2: -. BUGFIX/IMPROVEMENT. One of the requirements for an application to be - deemed as soft real-time is that it issues its requests in batches, and - stops doing I/O for a well-defined amount of time before issuing a new - batch. Imposing this minimum idle time allows BFQ to filter out I/O-bound - applications that may otherwise be incorrectly deemed as soft real-time - (under the circumstances described in detail in the comments to the - function bfq_bfqq_softrt_next_start()). Unfortunately, BFQ could however - start counting this idle time from two different events: either from the - expiration of the queue, if all requests of the queue had also been already - completed when the queue expired, or, if the previous condition did not - hold, from the first completion of one of the still outstanding requests. - In the second case, an application had more chances to be deemed as soft - real-time. - Actually, there was no reason for this differentiated treatment. We - addressed this issue by defining more precisely the above requirement for - an application to be deemed as soft real-time, and changing the code - consequently: a well-defined amount of time must elapse between the - completion of *all the requests* of the current pending batch and the - issuing of the first request of the next batch (this is, in the end, what - happens with a true soft real-time application). This change further - reduced false positives, and, as such, improved responsiveness and reduced - latency for actual soft real-time applications. -. CODE IMPROVEMENT. We cleaned up the code a little bit and addressed - some issues pointed out by the checkpatch.pl script. - -v7r1: -. BUGFIX. Replace the old value used to approximate 'infinity', with - the correct one to use in case times are compared through the macro - time_is_before_jiffies(). In fact, this macro, designed to take - wraparound issues into account, easily returns anomalous results if - its argument is equal to the value that we used as an approximation - of 'infinity', namely ((unsigned long) (-1)). The consequence was - that the logical expression used to determine whether a queue - belongs to a soft real-time application often yielded an incorrect - result. In the end, some application happened to be incorrectly - deemed as soft real-time and hence weight-raised. This affected both - throughput and latency guarantees. -. BUGFIX. 
Fixed a scriverner's error made in an attempt to use the - above macro in a logical expression. -. IMPROVEMENT/BUGFIX. On the expiration of a queue, use a more general - condition to allow a weight-raising period to start if the queue is - soft real-time. The previous condition could prevent an empty, - soft-real time queue from being correctly deemed as soft real-time. -. IMPROVEMENT/MINOR BUGFIX. Use jiffies-comparison macros also in the - following cases: - . to establish whether an application initially deemed as interactive - is now meeting the requirements for being classified as soft - real-time; - . to determine if a weight-raising period must be ended. -. CODE IMPROVEMENT. Change the type of the time quantities used in the - weight-raising heuristics to unsigned long, as the type of the time - (jiffies) is unsigned long. - -v7: -- IMPROVEMENT: In the presence of weight-raised queues and if the - device is NCQ-enabled, device idling is now disabled for non-raised - readers, i.e., for their associated sync queues. Hence a sync queue - is expired immediately if it becomes empty, and a new queue is - served. As explained in detail in the papers about BFQ, not idling - the device for sync queues when the latter become empty causes BFQ to - assign higher timestamps to these queues when they get backlogged - again, and hence to serve these queues less frequently. This fact, - plus to the fact that, because of the immediate expiration itself, - these queues get less service while they are granted access to the - disk, reduces the relative rate at which the processes associated to - these queues ask for requests from the I/O request pool. If the pool - is saturated, as it happens in the presence of write hogs, reducing - the above relative rate increases the probability that a request is - available (soon) in the pool when a weight-raised process needs it. - This change does seem to mitigate the typical starvation problems - that occur in the presence of write hogs and NCQ, and hence to - guarantee a higher application and system responsiveness in these - hostile scenarios. -- IMPROVEMENT/BUGFIX: Introduced a new classification rule to the soft - real-time heuristic, which takes into account also the isochronous - nature of such applications. The computation of next_start has been - fixed as well. Now it is correctly done from the time of the last - transition from idle to backlogged; the next_start is therefore - computed from the service received by the queue from its last - transition from idle to backlogged. Finally, the code which - preserved weight-raising for a soft real-time queue even with no - idle->backlogged transition has been removed. -- IMPROVEMENT: Add a few jiffies to the reference time interval used to - establish whether an application is greedy or not. This reference - interval was, by default, HZ/125 seconds, which could generate false - positives in the following two cases (especially if both cases occur): - 1) If HZ is so low that the duration of a jiffie is comparable to or - higher than the above reference time interval. This happens, e.g., - on slow devices with HZ=100. - 2) If jiffies, instead of increasing at a constant rate, may stop - increasing for some time, then suddenly 'jump' by several units to - recover the lost increments. This seems to happen, e.g., in virtual - machines. - The added number of jiffies has been found experimentally. 
In particular, - according to our experiments, adding this number of jiffies seems to make - the filter quite precise also in embedded systems and KVM/QEMU virtual - machines. Also contributed by - Alexander Spyridakis . -- IMPROVEMENT/BUGFIX: Keep disk idling also for NCQ-provided - rotational devices, which boosts the throughput on NCQ-enabled - rotational devices. -- BUGFIX: The budget-timeout condition in the bfq_rq_enqueued() function - was checked only if the request is large enough to provoke an unplug. As - a consequence, for a process always issuing small I/O requests the - budget timeout was never checked. The queue associated to the process - therefore expired only when its budget was exhausted, even if the - queue had already incurred a budget timeout from a while. - This fix lets a queue be checked for budget timeout at each request - enqueue, and, if needed, expires the queue accordingly even if the - request is small. -- BUGFIX: Make sure that weight-raising is resumed for a split queue, - if it was merged when already weight-raised. -- MINOR BUGFIX: Let bfq_end_raising_async() correctly end weight-raising - also for the queues belonging to the root group. -- IMPROVEMENT: Get rid of the some_coop_idle flag, which in its turn - was used to decide whether to disable idling for an in-service - shared queue whose seek mean decreased. In fact, disabling idling - for such a queue turned out to be useless. -- CODE IMPROVEMENT: The bfq_bfqq_must_idle() function and the - bfq_select_queue() function may not change the current in-service - queue in various cases. We have cleaned up the involved conditions, - by factoring out the common parts and getting rid of the useless - ones. -- MINOR CODE IMPROVEMENT: The idle_for_long_time condition in the - bfq_add_rq_rb() function should be evaluated only on an - idle->backlogged transition. Now the condition is set to false - by default, evaluating it only if the queue was not busy on a - request insertion. -- MINOR CODE IMPROVEMENT: Added a comment describing the rationale - behind the condition evaluated in the function - bfq_bfqq_must_not_expire(). - -v6r2: -- Fairness fix: the case of queue expiration for budget timeout is - now correctly handled also for sync queues, thus allowing also - the processes corresponding to these queues to be guaranteed their - reserved share of the disk throughput. -- Fixed a bug that prevented group weights from being correctly - set via the sysfs interface. -- Fixed a bug that cleared a previously-set group weight if the - same value was re-inserted via the sysfs interface. -- Fixed an EQM bug that allowed a newly-started process to skip - its initial weight-raising period if its queue was merged before - its first request was inserted. -- Fixed a bug that preserved already-started weight-raising periods - even if the low_latency tunable was disabled. -- The raising_max_time tunable now shows, more user-friendly, the - maximum raising time in milliseconds. - -v6r1: -- Fix use-after-free of queues in __bfq_bfqq_expire(). It may happen that - a call to bfq_del_bfqq_busy() puts the last reference taken on a queue - and frees it. Subsequent accesses to that same queue would result in a - use-after-free. Make sure that a queue that has just been deleted from - busy is no more touched. -- Use the uninitialized_var() macro when needed. It may happen that a - variable is initialized in a function that is called by the function - that defined it. Use the uninitialized_var() macro in these cases. 
- -v6: -- Replacement of the cooperating-queue merging mechanism borrowed from - CFQ with Early Queue Merge (EQM), a unified mechanism to get a - sequential read pattern, and hence a high throughput, with any set of - processes performing interleaved I/O. EQM also preserves low latency. - (see http://algogroup.unimore.it/people/paolo/disk_sched/description.php - for more details). Contributed by Mauro Andreolini and Arianna Avanzini. - The code for detecting whether two queues have to be merged is a - slightly modified version of the CFQ code for detecting whether two - queues belong to cooperating processes and whether the service of a - queue should be preempted to boost the throughput. -- Fix a bug that caused the peak rate of a disk to be computed as zero - in case of multiple I/O errors. Subsequent estimations of the weight - raising duration caused a division-by-zero error. - -v5r1: -- BUG FIX: Fixed stall occurring when the active queue is moved to - a different group while idling (this caused the idling timer to be - cancelled and hence no new queue to be selected, and no new - request to be dispatched). -- BUG FIX: Fixed wrong assignment of too high budgets to queues during - the first few seconds after initialization. -- BUG FIX: Added proper locking to the function handling the "weights" - tunable. - -v5: -- Added an heuristic that, if the tunable raising_max_time is set to - 0, automatically computes the duration of the weight raising - according to the estimated peak rate of the device. This enables - flash-based devices to reach maximum throughput as soon as possible, - without sacrificing latency. - -v4: -- Throughput-boosting for flash-based devices: improved version of commits - a68bbdd and f7d7b7a, which boosts the throughput while still preserving - latency guarantees for interactive and soft real-time applications. -- Better identification of NCQ-capable disks: port of commit e459dd0. - -v3-r4: -- Bugfixes - * Removed an important memory leak: under some circumstances the process references - to a queue were not decremented correctly, which prevented unused shared bfq_queue - to be correctly deallocated. - * Fixed various errors related to hierarchical scheduling: - * Removed an error causing tasks to be attached to the bfqio cgroup - controller even when BFQ was not the active scheduler - * Corrected wrong update of the budgets from the leaf to the root upon - forced selection of a service tree or a bfq_queue - * Fixed the way how active leaf entities are moved to the root group before - the group entity is deactivated when a cgroup is destroyed -- Throughput-boosting improvement for cooperating queues: close detection is now based - on a fixed threshold instead of the queue's average seek. This is a port of one of - the changes in the CFQ commit 3dde36d by Corrado Zoccolo. - -v3-r3: -- Bugfix: removed an important error causing occasional kernel panics when - moving a process to a new cgroup. The panic occurred if: - 1) the queue associated to the process was idle when the process was moved - and - 2) a new disk request was inserted into the queue just after the move. -- Further latency improvement through a better treatment of low-bandwidth - async queues. - -v3-r2: -- Bugfix: added a forgotten condition that prevents weights of low-bw async - queues from being raised when low_latency is off. -- Latency improvement: low-bw async queues are now better identified. - -v3-r1: -- Fixed an important request-dispatch bug causing occasional IO hangs. 
-- Added a new mechanism to reduce the latency of low-bw async queues. - This reduces the latency of also the sync queues synchronized with - the above async queues. -- Fixed a minor bug in iocontext locking (port of commits 9b50902 and 3181faa - from CFQ). - -v3: - -- Improved low-latency mechanisms, including a more accurate criterion to - distinguish between greedy-but-seeky and soft real-time applications. - Interactive applications now enjoy noticeably lower latencies. - -- Switch to the simpler one-request-dispatch-at-a-time scheme as in CFQ. - -- Ported cooperating-queues merging from CFQ (6d048f5, 1afba04, - d9e7620, a36e71f, 04dc6e7, 26a2ac0, 3ac6c9f, f2d1f0a, 83096eb, - 2e46e8b, df5fe3e, b3b6d04, e6c5bc7, c0324a0, f04a642, 8682e1f, - b9d8f4c, 2f7a2d8, ae54abe, e9ce335, 39c01b2, d02a2c0, c10b61f). - Contributed by Arianna Avanzini. Queues of processes performing IO - on interleaved, yet contiguous disk zones are merged to boost the - throughput. Some little optimizations to get a more stable throughput - have been added to the original CFQ version. - -- Added static fallback queue for extreme OOM conditions (porting of - CFQ commits d5036d7, 6118b70, b706f64, 32f2e80). Port contributed by - Francesco Allertsen. - -- Ported CFQ commits b0b78f8, 40bb54d, 30996f4, dddb745, ad5ebd2, cf7c25c; - mainly code cleanup and fix of minor bugs. Port contributed by - Francesco Allertsen. - -v2: - -- An issue that may cause little throughput loss on fast disks has been solved. - BFQ-v1 and CFQ may suffer from this problem. -- The disk-idling timeout has been better tuned to further file latency - (especially for the idle- or light-loaded-disk scenarios). -- One of the parameters of the low-latency heuristics has been tuned a little - bit more, so as to reduce the probability that a disk-bound process may - hamper the reduction of the latency of interactive and soft real-time - applications. - - - Same low-latency guarantees with and without NCQ. - - - Latency for interactive applications about halved with respect to BFQ-v1. - - - When the low_latency tunable is set, also soft real-time applications - now enjoy reduced latency. - - - A very little minimum bandwidth is now guaranteed to the - Idle IO-scheduling class also when the other classes are - backlogged, just to prevent them from starving. - -v1: - -This is a new version of BFQ with respect to the versions you can -find on Fabio's site: http://feanor.sssup.it/~fabio/linux/bfq. 
-Here is what we changed with respect to the previous versions:
-
-1) re-tuned the budget feedback mechanism: it is now slighlty more
-biased toward assigning high budgets, to boost the aggregated
-throughput more, and more quickly as new processes are started
-
-2) introduced more tolerance toward seeky queues (I verified that the
-phenomena described below used to occur systematically):
-
-   2a: if a queue is expired after having received very little
-       service, then it is not punished as a seeky queue, even if it
-       occurred to consume that little service too slowly; the
-       rationale is that, if the new active queue has been served for
-       a too short time interval, then its possible sequential
-       accesses may not yet prevail on the initial latencies for
-       moving the disk head on the first sector requested
-
-   2b: the waiting time (disk idling) of a queue detected as seeky as
-       a function of the position of the requests it issued is reduced
-       to a very low value only after the queue has consumed a minimum
-       fraction of the assigned budget; this prevents processes
-       generating (partly) seeky workloads from being too ill-treated
-
-   2c: if a queue has consumed 'enough' budget upon a budget timeout, then,
-       even if it did not consume all of its budget, that queue is not punished
-       as any seeky queue; the rationale is that, depending on the disk zones,
-       a queue may be served at a lower rate than the estimated peak rate.
-
-   Changes 2a and 2b have been critical in lowering latencies, whereas
-   change 2c, in addition to change 1, helped a lot increase the disk
-   throughput.
-
-3) slightly changed the peak rate estimator: a low-pass filter is now
-used instead of just keeping the highest rate sampled; the rationale
-is that the peak rate of a disk should be quite stable, so the filter
-should converge more or less smoothly to the right value; it seemed to
-correctly catch the peak rate with all disks we used
-
-4) added the low latency mechanism described in detail in
-http://algogroup.unimore.it/people/paolo/disk_sched/description.php.
-
+Budget Fair Queueing I/O Scheduler
+==================================
+
+This patchset introduces BFQ-v8r4 into Linux 4.8.0.
+For further information: http://algogroup.unimore.it/people/paolo/disk_sched/.
+
+The overall diffstat is the following:
+
+ block/Kconfig.iosched  |   30 +
+ block/Makefile         |    1 +
+ block/bfq-cgroup.c     | 1196 +++++++++++++++++++++++++
+ block/bfq-ioc.c        |   36 +
+ block/bfq-iosched.c    | 5288 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ block/bfq-sched.c      | 1501 ++++++++++++++++++++++++++++
+ block/bfq.h            |  886 ++++++++++++++++
+ include/linux/blkdev.h |    2 +-
+ 8 files changed, 8939 insertions(+), 1 deletion(-)
+
+CHANGELOG
+
+v8r4
+
+. BUGFIX The function bfq_find_set_group may return a NULL pointer,
+  which happened not to be properly handled in the function
+  __bfq_bic_change_cgroup. This fix handles that case. Contributed by
+  Lee Tibbert.
+
+. BUGFIX Fix recovery of lost service for soft real-time
+  applications. This recovery is important for soft real-time
+  applications to continue enjoying proper weight raising even if
+  their service happens to be delayed for a while. Contributed by
+  Luca Miccio.
+
+. BUGFIX Fix handling of the wait_request state. The semantics of
+  hrtimers make the following assumption false after invoking
+  hrtimer_try_to_cancel: that the timer is then guaranteed to be
+  inactive. Unfortunately, the previous version of the code relied on
+  this assumption. This change makes the code comply with the actual
+  semantics (see the sketch below).
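To illustrate the hrtimer point above, here is a minimal sketch, not
code from this patch, of why the in-service reset shown earlier in the
diff clears the wait_request flag explicitly (via
bfq_clear_bfqq_wait_request) instead of inferring it from the timer
state; the struct and helper names are invented for the example:

/*
 * Sketch only: hrtimer_try_to_cancel() returns -1 while the timer
 * callback is running, i.e. the timer may still look active
 * afterwards, so any "waiting for a request" state must be cleared
 * explicitly rather than deduced from the timer.
 */
#include <linux/hrtimer.h>

struct toy_sched {
	struct hrtimer idle_slice_timer;
	bool wait_request;		/* toy stand-in for the bfqq flag */
};

static void toy_reset_in_service(struct toy_sched *sd)
{
	sd->wait_request = false;	/* do not infer this from the timer */
	hrtimer_try_to_cancel(&sd->idle_slice_timer);
}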
+
+. IMPROVEMENT Improve the peak-rate estimator. This change is a
+  complete rewrite of the peak-rate estimation algorithm. It is both
+  an improvement and a simplification: in particular, it replaces the
+  previous, less effective algorithm for estimating the peak rate
+  with a stable and clear one. The previous algorithm approximated
+  the service rate using the individual dispatch rates observed
+  during the service slots of queues. As such, it took into account
+  not just individual queue workloads, but also rather short time
+  intervals. The new algorithm considers the global workload served
+  by the device, and computes the peak rate over much larger time
+  intervals. This makes the new algorithm much more effective with
+  queueing devices and, in general, with devices with a fluctuating
+  bandwidth, either physical or virtual.
+
+. IMPROVEMENT Force the device to serve one request at a time if
+  strict_guarantees is true. Forcing this service scheme is currently
+  the ONLY way to guarantee that the request service order enforced
+  by the scheduler is respected by a queueing device. Otherwise the
+  device is free even to make some unlucky request wait for as long
+  as the device wishes. Of course, serving one request at a time may
+  cause loss of throughput.
+
+. IMPROVEMENT Let weight raising start for a soft real-time
+  application even while the application is still enjoying
+  weight-raising for interactive tasks. This allows soft real-time
+  applications to start enjoying the benefits of their special weight
+  raising as soon as possible.
+
+v8r3
+
+. BUGFIX Update weight-raising coefficient when switching from
+  interactive to soft real-time.
+
+v8r2
+
+. BUGFIX Removed variables that are not used if tracing is
+  disabled. Reported by Lee Tibbert
+
+. IMPROVEMENT Ported commit ae11889636: turned blkg_lookup_create into
+  blkg_lookup. As a side benefit, this finally enables BFQ to be used
+  as a module even with full hierarchical support.
+
+v8r1
+
+. BUGFIX Fixed incorrect invariant check
+
+. IMPROVEMENT Privileged soft real-time applications against
+  interactive ones, to guarantee a lower and more stable latency to
+  the former
+
+v8
+
+. BUGFIX: Fixed incorrect rcu locking in bfq_bic_update_cgroup
+
+. BUGFIX Fixed a few cgroups-related bugs, causing sporadic crashes
+
+. BUGFIX Fixed wrong computation of queue weights as a function of ioprios
+
+. BUGFIX Fixed wrong Kconfig.iosched dependency for BFQ_GROUP_IOSCHED
+
+. IMPROVEMENT Preemption-based, idle-less service guarantees. If
+  several processes are competing for the device at the same time, but
+  all processes and groups have the same weight, then the mechanism
+  introduced by this improvement enables BFQ to guarantee the expected
+  throughput distribution without ever idling the device. Throughput
+  is then much higher in this common scenario.
+
+. IMPROVEMENT Made burst handling more robust
+
+. IMPROVEMENT Reduced false positives in EQM
+
+. IMPROVEMENT Let queues preserve weight-raising also when shared
+
+. IMPROVEMENT Improved peak-rate estimation and autotuning of the
+  parameters related to the device rate
+
+. IMPROVEMENT Improved the weight-raising mechanism so as to further
+  reduce latency and to increase robustness
+
+. IMPROVEMENT Added a strict-guarantees tunable. If this tunable is
+  set, then device-idling is forced whenever needed to provide
+  accurate service guarantees. CAVEAT: idling unconditionally may even
+  increase latencies, in case of processes that did stop doing I/O
+  (see the sketch below).
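A minimal sketch of the service scheme behind strict_guarantees, as
described in the v8r4 and v8 notes above. This is an illustration, not
code from the patch; the field names (strict_guarantees, rq_in_driver)
are assumptions based on the changelog:

/*
 * Toy dispatch gate: with strict guarantees, keep at most one
 * request in the device, so the device cannot reorder requests and
 * the scheduler's service order is preserved.
 */
static bool toy_may_dispatch(int rq_in_driver, bool strict_guarantees)
{
	if (strict_guarantees && rq_in_driver > 0)
		return false;	/* wait for the in-flight request */
	return true;
}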
+
+. IMPROVEMENT Improved handling of async (write) I/O requests
+
+. IMPROVEMENT Ported several good CFQ commits
+
+. CHANGE Changed default group weight to 100
+
+. CODE IMPROVEMENT Refactored I/O-request-insertion code
+
+v7r11:
+. BUGFIX Remove the group_list data structure, which ended up in an
+  inconsistent state if BFQ happened to be activated for some device
+  when some blkio groups already existed (these groups were not added
+  to the list). The blkg list for the request queue is now used where
+  the removed group_list was used.
+
+. BUGFIX Init and reset also dead_stats.
+
+. BUGFIX Added, in __bfq_deactivate_entity, the correct handling of the
+  case where the entity to deactivate has not yet been activated at all.
+
+. BUGFIX Added missing free of the root group for the case where full
+  hierarchical support is not activated.
+
+. IMPROVEMENT Removed the now useless bfq_disconnect_groups
+  function. The same functionality is achieved through multiple
+  invocations of bfq_pd_offline (which are in their turn guaranteed to
+  be executed, when needed, by the blk-cgroups code).
+
+v7r10:
+. BUGFIX: Fixed wrong check on whether cooperating processes belong
+  to the same cgroup.
+
+v7r9:
+. IMPROVEMENT: Changed BFQ to use the blkio controller instead of its
+  own controller. BFQ now registers itself as a policy to the blkio
+  controller and implements its hierarchical scheduling support using
+  data structures that already exist in blk-cgroup. The bfqio
+  controller's code is completely removed.
+
+. CODE IMPROVEMENTS: Applied all suggestions from Tejun Heo, received
+  on the last submission to lkml: https://lkml.org/lkml/2014/5/27/314.
+
+v7r8:
+. BUGFIX: Let weight-related fields of a bfq_entity be correctly initialized
+  (also) when the I/O priority of the entity is changed before the first
+  request is inserted into the bfq_queue associated to the entity.
+. BUGFIX: When merging requests belonging to different bfq_queues, avoid
+  repositioning the surviving request. In fact, in this case the repositioning
+  may result in the surviving request being moved across bfq_queues, which
+  would ultimately cause bfq_queues' data structures to become inconsistent.
+. BUGFIX: When merging requests belonging to the same bfq_queue, reposition
+  the surviving request so that it gets in the correct position, namely the
+  position of the dropped request, instead of always being moved to the head
+  of the FIFO of the bfq_queue (which means to let the request be considered
+  the eldest one).
+. BUGFIX: Reduce the idling slice for seeky queues only if the scenario is
+  symmetric. This guarantees that also processes associated to seeky queues
+  do receive their reserved share of the throughput.
+  Contributed by Riccardo Pizzetti and Samuele Zecchini.
+. IMPROVEMENT: Always perform device idling if the scenario is asymmetric in
+  terms of throughput distribution among processes.
+  This extends throughput-distribution guarantees to any process, regardless
+  of the properties of its request pattern and of the request patterns of the
+  other processes, and regardless of whether the device is NCQ-capable.
+. IMPROVEMENT: Remove the current limitation on the maximum number of in-flight
+  requests allowed for a sync queue (limitation set in place for fairness
+  issues in CFQ, inherited by the first version of BFQ, but made unnecessary
+  by the latest accurate fairness strategies added to BFQ). Removing this
+  limitation enables devices with long internal queues to fill their queues
+  as much as they deem appropriate, also with sync requests. This avoids
+  throughput losses on these devices, because, to achieve a high throughput,
+  they often need to have a high number of requests queued internally.
+. CODE IMPROVEMENT: Simplify I/O priority change logic by turning it into a
+  single-step procedure instead of a two-step one; improve readability by
+  rethinking the names of the functions involved in changing the I/O priority
+  of a bfq_queue.
+
+v7r7:
+. BUGFIX: Prevent the OOM queue from being involved in the queue
+  cooperation mechanism. In fact, since the requests temporarily
+  redirected to the OOM queue could be redirected again to dedicated
+  queues at any time, the state needed to correctly handle merging
+  with the OOM queue would be quite complex and expensive to
+  maintain. Besides, in such a critical condition as an out-of-memory
+  state, the benefits of queue merging may be of little relevance, or
+  even negligible.
+. IMPROVEMENT: Let the OOM queue be initialized only once. Previously,
+  the OOM queue was reinitialized, at each request enqueue, with the
+  parameters related to the process that issued that request.
+  Depending on the parameters of the processes doing I/O, this could
+  easily cause the OOM queue to be moved continuously across service
+  trees, or even across groups. It also caused the parameters of the
+  OOM queue to be continuously reset in any case.
+. CODE IMPROVEMENT. Performed some minor code cleanups, and added some
+  BUG_ON()s that, if the weight of an entity becomes inconsistent,
+  should better help understand why.
+
+v7r6:
+. IMPROVEMENT: Introduced a new mechanism that helps get the job done
+  more quickly with services and applications that create or reactivate
+  many parallel I/O-bound processes. This is the case, for example, with
+  systemd at boot, or with commands like git grep.
+. CODE IMPROVEMENTS: Small code cleanups and improvements.
+
+v7r5:
+. IMPROVEMENT: Improve throughput boosting by idling the device
+  only for processes that, in addition to performing sequential I/O,
+  are I/O-bound (apart from weight-raised queues, for which idling
+  is always performed to guarantee them a low latency).
+. IMPROVEMENT: Improve throughput boosting by depriving processes
+  that cooperate often of weight-raising.
+. CODE IMPROVEMENT: A pass to improve the readability of both
+  comments and actual code.
+
+v7r4:
+. BUGFIX. Modified the code so as to be robust against late detection of
+  NCQ support for a rotational device.
+. BUGFIX. Removed a bug that hindered the correct throughput distribution
+  on flash-based devices when not every process had to receive the same
+  fraction of the throughput. This fix also entailed a small efficiency
+  improvement, because it implied the removal of a short function executed
+  in a hot path.
+. CODESTYLE IMPROVEMENT: removed quoted strings split across lines.
+
+v7r3:
+. IMPROVEMENT: Improved throughput boosting with NCQ-capable HDDs and
+  random workloads.
+  The mechanism that further boosts throughput with
+  these devices and workloads is activated only in the cases where it
+  does not cause any violation of throughput-distribution and latency
+  guarantees.
+. IMPROVEMENT: Generalized the computation of the parameters of the
+  low-latency heuristic for interactive applications, so as to fit also
+  slower storage devices. The purpose of this improvement is to preserve
+  low-latency guarantees for interactive applications also on slower
+  devices, such as portable hard disks, multimedia and SD cards.
+. BUGFIX: Re-added MODULE_LICENSE macro.
+. CODE IMPROVEMENTS: Small code cleanups; introduced a coherent naming
+  scheme for all identifiers related to weight raising; refactored and
+  optimized a few hot paths.
+
+v7r2:
+. BUGFIX/IMPROVEMENT. One of the requirements for an application to be
+  deemed as soft real-time is that it issues its requests in batches, and
+  stops doing I/O for a well-defined amount of time before issuing a new
+  batch. Imposing this minimum idle time allows BFQ to filter out I/O-bound
+  applications that may otherwise be incorrectly deemed as soft real-time
+  (under the circumstances described in detail in the comments to the
+  function bfq_bfqq_softrt_next_start()). Unfortunately, BFQ could
+  start counting this idle time from two different events: either from the
+  expiration of the queue, if all requests of the queue had already been
+  completed when the queue expired, or, if the previous condition did not
+  hold, from the first completion of one of the still outstanding requests.
+  In the second case, an application had more chances to be deemed as soft
+  real-time.
+  Actually, there was no reason for this differentiated treatment. We
+  addressed this issue by defining more precisely the above requirement for
+  an application to be deemed as soft real-time, and changing the code
+  consequently: a well-defined amount of time must elapse between the
+  completion of *all the requests* of the current pending batch and the
+  issuing of the first request of the next batch (this is, in the end, what
+  happens with a true soft real-time application). This change further
+  reduced false positives, and, as such, improved responsiveness and reduced
+  latency for actual soft real-time applications.
+. CODE IMPROVEMENT. We cleaned up the code a little bit and addressed
+  some issues pointed out by the checkpatch.pl script.
+
+v7r1:
+. BUGFIX. Replace the old value used to approximate 'infinity' with
+  the correct one to use in case times are compared through the macro
+  time_is_before_jiffies(). In fact, this macro, designed to take
+  wraparound issues into account, easily returns anomalous results if
+  its argument is equal to the value that we used as an approximation
+  of 'infinity', namely ((unsigned long) (-1)). The consequence was
+  that the logical expression used to determine whether a queue
+  belongs to a soft real-time application often yielded an incorrect
+  result. In the end, some application happened to be incorrectly
+  deemed as soft real-time and hence weight-raised. This affected both
+  throughput and latency guarantees.
+. BUGFIX. Fixed a scrivener's error made in an attempt to use the
+  above macro in a logical expression.
+. IMPROVEMENT/BUGFIX. On the expiration of a queue, use a more general
+  condition to allow a weight-raising period to start if the queue is
+  soft real-time. The previous condition could prevent an empty,
+  soft real-time queue from being correctly deemed as soft real-time.
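Since the v7r1 'infinity' fix above and the jiffies items that follow
revolve around the same wraparound pitfall, a small illustrative
fragment may help; it is not part of the patch, and toy_softrt_check
is an invented name:

/*
 * Sketch of the pitfall: time_is_before_jiffies(a) is defined as
 * time_after(jiffies, a), which is only meaningful when the two
 * values are less than ULONG_MAX/2 apart.  Using (unsigned long)-1
 * as a stand-in for "never" violates that precondition, so the
 * check below yields anomalous results for such an argument.
 */
#include <linux/jiffies.h>

static bool toy_softrt_check(unsigned long next_start)
{
	/* anomalous when next_start == (unsigned long)(-1), "infinity" */
	return time_is_before_jiffies(next_start);
}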
+
+v7:
+- IMPROVEMENT: In the presence of weight-raised queues and if the
+ device is NCQ-enabled, device idling is now disabled for non-raised
+ readers, i.e., for their associated sync queues. Hence a sync queue
+ is expired immediately if it becomes empty, and a new queue is
+ served. As explained in detail in the papers about BFQ, not idling
+ the device for sync queues when the latter become empty causes BFQ to
+ assign higher timestamps to these queues when they get backlogged
+ again, and hence to serve these queues less frequently. This fact,
+ plus the fact that, because of the immediate expiration itself,
+ these queues get less service while they are granted access to the
+ disk, reduces the relative rate at which the processes associated to
+ these queues ask for requests from the I/O request pool. If the pool
+ is saturated, as happens in the presence of write hogs, reducing
+ the above relative rate increases the probability that a request is
+ available (soon) in the pool when a weight-raised process needs it.
+ This change does seem to mitigate the typical starvation problems
+ that occur in the presence of write hogs and NCQ, and hence to
+ guarantee a higher application and system responsiveness in these
+ hostile scenarios.
+- IMPROVEMENT/BUGFIX: Introduced a new classification rule to the soft
+ real-time heuristic, which takes into account also the isochronous
+ nature of such applications. The computation of next_start has been
+ fixed as well: it is now correctly done from the time of the last
+ transition from idle to backlogged, and therefore from the service
+ received by the queue since that transition. Finally, the code which
+ preserved weight-raising for a soft real-time queue even with no
+ idle->backlogged transition has been removed.
+- IMPROVEMENT: Add a few jiffies to the reference time interval used to
+ establish whether an application is greedy or not (see the sketch
+ after this section). This reference interval was, by default, HZ/125
+ seconds, which could generate false positives in the following two
+ cases (especially if both cases occur):
+ 1) If HZ is so low that the duration of a jiffy is comparable to or
+ higher than the above reference time interval. This happens, e.g.,
+ on slow devices with HZ=100.
+ 2) If jiffies, instead of increasing at a constant rate, stops
+ increasing for some time and then suddenly 'jumps' by several units
+ to recover the lost increments. This seems to happen, e.g., in
+ virtual machines.
+ The added number of jiffies has been found experimentally. In particular,
+ according to our experiments, adding this number of jiffies seems to make
+ the filter quite precise also in embedded systems and KVM/QEMU virtual
+ machines. Also contributed by Alexander Spyridakis.
+- IMPROVEMENT/BUGFIX: Keep disk idling also for NCQ-capable rotational
+ devices, which boosts the throughput on such devices.
+- BUGFIX: The budget-timeout condition in the bfq_rq_enqueued() function
+ was checked only if the request was large enough to provoke an unplug. As
+ a consequence, for a process always issuing small I/O requests the
+ budget timeout was never checked. The queue associated to the process
+ therefore expired only when its budget was exhausted, even if the
+ queue had already incurred a budget timeout a while before.
+ This fix lets a queue be checked for budget timeout at each request
+ enqueue, and, if needed, expires the queue accordingly even if the
+ request is small.
+- BUGFIX: Make sure that weight-raising is resumed for a split queue,
+ if it was merged when already weight-raised.
+- MINOR BUGFIX: Let bfq_end_raising_async() correctly end weight-raising
+ also for the queues belonging to the root group.
+- IMPROVEMENT: Get rid of the some_coop_idle flag, which was in turn
+ used to decide whether to disable idling for an in-service
+ shared queue whose seek mean decreased. In fact, disabling idling
+ for such a queue turned out to be useless.
+- CODE IMPROVEMENT: The bfq_bfqq_must_idle() function and the
+ bfq_select_queue() function may not change the current in-service
+ queue in various cases. We have cleaned up the involved conditions
+ by factoring out the common parts and getting rid of the useless
+ ones.
+- MINOR CODE IMPROVEMENT: The idle_for_long_time condition in the
+ bfq_add_rq_rb() function should be evaluated only on an
+ idle->backlogged transition. Now the condition defaults to false
+ and is evaluated only if the queue was not busy on a request
+ insertion.
+- MINOR CODE IMPROVEMENT: Added a comment describing the rationale
+ behind the condition evaluated in the function
+ bfq_bfqq_must_not_expire().
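+
+ A minimal sketch of where the few-jiffies tolerance above can enter
+ the soft real-time filter (field names follow the BFQ sources of this
+ period; treat the exact expression and the 4-jiffies constant as
+ assumptions):
+
+	/*
+	 * Earliest time from which the queue may again be deemed soft
+	 * real-time. The max() with 'jiffies + slice_idle + 4' adds the
+	 * slack described above, so that a low HZ or a jumpy jiffies
+	 * counter (e.g. in virtual machines) does not turn the rate
+	 * check into a source of false positives.
+	 */
+	static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+							struct bfq_queue *bfqq)
+	{
+		return max(bfqq->last_idle_bklogged +
+			   HZ * bfqq->service_from_backlogged /
+			   bfqd->bfq_wr_max_softrt_rate,
+			   jiffies + bfqd->bfq_slice_idle + 4);
+	}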
+
+v6r2:
+- Fairness fix: the case of queue expiration for budget timeout is
+ now correctly handled also for sync queues, thus allowing the
+ processes corresponding to these queues to also be guaranteed their
+ reserved share of the disk throughput.
+- Fixed a bug that prevented group weights from being correctly
+ set via the sysfs interface.
+- Fixed a bug that cleared a previously-set group weight if the
+ same value was re-inserted via the sysfs interface.
+- Fixed an EQM bug that allowed a newly-started process to skip
+ its initial weight-raising period if its queue was merged before
+ its first request was inserted.
+- Fixed a bug that preserved already-started weight-raising periods
+ even if the low_latency tunable was disabled.
+- The raising_max_time tunable now shows the maximum raising time in
+ milliseconds, which is more user-friendly.
+
+v6r1:
+- Fix use-after-free of queues in __bfq_bfqq_expire(). It may happen that
+ a call to bfq_del_bfqq_busy() puts the last reference taken on a queue
+ and frees it. Subsequent accesses to that same queue would result in a
+ use-after-free. Make sure that a queue that has just been deleted from
+ busy is no longer touched (see the sketch after this section).
+- Use the uninitialized_var() macro when needed. It may happen that a
+ variable is initialized in a function that is called by the function
+ that defined it; the macro silences the resulting false-positive
+ compiler warnings in these cases.
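+
+ The use-after-free rule above, in a simplified sketch (the real
+ __bfq_bfqq_expire() is more involved; helper names follow the entry):
+
+	static void __bfq_bfqq_expire(struct bfq_data *bfqd,
+				      struct bfq_queue *bfqq)
+	{
+		if (RB_EMPTY_ROOT(&bfqq->sort_list))
+			/* may put the last reference and free bfqq */
+			bfq_del_bfqq_busy(bfqd, bfqq, 1);
+		else
+			bfq_activate_bfqq(bfqd, bfqq);
+		/*
+		 * Do not dereference bfqq past this point: if the call
+		 * above dropped the last reference, the queue is gone.
+		 */
+	}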
+
+v6:
+- Replacement of the cooperating-queue merging mechanism borrowed from
+ CFQ with Early Queue Merge (EQM), a unified mechanism to get a
+ sequential read pattern, and hence a high throughput, with any set of
+ processes performing interleaved I/O. EQM also preserves low latency
+ (see http://algogroup.unimore.it/people/paolo/disk_sched/description.php
+ for more details). Contributed by Mauro Andreolini and Arianna Avanzini.
+ The code for detecting whether two queues have to be merged is a
+ slightly modified version of the CFQ code for detecting whether two
+ queues belong to cooperating processes and whether the service of a
+ queue should be preempted to boost the throughput.
+- Fix a bug that caused the peak rate of a disk to be computed as zero
+ in case of multiple I/O errors. Subsequent estimations of the weight
+ raising duration caused a division-by-zero error.
+
+v5r1:
+- BUG FIX: Fixed a stall occurring when the active queue is moved to
+ a different group while idling (this caused the idling timer to be
+ cancelled, and hence no new queue to be selected and no new
+ request to be dispatched).
+- BUG FIX: Fixed the wrong assignment of excessively high budgets to
+ queues during the first few seconds after initialization.
+- BUG FIX: Added proper locking to the function handling the "weights"
+ tunable.
+
+v5:
+- Added a heuristic that, if the tunable raising_max_time is set to
+ 0, automatically computes the duration of the weight raising
+ according to the estimated peak rate of the device. This enables
+ flash-based devices to reach maximum throughput as soon as possible,
+ without sacrificing latency.
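+
+ The v5 heuristic boils down to an inverse proportionality between
+ peak rate and raising duration, sketched here (the names and the idea
+ of a precomputed reference product are assumptions):
+
+	/*
+	 * Weight-raising duration used when raising_max_time == 0:
+	 * scale a duration measured on a reference device by the ratio
+	 * between the reference rate and the estimated peak rate, so
+	 * that faster devices get shorter raising periods.
+	 */
+	static u64 bfq_wr_duration(u64 ref_rate, u64 ref_duration,
+				   u64 peak_rate)
+	{
+		return div64_u64(ref_rate * ref_duration, peak_rate);
+	}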
+
+v4:
+- Throughput-boosting for flash-based devices: improved version of commits
+ a68bbdd and f7d7b7a, which boosts the throughput while still preserving
+ latency guarantees for interactive and soft real-time applications.
+- Better identification of NCQ-capable disks: port of commit e459dd0.
+
+v3-r4:
+- Bugfixes
+ * Fixed an important memory leak: under some circumstances the process
+ references to a queue were not decremented correctly, which prevented
+ unused shared bfq_queues from being deallocated correctly.
+ * Fixed various errors related to hierarchical scheduling:
+ * Fixed an error causing tasks to be attached to the bfqio cgroup
+ controller even when BFQ was not the active scheduler
+ * Corrected a wrong update of the budgets from the leaf to the root upon
+ forced selection of a service tree or a bfq_queue
+ * Fixed the way active leaf entities are moved to the root group before
+ the group entity is deactivated when a cgroup is destroyed
+- Throughput-boosting improvement for cooperating queues: close detection
+ is now based on a fixed threshold instead of the queue's average seek.
+ This is a port of one of the changes in the CFQ commit 3dde36d by
+ Corrado Zoccolo.
+
+v3-r3:
+- Bugfix: fixed an important error causing occasional kernel panics when
+ moving a process to a new cgroup. The panic occurred if:
+ 1) the queue associated to the process was idle when the process was moved
+ and
+ 2) a new disk request was inserted into the queue just after the move.
+- Further latency improvement through a better treatment of low-bandwidth
+ async queues.
+
+v3-r2:
+- Bugfix: added a forgotten condition that prevents weights of low-bw async
+ queues from being raised when low_latency is off.
+- Latency improvement: low-bw async queues are now better identified.
+
+v3-r1:
+- Fixed an important request-dispatch bug causing occasional IO hangs.
+- Added a new mechanism to reduce the latency of low-bw async queues.
+ This also reduces the latency of the sync queues synchronized with
+ the above async queues.
+- Fixed a minor bug in iocontext locking (port of commits 9b50902 and 3181faa
+ from CFQ).
+
+v3:
+
+- Improved low-latency mechanisms, including a more accurate criterion to
+ distinguish between greedy-but-seeky and soft real-time applications.
+ Interactive applications now enjoy noticeably lower latencies.
+
+- Switch to the simpler one-request-dispatch-at-a-time scheme as in CFQ.
+
+- Ported cooperating-queues merging from CFQ (6d048f5, 1afba04,
+ d9e7620, a36e71f, 04dc6e7, 26a2ac0, 3ac6c9f, f2d1f0a, 83096eb,
+ 2e46e8b, df5fe3e, b3b6d04, e6c5bc7, c0324a0, f04a642, 8682e1f,
+ b9d8f4c, 2f7a2d8, ae54abe, e9ce335, 39c01b2, d02a2c0, c10b61f).
+ Contributed by Arianna Avanzini. Queues of processes performing IO
+ on interleaved, yet contiguous disk zones are merged to boost the
+ throughput. A few small optimizations to get a more stable throughput
+ have been added to the original CFQ version.
+
+- Added a static fallback queue for extreme OOM conditions (porting of
+ CFQ commits d5036d7, 6118b70, b706f64, 32f2e80). Port contributed by
+ Francesco Allertsen.
+
+- Ported CFQ commits b0b78f8, 40bb54d, 30996f4, dddb745, ad5ebd2, cf7c25c;
+ mainly code cleanups and fixes of minor bugs. Port contributed by
+ Francesco Allertsen.
+
+v2:
+
+- Solved an issue that could cause a small throughput loss on fast disks;
+ both BFQ-v1 and CFQ suffered from it.
+- The disk-idling timeout has been better tuned to further reduce latency
+ (especially in the idle- or lightly-loaded-disk scenarios).
+- One of the parameters of the low-latency heuristics has been tuned a little
+ bit more, so as to reduce the probability that a disk-bound process may
+ hamper the reduction of the latency of interactive and soft real-time
+ applications.
+
+ - Same low-latency guarantees with and without NCQ.
+
+ - Latency for interactive applications about halved with respect to BFQ-v1.
+
+ - When the low_latency tunable is set, soft real-time applications
+ now enjoy reduced latency as well.
+
+ - A small minimum bandwidth is now guaranteed to the
+ Idle IO-scheduling class also when the other classes are
+ backlogged, just to prevent it from starving.
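+
+ The idle-class guarantee above reduces to a timeout check on the last
+ service of the class, sketched here (the field name and the constant
+ are assumptions):
+
+	#define BFQ_CL_IDLE_TIMEOUT	(HZ / 5)
+
+	/*
+	 * Even while the RT and BE classes are backlogged, serve one
+	 * idle-class queue whenever the class has waited longer than
+	 * BFQ_CL_IDLE_TIMEOUT since its last service.
+	 */
+	static bool bfq_class_idle_deserves_service(struct bfq_data *bfqd)
+	{
+		return time_is_before_jiffies(
+			bfqd->bfq_class_idle_last_service +
+			BFQ_CL_IDLE_TIMEOUT);
+	}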
+
+v1:
+
+This is a new version of BFQ with respect to the versions you can
+find on Fabio's site: http://feanor.sssup.it/~fabio/linux/bfq.
+Here is what we changed with respect to the previous versions:
+
+1) re-tuned the budget feedback mechanism: it is now slightly more
+biased toward assigning high budgets, to boost the aggregated
+throughput more, and more quickly, as new processes are started
+
+2) introduced more tolerance toward seeky queues (I verified that the
+phenomena described below used to occur systematically):
+
+ 2a: if a queue is expired after having received very little
+ service, then it is not punished as a seeky queue, even if it
+ happened to consume that little service too slowly; the
+ rationale is that, if the new active queue has been served for
+ too short a time interval, then its possible sequential
+ accesses may not yet prevail over the initial latencies for
+ moving the disk head to the first sector requested
+
+ 2b: the waiting time (disk idling) of a queue, detected as seeky
+ as a function of the position of the requests it issued, is reduced
+ to a very low value only after the queue has consumed a minimum
+ fraction of the assigned budget; this prevents processes
+ generating (partly) seeky workloads from being too ill-treated
+
+ 2c: if a queue has consumed 'enough' budget upon a budget timeout, then,
+ even if it did not consume all of its budget, that queue is not punished
+ as a seeky queue; the rationale is that, depending on the disk zones,
+ a queue may be served at a lower rate than the estimated peak rate.
+
+ Changes 2a and 2b have been critical in lowering latencies, whereas
+ change 2c, in addition to change 1, helped a lot in increasing the
+ disk throughput.
+
+3) slightly changed the peak rate estimator: a low-pass filter is now
+used instead of just keeping the highest rate sampled; the rationale
+is that the peak rate of a disk should be quite stable, so the filter
+should converge more or less smoothly to the right value; it seemed to
+correctly catch the peak rate with all disks we used
+
+4) added the low latency mechanism described in detail in
+http://algogroup.unimore.it/people/paolo/disk_sched/description.php.
+
diff --git a/disable-mrproper-prepare-scripts-configs-in-devel-rpms.patch b/disable-mrproper-prepare-scripts-configs-in-devel-rpms.patch
index 23f4538..d12e132 100644
--- a/disable-mrproper-prepare-scripts-configs-in-devel-rpms.patch
+++ b/disable-mrproper-prepare-scripts-configs-in-devel-rpms.patch
@@ -7,7 +7,7 @@ index 5a493e7..2055c07 100644
# in parallel
PHONY += scripts
-scripts: scripts_basic include/config/auto.conf include/config/tristate.conf \
-- asm-generic
+- asm-generic gcc-plugins
- $(Q)$(MAKE) $(build)=$(@)
+scripts:
diff --git a/fs-aufs4.patch b/fs-aufs4.patch
index 7e802f7..789821d 100644
--- a/fs-aufs4.patch
+++ b/fs-aufs4.patch
@@ -1771,10 +1771,10 @@ index 0000000..4ab46ff
+Currently this approach is applied to address_space_operations for
+regular files only.
diff --git a/MAINTAINERS b/MAINTAINERS -index 8c20323..d170184 100644 +index f593300..8a17054 100644 --- a/MAINTAINERS +++ b/MAINTAINERS -@@ -2213,6 +2213,19 @@ F: include/linux/audit.h +@@ -2256,6 +2256,19 @@ F: include/linux/audit.h F: include/uapi/linux/audit.h F: kernel/audit* @@ -1795,10 +1795,10 @@ index 8c20323..d170184 100644 M: Miguel Ojeda Sandonis W: http://miguelojeda.es/auxdisplay.htm diff --git a/drivers/block/loop.c b/drivers/block/loop.c -index 1fa8cc2..7339e65 100644 +index c9f2107..005e292 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c -@@ -712,6 +712,24 @@ static inline int is_loop_device(struct file *file) +@@ -701,6 +701,24 @@ static inline int is_loop_device(struct file *file) return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; } @@ -1824,10 +1824,10 @@ index 1fa8cc2..7339e65 100644 static ssize_t loop_attr_show(struct device *dev, char *page, diff --git a/fs/Kconfig b/fs/Kconfig -index b8fcb41..78adefb 100644 +index 2bc7ad7..3049386 100644 --- a/fs/Kconfig +++ b/fs/Kconfig -@@ -236,6 +236,7 @@ source "fs/pstore/Kconfig" +@@ -245,6 +245,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/exofs/Kconfig" @@ -1836,10 +1836,10 @@ index b8fcb41..78adefb 100644 endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile -index 85b6e13..e7bb164 100644 +index ed2b632..aa6d14b 100644 --- a/fs/Makefile +++ b/fs/Makefile -@@ -128,3 +128,4 @@ obj-y += exofs/ # Multiple modules +@@ -129,3 +129,4 @@ obj-y += exofs/ # Multiple modules obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ @@ -2152,10 +2152,10 @@ index 0000000..e48d268 +#endif /* __AUFS_H__ */ diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c new file mode 100644 -index 0000000..7a60c73 +index 0000000..66495d2 --- /dev/null +++ b/fs/aufs/branch.c -@@ -0,0 +1,1409 @@ +@@ -0,0 +1,1412 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. 
Okajima + * @@ -2312,12 +2312,12 @@ index 0000000..7a60c73 + goto out_wbr; + } + -+ err = au_sbr_realloc(au_sbi(sb), new_nbranch); ++ err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0); + if (!err) -+ err = au_di_realloc(au_di(root), new_nbranch); ++ err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0); + if (!err) { + inode = d_inode(root); -+ err = au_hinode_realloc(au_ii(inode), new_nbranch); ++ err = au_hinode_realloc(au_ii(inode), new_nbranch, /*may_shrink*/0); + } + if (!err) + return add_branch; /* success */ @@ -3061,7 +3061,8 @@ index 0000000..7a60c73 + sbinfo->si_branch[0 + bbot] = NULL; + sbinfo->si_bbot--; + -+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST); ++ p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST, ++ /*may_shrink*/1); + if (p) + sbinfo->si_branch = p; + /* harmless error */ @@ -3080,7 +3081,8 @@ index 0000000..7a60c73 + /* au_h_dentry_init(au_hdentry(dinfo, bbot); */ + dinfo->di_bbot--; + -+ p = krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST); ++ p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST, ++ /*may_shrink*/1); + if (p) + dinfo->di_hdentry = p; + /* harmless error */ @@ -3099,7 +3101,8 @@ index 0000000..7a60c73 + /* au_hinode_init(au_hinode(iinfo, bbot)); */ + iinfo->ii_bbot--; + -+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST); ++ p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST, ++ /*may_shrink*/1); + if (p) + iinfo->ii_hinode = p; + /* harmless error */ @@ -3926,10 +3929,10 @@ index 0000000..0bbb2d3 +-include ${srctree}/${src}/conf_priv.mk diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c new file mode 100644 -index 0000000..0a06bf2 +index 0000000..e48a4ff --- /dev/null +++ b/fs/aufs/cpup.c -@@ -0,0 +1,1383 @@ +@@ -0,0 +1,1391 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -4326,6 +4329,7 @@ index 0000000..0a06bf2 + } + }; + struct super_block *sb; ++ struct inode *h_src_inode; + struct task_struct *tsk = current; + + /* bsrc branch can be ro/rw. */ @@ -4341,7 +4345,9 @@ index 0000000..0a06bf2 + } + + /* try stopping to update while we copyup */ -+ IMustLock(d_inode(file[SRC].dentry)); ++ h_src_inode = d_inode(file[SRC].dentry); ++ if (!au_test_nfs(h_src_inode->i_sb)) ++ IMustLock(h_src_inode); + err = au_copy_file(file[DST].file, file[SRC].file, cpg->len); + + /* i wonder if we had O_NO_DELAY_FPUT flag */ @@ -4401,8 +4407,13 @@ index 0000000..0a06bf2 + goto out; + } + h_src_attr->valid = 1; -+ err = au_cp_regular(cpg); -+ inode_unlock(h_src_inode); ++ if (!au_test_nfs(h_src_inode->i_sb)) { ++ err = au_cp_regular(cpg); ++ inode_unlock(h_src_inode); ++ } else { ++ inode_unlock(h_src_inode); ++ err = au_cp_regular(cpg); ++ } + rerr = au_pin_hdir_relock(cpg->pin); + if (!err && rerr) + err = rerr; @@ -5913,10 +5924,10 @@ index 0000000..d1e09bd +#endif /* __DBGAUFS_H__ */ diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c new file mode 100644 -index 0000000..a88a26d +index 0000000..583ba75 --- /dev/null +++ b/fs/aufs/dcsub.c -@@ -0,0 +1,224 @@ +@@ -0,0 +1,225 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. 
Okajima + * @@ -6001,7 +6012,8 @@ index 0000000..a88a26d + err = -ENOMEM; + sz = dpages->ndpage * sizeof(*dpages->dpages); + p = au_kzrealloc(dpages->dpages, sz, -+ sz + sizeof(*dpages->dpages), gfp); ++ sz + sizeof(*dpages->dpages), gfp, ++ /*may_shrink*/0); + if (unlikely(!p)) + goto out; + @@ -6962,10 +6974,10 @@ index 0000000..cd5fc3f +#endif /* __AUFS_DEBUG_H__ */ diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c new file mode 100644 -index 0000000..d6867c8 +index 0000000..0b6a3d4 --- /dev/null +++ b/fs/aufs/dentry.c -@@ -0,0 +1,1128 @@ +@@ -0,0 +1,1130 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -7017,7 +7029,7 @@ index 0000000..d6867c8 + br = au_sbr(dentry->d_sb, bindex); + wh_able = !!au_br_whable(br->br_perm); + if (wh_able) -+ wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0); ++ wh_found = au_wh_test(h_parent, wh_name, ignore_perm); + h_dentry = ERR_PTR(wh_found); + if (!wh_found) + goto real_lookup; @@ -7682,7 +7694,7 @@ index 0000000..d6867c8 + +int au_refresh_dentry(struct dentry *dentry, struct dentry *parent) +{ -+ int err, ebrange; ++ int err, ebrange, nbr; + unsigned int sigen; + struct au_dinfo *dinfo, *tmp; + struct super_block *sb; @@ -7698,8 +7710,9 @@ index 0000000..d6867c8 + if (unlikely(err)) + goto out; + ++ nbr = au_sbbot(sb) + 1; + dinfo = au_di(dentry); -+ err = au_di_realloc(dinfo, au_sbbot(sb) + 1); ++ err = au_di_realloc(dinfo, nbr, /*may_shrink*/0); + if (unlikely(err)) + goto out; + ebrange = au_dbrange_test(dentry); @@ -7742,6 +7755,7 @@ index 0000000..d6867c8 + au_dbg_verify_dinode(dentry); + AuTraceErr(err); + } ++ au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */ + au_rw_write_unlock(&tmp->di_rwsem); + au_di_free(tmp); + if (unlikely(err)) @@ -8096,7 +8110,7 @@ index 0000000..d6867c8 +}; diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h new file mode 100644 -index 0000000..94a3753 +index 0000000..9ea6eddf --- /dev/null +++ b/fs/aufs/dentry.h @@ -0,0 +1,255 @@ @@ -8181,7 +8195,7 @@ index 0000000..94a3753 +void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src); +int au_di_init(struct dentry *dentry); +void au_di_fin(struct dentry *dentry); -+int au_di_realloc(struct au_dinfo *dinfo, int nbr); ++int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink); + +void di_read_lock(struct dentry *d, int flags, unsigned int lsc); +void di_read_unlock(struct dentry *d, int flags); @@ -8357,10 +8371,10 @@ index 0000000..94a3753 +#endif /* __AUFS_DENTRY_H__ */ diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c new file mode 100644 -index 0000000..25e71e9 +index 0000000..3bc31e2 --- /dev/null +++ b/fs/aufs/dinfo.c -@@ -0,0 +1,552 @@ +@@ -0,0 +1,553 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. 
Okajima + * @@ -8505,7 +8519,7 @@ index 0000000..25e71e9 + au_di_free(dinfo); +} + -+int au_di_realloc(struct au_dinfo *dinfo, int nbr) ++int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink) +{ + int err, sz; + struct au_hdentry *hdp; @@ -8516,7 +8530,8 @@ index 0000000..25e71e9 + sz = sizeof(*hdp) * (dinfo->di_bbot + 1); + if (!sz) + sz = sizeof(*hdp); -+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS); ++ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS, ++ may_shrink); + if (hdp) { + dinfo->di_hdentry = hdp; + err = 0; @@ -9826,10 +9841,10 @@ index 0000000..8a145f1 +#endif /* __AUFS_DIR_H__ */ diff --git a/fs/aufs/dynop.c b/fs/aufs/dynop.c new file mode 100644 -index 0000000..197be54 +index 0000000..7e4a32a --- /dev/null +++ b/fs/aufs/dynop.c -@@ -0,0 +1,369 @@ +@@ -0,0 +1,371 @@ +/* + * Copyright (C) 2010-2016 Junjiro R. Okajima + * @@ -10021,6 +10036,8 @@ index 0000000..197be54 + /* this one will be changed according to an aufs mount option */ + DySetAop(direct_IO); + DySetAop(migratepage); ++ DySetAop(isolate_page); ++ DySetAop(putback_page); + DySetAop(launder_page); + DySetAop(is_partially_uptodate); + DySetAop(is_dirty_writeback); @@ -12334,10 +12351,10 @@ index 0000000..40289e4 +} diff --git a/fs/aufs/file.c b/fs/aufs/file.c new file mode 100644 -index 0000000..33dde75 +index 0000000..252299d --- /dev/null +++ b/fs/aufs/file.c -@@ -0,0 +1,845 @@ +@@ -0,0 +1,857 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -13008,23 +13025,26 @@ index 0000000..33dde75 + */ +static int refresh_file(struct file *file, int (*reopen)(struct file *file)) +{ -+ int err, need_reopen; ++ int err, need_reopen, nbr; + aufs_bindex_t bbot, bindex; + struct dentry *dentry; ++ struct super_block *sb; + struct au_finfo *finfo; + struct au_hfile *hfile; + + dentry = file->f_path.dentry; ++ sb = dentry->d_sb; ++ nbr = au_sbbot(sb) + 1; + finfo = au_fi(file); + if (!finfo->fi_hdir) { + hfile = &finfo->fi_htop; + AuDebugOn(!hfile->hf_file); -+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id); ++ bindex = au_br_index(sb, hfile->hf_br->br_id); + AuDebugOn(bindex < 0); + if (bindex != finfo->fi_btop) + au_set_fbtop(file, bindex); + } else { -+ err = au_fidir_realloc(finfo, au_sbbot(dentry->d_sb) + 1); ++ err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0); + if (unlikely(err)) + goto out; + au_do_refresh_dir(file); @@ -13034,6 +13054,9 @@ index 0000000..33dde75 + need_reopen = 1; + if (!au_test_mmapped(file)) + err = au_file_refresh_by_inode(file, &need_reopen); ++ if (finfo->fi_hdir) ++ /* harmless if err */ ++ au_fidir_realloc(finfo, nbr, /*may_shrink*/1); + if (!err && need_reopen && !d_unlinked(dentry)) + err = reopen(file); + if (!err) { @@ -13141,6 +13164,10 @@ index 0000000..33dde75 + struct page *page, enum migrate_mode mode) +{ AuUnsupport(); return 0; } +#endif ++static bool aufs_isolate_page(struct page *page, isolate_mode_t mode) ++{ AuUnsupport(); return true; } ++static void aufs_putback_page(struct page *page) ++{ AuUnsupport(); } +static int aufs_launder_page(struct page *page) +{ AuUnsupport(); return 0; } +static int aufs_is_partially_uptodate(struct page *page, @@ -13175,6 +13202,8 @@ index 0000000..33dde75 + .releasepage = aufs_releasepage, + /* is fallback_migrate_page ok? 
*/ + /* .migratepage = aufs_migratepage, */ ++ .isolate_page = aufs_isolate_page, ++ .putback_page = aufs_putback_page, + .launder_page = aufs_launder_page, + .is_partially_uptodate = aufs_is_partially_uptodate, + .is_dirty_writeback = aufs_is_dirty_writeback, @@ -13185,7 +13214,7 @@ index 0000000..33dde75 +}; diff --git a/fs/aufs/file.h b/fs/aufs/file.h new file mode 100644 -index 0000000..4698c98 +index 0000000..47cd520 --- /dev/null +++ b/fs/aufs/file.h @@ -0,0 +1,294 @@ @@ -13314,7 +13343,7 @@ index 0000000..4698c98 + +void au_update_figen(struct file *file); +struct au_fidir *au_fidir_alloc(struct super_block *sb); -+int au_fidir_realloc(struct au_finfo *finfo, int nbr); ++int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink); + +void au_fi_init_once(void *_fi); +void au_finfo_fin(struct file *file, int atonce); @@ -13485,7 +13514,7 @@ index 0000000..4698c98 +#endif /* __AUFS_FILE_H__ */ diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c new file mode 100644 -index 0000000..dfb905d +index 0000000..4f6e92a --- /dev/null +++ b/fs/aufs/finfo.c @@ -0,0 +1,151 @@ @@ -13570,7 +13599,7 @@ index 0000000..dfb905d + return fidir; +} + -+int au_fidir_realloc(struct au_finfo *finfo, int nbr) ++int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink) +{ + int err; + struct au_fidir *fidir, *p; @@ -13581,7 +13610,7 @@ index 0000000..dfb905d + + err = -ENOMEM; + p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr), -+ GFP_NOFS); ++ GFP_NOFS, may_shrink); + if (p) { + p->fd_nent = nbr; + finfo->fi_hdir = p; @@ -15132,10 +15161,10 @@ index 0000000..4444fe1 +} diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c new file mode 100644 -index 0000000..8517019 +index 0000000..ad00ba5 --- /dev/null +++ b/fs/aufs/i_op.c -@@ -0,0 +1,1413 @@ +@@ -0,0 +1,1451 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -15169,12 +15198,15 @@ index 0000000..8517019 + int err; + const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND)); + ++ err = -EPERM; ++ if (write_mask && IS_IMMUTABLE(h_inode)) ++ goto out; ++ + err = -EACCES; -+ if ((write_mask && IS_IMMUTABLE(h_inode)) -+ || ((mask & MAY_EXEC) -+ && S_ISREG(h_inode->i_mode) -+ && (path_noexec(h_path) -+ || !(h_inode->i_mode & S_IXUGO)))) ++ if (((mask & MAY_EXEC) ++ && S_ISREG(h_inode->i_mode) ++ && (path_noexec(h_path) ++ || !(h_inode->i_mode & S_IXUGO)))) + goto out; + + /* @@ -16444,20 +16476,55 @@ index 0000000..8517019 + +/* ---------------------------------------------------------------------- */ + ++static int au_is_special(struct inode *inode) ++{ ++ return (inode->i_mode & (S_IFBLK | S_IFCHR | S_IFIFO | S_IFSOCK)); ++} ++ +static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags) +{ + int err; ++ aufs_bindex_t bindex; + struct super_block *sb; + struct inode *h_inode; ++ struct vfsmount *h_mnt; + + sb = inode->i_sb; ++ WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode), ++ "unexpected s_flags 0x%lx", sb->s_flags); ++ + /* mmap_sem might be acquired already, cf. 
aufs_mmap() */ + lockdep_off(); + si_read_lock(sb, AuLock_FLUSH); + ii_write_lock_child(inode); + lockdep_on(); -+ h_inode = au_h_iptr(inode, au_ibtop(inode)); -+ err = vfsub_update_time(h_inode, ts, flags); ++ ++ err = 0; ++ bindex = au_ibtop(inode); ++ h_inode = au_h_iptr(inode, bindex); ++ if (!au_test_ro(sb, bindex, inode)) { ++ h_mnt = au_sbr_mnt(sb, bindex); ++ err = vfsub_mnt_want_write(h_mnt); ++ if (!err) { ++ err = vfsub_update_time(h_inode, ts, flags); ++ vfsub_mnt_drop_write(h_mnt); ++ } ++ } else if (au_is_special(h_inode)) { ++ /* ++ * Never copy-up here. ++ * These special files may already be opened and used for ++ * communicating. If we copied it up, then the communication ++ * would be corrupted. ++ */ ++ AuWarn1("timestamps for i%lu are ignored " ++ "since it is on readonly branch (hi%lu).\n", ++ inode->i_ino, h_inode->i_ino); ++ } else if (flags & ~S_ATIME) { ++ err = -EIO; ++ AuIOErr1("unexpected flags 0x%x\n", flags); ++ AuDebugOn(1); ++ } ++ + lockdep_off(); + if (!err) + au_cpup_attr_timesizes(inode); @@ -19019,10 +19086,10 @@ index 0000000..200b4d5 +} diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c new file mode 100644 -index 0000000..db04b18 +index 0000000..17cc9e9 --- /dev/null +++ b/fs/aufs/iinfo.c -@@ -0,0 +1,284 @@ +@@ -0,0 +1,285 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -19243,7 +19310,7 @@ index 0000000..db04b18 + return -ENOMEM; +} + -+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr) ++int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink) +{ + int err, i; + struct au_hinode *hip; @@ -19251,7 +19318,8 @@ index 0000000..db04b18 + AuRwMustWriteLock(&iinfo->ii_rwsem); + + err = -ENOMEM; -+ hip = krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS); ++ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS, ++ may_shrink); + if (hip) { + iinfo->ii_hinode = hip; + i = iinfo->ii_bbot + 1; @@ -19309,10 +19377,10 @@ index 0000000..db04b18 +} diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c new file mode 100644 -index 0000000..2234241 +index 0000000..16f0a37 --- /dev/null +++ b/fs/aufs/inode.c -@@ -0,0 +1,517 @@ +@@ -0,0 +1,519 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. 
Okajima + * @@ -19355,7 +19423,7 @@ index 0000000..2234241 + +static int au_ii_refresh(struct inode *inode, int *update) +{ -+ int err, e; ++ int err, e, nbr; + umode_t type; + aufs_bindex_t bindex, new_bindex; + struct super_block *sb; @@ -19367,9 +19435,10 @@ index 0000000..2234241 + + *update = 0; + sb = inode->i_sb; ++ nbr = au_sbbot(sb) + 1; + type = inode->i_mode & S_IFMT; + iinfo = au_ii(inode); -+ err = au_hinode_realloc(iinfo, au_sbbot(sb) + 1); ++ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0); + if (unlikely(err)) + goto out; + @@ -19407,6 +19476,7 @@ index 0000000..2234241 + } + } + au_update_ibrange(inode, /*do_put_zero*/0); ++ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */ + e = au_dy_irefresh(inode); + if (unlikely(e && !err)) + err = e; @@ -19832,7 +19902,7 @@ index 0000000..2234241 +} diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h new file mode 100644 -index 0000000..f433330 +index 0000000..2f8c1c6 --- /dev/null +++ b/fs/aufs/inode.h @@ -0,0 +1,700 @@ @@ -20109,7 +20179,7 @@ index 0000000..f433330 +void au_hinode_init(struct au_hinode *hinode); +int au_iinfo_init(struct inode *inode); +void au_iinfo_fin(struct inode *inode); -+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr); ++int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink); + +#ifdef CONFIG_PROC_FS +/* plink.c */ @@ -20763,10 +20833,10 @@ index 0000000..fc5529b +#endif diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c new file mode 100644 -index 0000000..77df479 +index 0000000..eeb2a30 --- /dev/null +++ b/fs/aufs/loop.c -@@ -0,0 +1,146 @@ +@@ -0,0 +1,147 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -20873,7 +20943,8 @@ index 0000000..77df479 + new_nelem = au_warn_loopback_nelem + au_warn_loopback_step; + a = au_kzrealloc(au_warn_loopback_array, + au_warn_loopback_nelem * sizeof(unsigned long), -+ new_nelem * sizeof(unsigned long), GFP_ATOMIC); ++ new_nelem * sizeof(unsigned long), GFP_ATOMIC, ++ /*may_shrink*/0); + if (a) { + au_warn_loopback_nelem = new_nelem; + au_warn_loopback_array = a; @@ -21009,10 +21080,10 @@ index 0000000..4f83bdf +endif diff --git a/fs/aufs/module.c b/fs/aufs/module.c new file mode 100644 -index 0000000..01e55ab +index 0000000..ba19f08 --- /dev/null +++ b/fs/aufs/module.c -@@ -0,0 +1,289 @@ +@@ -0,0 +1,333 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. 
Okajima + * @@ -21038,13 +21109,57 @@ index 0000000..01e55ab +#include +#include "aufs.h" + -+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp) ++/* shrinkable realloc */ ++void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink) +{ -+ if (new_sz <= nused) -+ return p; ++ size_t sz; ++ int diff; + -+ p = krealloc(p, new_sz, gfp); -+ if (p) ++ sz = 0; ++ diff = -1; ++ if (p) { ++#if 0 /* unused */ ++ if (!new_sz) { ++ au_delayed_kfree(p); ++ p = NULL; ++ goto out; ++ } ++#else ++ AuDebugOn(!new_sz); ++#endif ++ sz = ksize(p); ++ diff = au_kmidx_sub(sz, new_sz); ++ } ++ if (sz && !diff) ++ goto out; ++ ++ if (sz < new_sz) ++ /* expand or SLOB */ ++ p = krealloc(p, new_sz, gfp); ++ else if (new_sz < sz && may_shrink) { ++ /* shrink */ ++ void *q; ++ ++ q = kmalloc(new_sz, gfp); ++ if (q) { ++ if (p) { ++ memcpy(q, p, new_sz); ++ au_delayed_kfree(p); ++ } ++ p = q; ++ } else ++ p = NULL; ++ } ++ ++out: ++ return p; ++} ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp, ++ int may_shrink) ++{ ++ p = au_krealloc(p, new_sz, gfp, may_shrink); ++ if (p && new_sz > nused) + memset(p + nused, 0, new_sz - nused); + return p; +} @@ -21304,10 +21419,10 @@ index 0000000..01e55ab +module_exit(aufs_exit); diff --git a/fs/aufs/module.h b/fs/aufs/module.h new file mode 100644 -index 0000000..c81b221 +index 0000000..cb3ae30 --- /dev/null +++ b/fs/aufs/module.h -@@ -0,0 +1,144 @@ +@@ -0,0 +1,156 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -21348,7 +21463,19 @@ index 0000000..c81b221 + +extern int au_dir_roflags; + -+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp); ++void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink); ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp, ++ int may_shrink); ++ ++static inline int au_kmidx_sub(size_t sz, size_t new_sz) ++{ ++#ifndef CONFIG_SLOB ++ return kmalloc_index(sz) - kmalloc_index(new_sz); ++#else ++ return -1; /* SLOB is untested */ ++#endif ++} ++ +int au_seq_path(struct seq_file *seq, struct path *path); + +#ifdef CONFIG_PROC_FS @@ -25695,10 +25822,10 @@ index 0000000..678fe6f +#endif /* __AUFS_RWSEM_H__ */ diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c new file mode 100644 -index 0000000..3859f7f +index 0000000..0b7e93b --- /dev/null +++ b/fs/aufs/sbinfo.c -@@ -0,0 +1,354 @@ +@@ -0,0 +1,355 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. 
Okajima + * @@ -25832,7 +25959,7 @@ index 0000000..3859f7f + return err; +} + -+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr) ++int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink) +{ + int err, sz; + struct au_branch **brp; @@ -25843,7 +25970,8 @@ index 0000000..3859f7f + sz = sizeof(*brp) * (sbinfo->si_bbot + 1); + if (unlikely(!sz)) + sz = sizeof(*brp); -+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS); ++ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS, ++ may_shrink); + if (brp) { + sbinfo->si_branch = brp; + err = 0; @@ -27218,7 +27346,7 @@ index 0000000..093fba6 +}; diff --git a/fs/aufs/super.h b/fs/aufs/super.h new file mode 100644 -index 0000000..4e6e5ab +index 0000000..37ea182 --- /dev/null +++ b/fs/aufs/super.h @@ -0,0 +1,638 @@ @@ -27503,7 +27631,7 @@ index 0000000..4e6e5ab +/* sbinfo.c */ +void au_si_free(struct kobject *kobj); +int au_si_alloc(struct super_block *sb); -+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr); ++int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink); + +unsigned int au_sigen_inc(struct super_block *sb); +aufs_bindex_t au_new_br_id(struct super_block *sb); @@ -28624,10 +28752,10 @@ index 0000000..cbebb37 +} diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c new file mode 100644 -index 0000000..1fe1c42 +index 0000000..1ca6760 --- /dev/null +++ b/fs/aufs/vdir.c -@@ -0,0 +1,899 @@ +@@ -0,0 +1,900 @@ +/* + * Copyright (C) 2005-2016 Junjiro R. Okajima + * @@ -28909,8 +29037,8 @@ index 0000000..1fe1c42 + unsigned char **o; + + err = -ENOMEM; -+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1), -+ GFP_NOFS); ++ o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1), ++ GFP_NOFS, /*may_shrink*/0); + if (unlikely(!o)) + goto out; + @@ -29333,8 +29461,8 @@ index 0000000..1fe1c42 + if (tgt->vd_nblk < src->vd_nblk) { + unsigned char **p; + -+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk, -+ GFP_NOFS); ++ p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk, ++ GFP_NOFS, /*may_shrink*/0); + if (unlikely(!p)) + goto out; + tgt->vd_deblk = p; @@ -29344,7 +29472,8 @@ index 0000000..1fe1c42 + unsigned char *p; + + tgt->vd_deblk_sz = deblk_sz; -+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS); ++ p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS, ++ /*may_shrink*/1); + if (unlikely(!p)) + goto out; + tgt->vd_deblk[0] = p; @@ -34663,10 +34792,10 @@ index 0000000..7f62beb + return err; +} diff --git a/fs/dcache.c b/fs/dcache.c -index 1ed81bb..34f4ea4 100644 +index 5c7cc95..76280ee 100644 --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -1205,7 +1205,7 @@ enum d_walk_ret { +@@ -1164,7 +1164,7 @@ enum d_walk_ret { * * The @enter() and @finish() callbacks are called with d_lock held. */ @@ -34675,7 +34804,7 @@ index 1ed81bb..34f4ea4 100644 enum d_walk_ret (*enter)(void *, struct dentry *), void (*finish)(void *)) { -@@ -1313,6 +1313,7 @@ rename_retry: +@@ -1272,6 +1272,7 @@ rename_retry: seq = 1; goto again; } @@ -34684,7 +34813,7 @@ index 1ed81bb..34f4ea4 100644 /* * Search for at least 1 mount point in the dentry's subdirs. 
diff --git a/fs/exec.c b/fs/exec.c -index 887c1c9..40e8767 100644 +index 6fcfb3f..ed9d646 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -104,6 +104,7 @@ bool path_noexec(const struct path *path) @@ -34762,10 +34891,10 @@ index ad17e05..ae9f267 100644 void __init files_init(void) { diff --git a/fs/inode.c b/fs/inode.c -index 9ea4219..ef8c6907 100644 +index 7e3ef3a..aa171ce 100644 --- a/fs/inode.c +++ b/fs/inode.c -@@ -851,6 +851,8 @@ unsigned int get_next_ino(void) +@@ -853,6 +853,8 @@ unsigned int get_next_ino(void) unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; @@ -34774,7 +34903,7 @@ index 9ea4219..ef8c6907 100644 #ifdef CONFIG_SMP if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { static atomic_t shared_last_ino; -@@ -863,7 +865,7 @@ unsigned int get_next_ino(void) +@@ -865,7 +867,7 @@ unsigned int get_next_ino(void) res++; /* get_next_ino should not provide a 0 inode number */ if (unlikely(!res)) @@ -34783,7 +34912,7 @@ index 9ea4219..ef8c6907 100644 *p = res; put_cpu_var(last_ino); return res; -@@ -1591,7 +1593,7 @@ EXPORT_SYMBOL(generic_update_time); +@@ -1593,7 +1595,7 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ @@ -34792,7 +34921,7 @@ index 9ea4219..ef8c6907 100644 { int (*update_time)(struct inode *, struct timespec *, int); -@@ -1600,6 +1602,7 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) +@@ -1602,6 +1604,7 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) return update_time(inode, time, flags); } @@ -34801,7 +34930,7 @@ index 9ea4219..ef8c6907 100644 /** * touch_atime - update the access time diff --git a/fs/namespace.c b/fs/namespace.c -index 419f746..9c0e0af 100644 +index 7bb2cda..88ec098 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -463,6 +463,7 @@ void __mnt_drop_write(struct vfsmount *mnt) @@ -34821,7 +34950,7 @@ index 419f746..9c0e0af 100644 static void cleanup_group_ids(struct mount *mnt, struct mount *end) { diff --git a/fs/notify/group.c b/fs/notify/group.c -index 3e2dd85..b17cb4b 100644 +index b47f7cf..618bc9e 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -22,6 +22,7 @@ @@ -34832,7 +34961,7 @@ index 3e2dd85..b17cb4b 100644 #include #include "fsnotify.h" -@@ -81,6 +82,7 @@ void fsnotify_get_group(struct fsnotify_group *group) +@@ -100,6 +101,7 @@ void fsnotify_get_group(struct fsnotify_group *group) { atomic_inc(&group->refcnt); } @@ -34840,7 +34969,7 @@ index 3e2dd85..b17cb4b 100644 /* * Drop a reference to a group. Free it if it's through. -@@ -90,6 +92,7 @@ void fsnotify_put_group(struct fsnotify_group *group) +@@ -109,6 +111,7 @@ void fsnotify_put_group(struct fsnotify_group *group) if (atomic_dec_and_test(&group->refcnt)) fsnotify_final_destroy_group(group); } @@ -34848,7 +34977,7 @@ index 3e2dd85..b17cb4b 100644 /* * Create a new fsnotify_group and hold a reference for the group returned. 
-@@ -118,6 +121,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) +@@ -137,6 +140,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) return group; } @@ -34893,7 +35022,7 @@ index d3fea0b..5fc06ad 100644 /* * Destroy all marks in destroy_list, waits for SRCU period to finish before diff --git a/fs/open.c b/fs/open.c -index 93ae3cd..d25b9bd 100644 +index 4fd6e25..ec6f532 100644 --- a/fs/open.c +++ b/fs/open.c @@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, @@ -34913,10 +35042,10 @@ index 93ae3cd..d25b9bd 100644 static int do_dentry_open(struct file *f, struct inode *inode, diff --git a/fs/proc/base.c b/fs/proc/base.c -index a11eb71..8f10865 100644 +index ac0df4d..42255e5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c -@@ -1939,7 +1939,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) +@@ -1938,7 +1938,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { @@ -34942,7 +35071,7 @@ index f8595e8..cb8eda0 100644 ino = inode->i_ino; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c -index 4648c7f..061cb85 100644 +index f6fa99e..2750949 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -298,7 +298,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) @@ -34957,7 +35086,7 @@ index 4648c7f..061cb85 100644 dev = inode->i_sb->s_dev; ino = inode->i_ino; pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; -@@ -1624,7 +1627,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) +@@ -1634,7 +1637,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct proc_maps_private *proc_priv = &numa_priv->proc_maps; struct vm_area_struct *vma = v; struct numa_maps *md = &numa_priv->md; @@ -34983,7 +35112,7 @@ index faacb0c..17b43be 100644 ino = inode->i_ino; pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; diff --git a/fs/read_write.c b/fs/read_write.c -index 933b53a..260c4a4 100644 +index 66215a7..c643215 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -515,6 +515,30 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, @@ -35059,10 +35188,10 @@ index dd9bf7e..0606690 100644 /** * splice_direct_to_actor - splices data directly between two non-pipes diff --git a/fs/xattr.c b/fs/xattr.c -index 4beafc4..e118715 100644 +index c243905..b60dc60 100644 --- a/fs/xattr.c +++ b/fs/xattr.c -@@ -207,6 +207,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, +@@ -214,6 +214,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, *xattr_value = value; return error; } @@ -35083,10 +35212,10 @@ index 7444f5f..bdac0be 100644 static inline void fput_light(struct file *file, int fput_needed) { diff --git a/include/linux/fs.h b/include/linux/fs.h -index dd28814..b689a48 100644 +index 901e25d..a71aa9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -1306,6 +1306,7 @@ extern void fasync_free(struct fasync_struct *); +@@ -1275,6 +1275,7 @@ extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); @@ -35094,7 +35223,7 @@ index dd28814..b689a48 100644 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern void f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct 
file *filp); -@@ -1690,6 +1691,7 @@ struct file_operations { +@@ -1699,6 +1700,7 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); @@ -35102,7 +35231,7 @@ index dd28814..b689a48 100644 int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); -@@ -1750,6 +1752,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, +@@ -1759,6 +1761,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec *fast_pointer, struct iovec **ret_pointer); @@ -35115,7 +35244,7 @@ index dd28814..b689a48 100644 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); -@@ -2105,6 +2113,7 @@ extern int current_umask(void); +@@ -2123,6 +2131,7 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); @@ -35124,10 +35253,10 @@ index dd28814..b689a48 100644 /* /sys/fs */ extern struct kobject *fs_kobj; diff --git a/include/linux/mm.h b/include/linux/mm.h -index ece042d..1e24513 100644 +index 277cd39..582d30b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h -@@ -1239,6 +1239,28 @@ static inline int fixup_user_fault(struct task_struct *tsk, +@@ -1278,6 +1278,28 @@ static inline int fixup_user_fault(struct task_struct *tsk, } #endif @@ -35157,10 +35286,10 @@ index ece042d..1e24513 100644 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index ca3e517..10bc491 100644 +index 903200f..55fc528 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h -@@ -274,6 +274,7 @@ struct vm_region { +@@ -275,6 +275,7 @@ struct vm_region { unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ @@ -35168,7 +35297,7 @@ index ca3e517..10bc491 100644 int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for -@@ -348,6 +349,7 @@ struct vm_area_struct { +@@ -349,6 +350,7 @@ struct vm_area_struct { unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */ struct file * vm_file; /* File we map to (can be NULL). 
*/ @@ -35192,7 +35321,7 @@ index da2751d..2e0fca6 100644 + unsigned int flags); #endif diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild -index ec10cfe..800211b 100644 +index 185f8ea..5deb0d1 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -59,6 +59,7 @@ header-y += atmsvc.h @@ -35205,7 +35334,7 @@ index ec10cfe..800211b 100644 header-y += auxvec.h diff --git a/include/uapi/linux/aufs_type.h b/include/uapi/linux/aufs_type.h new file mode 100644 -index 0000000..cacb35d +index 0000000..13a7beb --- /dev/null +++ b/include/uapi/linux/aufs_type.h @@ -0,0 +1,419 @@ @@ -35250,7 +35379,7 @@ index 0000000..cacb35d + +#include + -+#define AUFS_VERSION "4.7-20160822" ++#define AUFS_VERSION "4.8-20161010" + +/* todo? move this to linux-2.6.19/include/magic.h */ +#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's') @@ -35629,10 +35758,10 @@ index 0000000..cacb35d + +#endif /* __AUFS_TYPE_H__ */ diff --git a/kernel/fork.c b/kernel/fork.c -index 4a7ec0c..8c8f7ac 100644 +index beb3172..ad4cfa8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -479,7 +479,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) +@@ -477,7 +477,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct inode *inode = file_inode(file); struct address_space *mapping = file->f_mapping; @@ -35642,19 +35771,19 @@ index 4a7ec0c..8c8f7ac 100644 atomic_dec(&inode->i_writecount); i_mmap_lock_write(mapping); diff --git a/kernel/task_work.c b/kernel/task_work.c -index 53fa971..bce3211 100644 +index d513051..e056d54 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c -@@ -118,3 +118,4 @@ void task_work_run(void) +@@ -119,3 +119,4 @@ void task_work_run(void) } while (work); } } +EXPORT_SYMBOL_GPL(task_work_run); diff --git a/mm/Makefile b/mm/Makefile -index 78c6f7d..aea4230 100644 +index 2ca1faf..6b9da3f 100644 --- a/mm/Makefile +++ b/mm/Makefile -@@ -37,7 +37,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ +@@ -40,7 +40,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ @@ -35664,10 +35793,10 @@ index 78c6f7d..aea4230 100644 obj-y += init-mm.o diff --git a/mm/filemap.c b/mm/filemap.c -index 20f3b1f..ee827ce 100644 +index 2d0986a..4a31bad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c -@@ -2208,7 +2208,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +@@ -2284,7 +2284,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); @@ -35677,10 +35806,10 @@ index 20f3b1f..ee827ce 100644 if (page->mapping != inode->i_mapping) { unlock_page(page); diff --git a/mm/memory.c b/mm/memory.c -index 9e04681..06980d1 100644 +index 793fe0f..45f39f3 100644 --- a/mm/memory.c +++ b/mm/memory.c -@@ -2100,7 +2100,7 @@ static inline int wp_page_reuse(struct mm_struct *mm, +@@ -2113,7 +2113,7 @@ static inline int wp_page_reuse(struct fault_env *fe, pte_t orig_pte, } if (!page_mkwrite) @@ -35690,10 +35819,10 @@ index 9e04681..06980d1 100644 return VM_FAULT_WRITE; diff --git a/mm/mmap.c b/mm/mmap.c -index de2c176..b7f391c 100644 +index ca9d91b..f3ebc5a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c -@@ -162,7 +162,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) +@@ -163,7 +163,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); if (vma->vm_file) 
@@ -35702,7 +35831,7 @@ index de2c176..b7f391c 100644 mpol_put(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); return next; -@@ -782,7 +782,7 @@ again: remove_next = 1 + (end > next->vm_end); +@@ -790,7 +790,7 @@ again: if (remove_next) { if (file) { uprobe_munmap(next, next->vm_start, next->vm_end); @@ -35711,7 +35840,7 @@ index de2c176..b7f391c 100644 } if (next->anon_vma) anon_vma_merge(vma, next); -@@ -1563,8 +1563,8 @@ out: +@@ -1574,8 +1574,8 @@ out: return addr; unmap_and_free_vma: @@ -35721,7 +35850,7 @@ index de2c176..b7f391c 100644 /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); -@@ -2358,7 +2358,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, +@@ -2380,7 +2380,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, goto out_free_mpol; if (new->vm_file) @@ -35730,7 +35859,7 @@ index de2c176..b7f391c 100644 if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); -@@ -2377,7 +2377,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, +@@ -2399,7 +2399,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (new->vm_ops && new->vm_ops->close) new->vm_ops->close(new); if (new->vm_file) @@ -35739,7 +35868,7 @@ index de2c176..b7f391c 100644 unlink_anon_vmas(new); out_free_mpol: mpol_put(vma_policy(new)); -@@ -2528,7 +2528,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, +@@ -2550,7 +2550,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, struct vm_area_struct *vma; unsigned long populate = 0; unsigned long ret = -EINVAL; @@ -35748,7 +35877,7 @@ index de2c176..b7f391c 100644 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. 
See Documentation/vm/remap_file_pages.txt.\n", current->comm, current->pid); -@@ -2597,10 +2597,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, +@@ -2625,10 +2625,27 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } } @@ -35777,7 +35906,7 @@ index de2c176..b7f391c 100644 out: up_write(&mm->mmap_sem); if (populate) -@@ -2873,7 +2890,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, +@@ -2903,7 +2920,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (anon_vma_clone(new_vma, vma)) goto out_free_mempol; if (new_vma->vm_file) @@ -35787,7 +35916,7 @@ index de2c176..b7f391c 100644 new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); diff --git a/mm/nommu.c b/mm/nommu.c -index c2e588802..c39edc4 100644 +index 95daf81..5086a29 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -644,7 +644,7 @@ static void __put_nommu_region(struct vm_region *region) @@ -35923,10 +36052,10 @@ index 0000000..b323b8a +} +#endif /* !CONFIG_MMU */ diff --git a/security/commoncap.c b/security/commoncap.c -index e7fadde..6423e53 100644 +index 14540bd..4e3b242 100644 --- a/security/commoncap.c +++ b/security/commoncap.c -@@ -1058,12 +1058,14 @@ int cap_mmap_addr(unsigned long addr) +@@ -1066,12 +1066,14 @@ int cap_mmap_addr(unsigned long addr) } return ret; } @@ -35962,7 +36091,7 @@ index 03c1652..f88c84b 100644 int devcgroup_inode_mknod(int mode, dev_t dev) { diff --git a/security/security.c b/security/security.c -index 7095693..da7fe2c 100644 +index 4838e7f..36c741e 100644 --- a/security/security.c +++ b/security/security.c @@ -434,6 +434,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry) @@ -36029,7 +36158,7 @@ index 7095693..da7fe2c 100644 int security_inode_setattr(struct dentry *dentry, struct iattr *attr) { -@@ -737,6 +745,7 @@ int security_file_permission(struct file *file, int mask) +@@ -758,6 +766,7 @@ int security_file_permission(struct file *file, int mask) return fsnotify_perm(file, mask); } @@ -36037,7 +36166,7 @@ index 7095693..da7fe2c 100644 int security_file_alloc(struct file *file) { -@@ -796,6 +805,7 @@ int security_mmap_file(struct file *file, unsigned long prot, +@@ -817,6 +826,7 @@ int security_mmap_file(struct file *file, unsigned long prot, return ret; return ima_file_mmap(file, prot); } diff --git a/kernel-i586.config b/kernel-i586.config index 5608066..30f8490 100644 --- a/kernel-i586.config +++ b/kernel-i586.config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/x86 4.7.x-nrj-desktop Kernel Configuration +# Linux/x86 4.8.x-nrj-desktop Kernel Configuration # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -106,7 +106,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PREEMPT_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y -CONFIG_TASKS_RCU=y +# CONFIG_TASKS_RCU is not set CONFIG_RCU_STALL_COMMON=y # CONFIG_TREE_RCU_TRACE is not set # CONFIG_RCU_EXPEDITE_BOOT is not set @@ -201,6 +201,7 @@ CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set +# CONFIG_SLAB_FREELIST_RANDOM is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SYSTEM_DATA_VERIFICATION is not set CONFIG_PROFILING=y @@ -218,6 +219,7 @@ CONFIG_KRETPROBES=y CONFIG_USER_RETURN_NOTIFIER=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_SECCOMP_FILTER=y +# CONFIG_GCC_PLUGINS is not set # CONFIG_CC_STACKPROTECTOR is not set CONFIG_CC_STACKPROTECTOR_NONE=y # CONFIG_CC_STACKPROTECTOR_REGULAR is not set @@ -576,9 +578,12 @@ CONFIG_ACPI_APEI_PCIEAER=y CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=m CONFIG_ACPI_APEI_ERST_DEBUG=m +CONFIG_DPTF_POWER=m CONFIG_ACPI_EXTLOG=m CONFIG_PMIC_OPREGION=y CONFIG_CRC_PMIC_OPREGION=y +# CONFIG_BXT_WC_PMIC_OPREGION is not set +CONFIG_ACPI_CONFIGFS=m CONFIG_SFI=y CONFIG_X86_APM_BOOT=y CONFIG_APM=y @@ -594,8 +599,7 @@ CONFIG_APM=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=m -CONFIG_CPU_FREQ_STAT_DETAILS=y +# CONFIG_CPU_FREQ_STAT is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set @@ -670,7 +674,7 @@ CONFIG_PCIEASPM_DEFAULT=y # CONFIG_PCIEASPM_POWERSAVE is not set # CONFIG_PCIEASPM_PERFORMANCE is not set CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=m +# CONFIG_PCIE_DPC is not set CONFIG_PCI_BUS_ADDR_T_64BIT=y CONFIG_PCI_MSI=y CONFIG_PCI_MSI_IRQ_DOMAIN=y @@ -735,6 +739,7 @@ CONFIG_RAPIDIO_DISC_TIMEOUT=30 CONFIG_RAPIDIO_DMA_ENGINE=y # CONFIG_RAPIDIO_DEBUG is not set CONFIG_RAPIDIO_ENUM_BASIC=m +CONFIG_RAPIDIO_CHMAN=m CONFIG_RAPIDIO_MPORT_CDEV=m # @@ -744,6 +749,7 @@ CONFIG_RAPIDIO_TSI57X=y CONFIG_RAPIDIO_CPS_XX=y CONFIG_RAPIDIO_TSI568=y CONFIG_RAPIDIO_CPS_GEN2=y +CONFIG_RAPIDIO_RXS_GEN3=m # CONFIG_X86_SYSFB is not set # @@ -823,6 +829,7 @@ CONFIG_TCP_CONG_HTCP=m CONFIG_TCP_CONG_HSTCP=m CONFIG_TCP_CONG_HYBLA=m CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_NV=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m @@ -1341,6 +1348,7 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 CONFIG_NET_EMATCH_CMP=m @@ -1378,6 +1386,8 @@ CONFIG_OPENVSWITCH_VXLAN=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_VSOCKETS=m CONFIG_VMWARE_VMCI_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS_COMMON=m CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m @@ -1386,6 +1396,7 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_HSR=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_L3_MASTER_DEV=y +# CONFIG_NET_NCSI is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_XPS=y @@ -1836,6 +1847,7 @@ CONFIG_MTD_NAND_CS553X=m CONFIG_MTD_NAND_NANDSIM=m CONFIG_MTD_NAND_PLATFORM=m CONFIG_MTD_NAND_HISI504=m +CONFIG_MTD_NAND_MTK=m CONFIG_MTD_ONENAND=m CONFIG_MTD_ONENAND_VERIFY_WRITE=y CONFIG_MTD_ONENAND_GENERIC=m @@ -2043,7 +2055,7 @@ CONFIG_VMWARE_VMCI=m # CONFIG_ECHO=m # CONFIG_CXL_BASE is not set -# CONFIG_CXL_KERNEL_API is not set +# CONFIG_CXL_AFU_DRIVER_OPS is not set # CONFIG_CXL_EEH is not set CONFIG_IDE=m @@ 
-2217,7 +2229,9 @@ CONFIG_SCSI_MPT3SAS_MAX_SGE=128 CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_UFSHCD=m CONFIG_SCSI_UFSHCD_PCI=m +CONFIG_SCSI_UFS_DWC_TC_PCI=m CONFIG_SCSI_UFSHCD_PLATFORM=m +CONFIG_SCSI_UFS_DWC_TC_PLATFORM=m CONFIG_SCSI_HPTIOP=m CONFIG_SCSI_BUSLOGIC=m CONFIG_SCSI_FLASHPOINT=y @@ -2514,6 +2528,7 @@ CONFIG_CAIF_HSI=m CONFIG_CAIF_VIRTIO=m CONFIG_VHOST_NET=m CONFIG_VHOST_SCSI=m +CONFIG_VHOST_VSOCK=m CONFIG_VHOST_RING=m CONFIG_VHOST=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set @@ -2522,8 +2537,13 @@ CONFIG_VHOST=m # Distributed Switch Architecture drivers # CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MV88E6XXX=m CONFIG_NET_DSA_BCM_SF2=m +CONFIG_B53=m +CONFIG_B53_SPI_DRIVER=m +CONFIG_B53_MDIO_DRIVER=m +CONFIG_B53_MMAP_DRIVER=m +CONFIG_B53_SRAB_DRIVER=m +CONFIG_NET_DSA_MV88E6XXX=m CONFIG_ETHERNET=y CONFIG_MDIO=m CONFIG_NET_VENDOR_3COM=y @@ -2585,6 +2605,7 @@ CONFIG_CHELSIO_T4_DCB=y CONFIG_CHELSIO_T4_UWIRE=y CONFIG_CHELSIO_T4_FCOE=y CONFIG_CHELSIO_T4VF=m +CONFIG_CHELSIO_LIB=m CONFIG_NET_VENDOR_CIRRUS=y CONFIG_CS89x0=m # CONFIG_CS89x0_PLATFORM is not set @@ -2812,6 +2833,7 @@ CONFIG_FIXED_PHY=m CONFIG_MDIO_BITBANG=m CONFIG_MDIO_GPIO=m CONFIG_MDIO_BCM_UNIMAC=m +CONFIG_INTEL_XWAY_PHY=m CONFIG_MICREL_KS8995MA=m CONFIG_PLIP=m CONFIG_PPP=m @@ -3382,6 +3404,7 @@ CONFIG_TABLET_USB_AIPTEK=m CONFIG_TABLET_USB_GTCO=m CONFIG_TABLET_USB_HANWANG=m CONFIG_TABLET_USB_KBTAB=m +CONFIG_TABLET_USB_PEGASUS=m CONFIG_TABLET_SERIAL_WACOM4=m CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_PROPERTIES=y @@ -3463,8 +3486,12 @@ CONFIG_TOUCHSCREEN_TSC2004=m CONFIG_TOUCHSCREEN_TSC2005=m CONFIG_TOUCHSCREEN_TSC2007=m CONFIG_TOUCHSCREEN_PCAP=m +CONFIG_TOUCHSCREEN_RM_TS=m +CONFIG_TOUCHSCREEN_SILEAD=m +CONFIG_TOUCHSCREEN_SIS_I2C=m CONFIG_TOUCHSCREEN_ST1232=m CONFIG_TOUCHSCREEN_SUR40=m +CONFIG_TOUCHSCREEN_SURFACE3_SPI=m CONFIG_TOUCHSCREEN_SX8654=m CONFIG_TOUCHSCREEN_TPS6507X=m CONFIG_TOUCHSCREEN_ZFORCE=m @@ -3688,7 +3715,9 @@ CONFIG_HPET_MMAP=y CONFIG_HPET_MMAP_DEFAULT=y CONFIG_HANGCHECK_TIMER=m CONFIG_TCG_TPM=y +CONFIG_TCG_TIS_CORE=y CONFIG_TCG_TIS=y +CONFIG_TCG_TIS_SPI=m CONFIG_TCG_TIS_I2C_ATMEL=m CONFIG_TCG_TIS_I2C_INFINEON=m CONFIG_TCG_TIS_I2C_NUVOTON=m @@ -3696,6 +3725,7 @@ CONFIG_TCG_NSC=m CONFIG_TCG_ATMEL=m CONFIG_TCG_INFINEON=m CONFIG_TCG_CRB=m +CONFIG_TCG_VTPM_PROXY=m CONFIG_TCG_TIS_ST33ZP24=m CONFIG_TCG_TIS_ST33ZP24_I2C=m CONFIG_TCG_TIS_ST33ZP24_SPI=m @@ -4106,6 +4136,7 @@ CONFIG_SENSORS_F71882FG=m CONFIG_SENSORS_F75375S=m CONFIG_SENSORS_MC13783_ADC=m CONFIG_SENSORS_FSCHMD=m +CONFIG_SENSORS_FTSTEUTATES=m CONFIG_SENSORS_GL518SM=m CONFIG_SENSORS_GL520SM=m # CONFIG_SENSORS_G760A is not set @@ -4184,6 +4215,7 @@ CONFIG_SENSORS_UCD9200=m CONFIG_SENSORS_ZL6100=m # CONFIG_SENSORS_SHT15 is not set # CONFIG_SENSORS_SHT21 is not set +CONFIG_SENSORS_SHT3x=m CONFIG_SENSORS_SHTC1=m CONFIG_SENSORS_SIS5595=m CONFIG_SENSORS_DME1737=m @@ -4204,6 +4236,7 @@ CONFIG_SENSORS_ADS7871=m # CONFIG_SENSORS_AMC6821 is not set CONFIG_SENSORS_INA209=m # CONFIG_SENSORS_INA2XX is not set +CONFIG_SENSORS_INA3221=m CONFIG_SENSORS_TC74=m CONFIG_SENSORS_THMC50=m CONFIG_SENSORS_TMP102=m @@ -4373,6 +4406,7 @@ CONFIG_BCMA_HOST_PCI_POSSIBLE=y CONFIG_BCMA_HOST_PCI=y CONFIG_BCMA_HOST_SOC=y CONFIG_BCMA_DRIVER_PCI=y +CONFIG_BCMA_SFLASH=y CONFIG_BCMA_DRIVER_GMAC_CMN=y CONFIG_BCMA_DRIVER_GPIO=y # CONFIG_BCMA_DEBUG is not set @@ -5111,6 +5145,8 @@ CONFIG_DVB_M88DS3103=m CONFIG_DVB_DRXK=m CONFIG_DVB_TDA18271C2DD=m CONFIG_DVB_SI2165=m +CONFIG_DVB_MN88472=m +CONFIG_DVB_MN88473=m # # DVB-S (satellite) frontends @@ -5236,6 +5272,7 @@ CONFIG_DVB_M88RS2000=m 
CONFIG_DVB_AF9033=m CONFIG_DVB_HORUS3A=m CONFIG_DVB_ASCOT2E=m +CONFIG_DVB_HELENE=m # # Tools to develop new frontends @@ -5297,6 +5334,7 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y CONFIG_DRM_I915=m # CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT is not set CONFIG_DRM_I915_USERPTR=y +# CONFIG_DRM_I915_GVT is not set # # drm/i915 Debugging @@ -5767,9 +5805,14 @@ CONFIG_SND_SST_MFLD_PLATFORM=m CONFIG_SND_SST_IPC=m CONFIG_SND_SST_IPC_ACPI=m CONFIG_SND_SOC_INTEL_SST=m +CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m CONFIG_SND_SOC_INTEL_SST_ACPI=m CONFIG_SND_SOC_INTEL_SST_MATCH=m +CONFIG_SND_SOC_INTEL_HASWELL=m +CONFIG_SND_SOC_INTEL_HASWELL_MACH=m +CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m +CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m @@ -5784,6 +5827,7 @@ CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m # Allwinner SoC Audio support # CONFIG_SND_SUN4I_CODEC=m +CONFIG_SND_SUN4I_I2S=m CONFIG_SND_SOC_XTFPGA_I2S=m CONFIG_SND_SOC_I2C_AND_SPI=m @@ -5792,13 +5836,16 @@ CONFIG_SND_SOC_I2C_AND_SPI=m # CONFIG_SND_SOC_AC97_CODEC=m CONFIG_SND_SOC_ADAU1701=m +CONFIG_SND_SOC_ADAU7002=m CONFIG_SND_SOC_AK4104=m CONFIG_SND_SOC_AK4554=m CONFIG_SND_SOC_AK4613=m CONFIG_SND_SOC_AK4642=m CONFIG_SND_SOC_AK5386=m CONFIG_SND_SOC_ALC5623=m +CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CS35L32=m +CONFIG_SND_SOC_CS35L33=m CONFIG_SND_SOC_CS42L51=m CONFIG_SND_SOC_CS42L51_I2C=m CONFIG_SND_SOC_CS42L52=m @@ -5812,6 +5859,8 @@ CONFIG_SND_SOC_CS4271_SPI=m CONFIG_SND_SOC_CS42XX8=m CONFIG_SND_SOC_CS42XX8_I2C=m CONFIG_SND_SOC_CS4349=m +CONFIG_SND_SOC_CS53L30=m +CONFIG_SND_SOC_DA7219=m CONFIG_SND_SOC_DMIC=m CONFIG_SND_SOC_ES8328=m CONFIG_SND_SOC_GTM601=m @@ -5819,6 +5868,8 @@ CONFIG_SND_SOC_HDAC_HDMI=m CONFIG_SND_SOC_INNO_RK3036=m CONFIG_SND_SOC_MAX98090=m CONFIG_SND_SOC_MAX98357A=m +CONFIG_SND_SOC_MAX98504=m +CONFIG_SND_SOC_MAX9860=m CONFIG_SND_SOC_PCM1681=m CONFIG_SND_SOC_PCM179X=m CONFIG_SND_SOC_PCM179X_I2C=m @@ -5883,8 +5934,10 @@ CONFIG_SND_SOC_WM8960=m CONFIG_SND_SOC_WM8962=m CONFIG_SND_SOC_WM8974=m CONFIG_SND_SOC_WM8978=m +CONFIG_SND_SOC_WM8985=m CONFIG_SND_SOC_NAU8825=m CONFIG_SND_SOC_TPA6130A2=m +CONFIG_SND_SIMPLE_CARD_UTILS=m CONFIG_SND_SIMPLE_CARD=m CONFIG_SOUND_PRIME=m # CONFIG_SOUND_MSNDCLAS is not set @@ -5955,6 +6008,7 @@ CONFIG_HID_ICADE=m CONFIG_HID_TWINHAN=m CONFIG_HID_KENSINGTON=m CONFIG_HID_LCPOWER=m +CONFIG_HID_LED=m CONFIG_HID_LENOVO=m CONFIG_HID_LOGITECH=m CONFIG_HID_LOGITECH_DJ=m @@ -6008,6 +6062,7 @@ CONFIG_ZEROPLUS_FF=y CONFIG_HID_ZYDACRON=m CONFIG_HID_SENSOR_HUB=m # CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set +CONFIG_HID_ALPS=m # # USB HID support @@ -6341,6 +6396,7 @@ CONFIG_LEDS_WRAP=m # CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_GPIO is not set # CONFIG_LEDS_LP3944 is not set +CONFIG_LEDS_LP3952=m CONFIG_LEDS_LP55XX_COMMON=m CONFIG_LEDS_LP5521=m CONFIG_LEDS_LP5523=m @@ -6382,7 +6438,7 @@ CONFIG_LEDS_BLINKM=m CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_IDE_DISK=y +# CONFIG_LEDS_TRIGGER_DISK is not set # CONFIG_LEDS_TRIGGER_MTD is not set CONFIG_LEDS_TRIGGER_HEARTBEAT=m CONFIG_LEDS_TRIGGER_BACKLIGHT=m @@ -6424,6 +6480,7 @@ CONFIG_EDAC_I5000=m CONFIG_EDAC_I5100=m CONFIG_EDAC_I7300=m CONFIG_RTC_LIB=y +CONFIG_RTC_MC146818_LIB=y CONFIG_RTC_CLASS=y CONFIG_RTC_HCTOSYS=y CONFIG_RTC_HCTOSYS_DEVICE="rtc0" @@ -6495,6 +6552,7 @@ CONFIG_RTC_DRV_DS1305=m CONFIG_RTC_DRV_DS1343=m CONFIG_RTC_DRV_DS1347=m CONFIG_RTC_DRV_DS1390=m 
+CONFIG_RTC_DRV_MAX6916=m CONFIG_RTC_DRV_R9701=m CONFIG_RTC_DRV_RX4581=m CONFIG_RTC_DRV_RX6110=m @@ -6795,7 +6853,6 @@ CONFIG_ADIS16201=m CONFIG_ADIS16203=m CONFIG_ADIS16209=m CONFIG_ADIS16240=m -CONFIG_LIS3L02DQ=m CONFIG_SCA3000=m # @@ -6892,8 +6949,8 @@ CONFIG_SPEAKUP_SYNTH_TXPRT=m CONFIG_SPEAKUP_SYNTH_DUMMY=m CONFIG_STAGING_MEDIA=y CONFIG_I2C_BCM2048=m +# CONFIG_MEDIA_CEC is not set CONFIG_DVB_CXD2099=m -CONFIG_DVB_MN88472=m CONFIG_LIRC_STAGING=y CONFIG_LIRC_BT829=m CONFIG_LIRC_IMON=m @@ -6943,6 +7000,7 @@ CONFIG_HDM_USB=m CONFIG_ISDN_DRV_ICN=m CONFIG_ISDN_DRV_PCBIT=m CONFIG_ISDN_DRV_ACT2000=m +CONFIG_KS7010=m CONFIG_X86_PLATFORM_DEVICES=y CONFIG_ACER_WMI=m CONFIG_ACERHDF=m @@ -6991,6 +7049,7 @@ CONFIG_TOSHIBA_HAPS=m CONFIG_TOSHIBA_WMI=m CONFIG_ACPI_CMPC=m CONFIG_INTEL_HID_EVENT=m +CONFIG_INTEL_VBTN=m CONFIG_INTEL_IPS=m # CONFIG_INTEL_PMC_CORE is not set CONFIG_IBM_RTL=m @@ -7029,7 +7088,7 @@ CONFIG_COMMON_CLK_PALMAS=m CONFIG_COMMON_CLK_PWM=m # CONFIG_COMMON_CLK_PXA is not set # CONFIG_COMMON_CLK_PIC32 is not set -# CONFIG_COMMON_CLK_OXNAS is not set +# CONFIG_SUNXI_CCU is not set # # Hardware Spinlock drivers @@ -7074,6 +7133,10 @@ CONFIG_STE_MODEM_RPROC=m # # SOC (System On Chip) specific Drivers # + +# +# Broadcom SoC drivers +# # CONFIG_SUNXI_SRAM is not set CONFIG_SOC_TI=y CONFIG_PM_DEVFREQ=y @@ -7117,6 +7180,7 @@ CONFIG_IIO_TRIGGERED_BUFFER=m CONFIG_IIO_CONFIGFS=m CONFIG_IIO_TRIGGER=y CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 +CONFIG_IIO_SW_DEVICE=m CONFIG_IIO_SW_TRIGGER=m CONFIG_IIO_TRIGGERED_EVENT=m @@ -7124,6 +7188,7 @@ CONFIG_IIO_TRIGGERED_EVENT=m # Accelerometers # CONFIG_BMA180=m +CONFIG_BMA220=m CONFIG_BMC150_ACCEL=m CONFIG_BMC150_ACCEL_I2C=m CONFIG_BMC150_ACCEL_SPI=m @@ -7136,6 +7201,7 @@ CONFIG_KXCJK1013=m CONFIG_MMA7455=m CONFIG_MMA7455_I2C=m CONFIG_MMA7455_SPI=m +CONFIG_MMA7660=m CONFIG_MMA8452=m # CONFIG_MMA9551 is not set # CONFIG_MMA9553 is not set @@ -7367,12 +7433,14 @@ CONFIG_HID_SENSOR_DEVICE_ROTATION=m # CONFIG_IIO_HRTIMER_TRIGGER=m CONFIG_IIO_INTERRUPT_TRIGGER=m +CONFIG_IIO_TIGHTLOOP_TRIGGER=m CONFIG_IIO_SYSFS_TRIGGER=m # # Digital potentiometers # CONFIG_DS1803=m +CONFIG_MAX5487=m CONFIG_MCP4131=m CONFIG_MCP4531=m CONFIG_TPL0102=m @@ -7438,6 +7506,7 @@ CONFIG_VME_PIO2=m CONFIG_PWM=y CONFIG_PWM_SYSFS=y CONFIG_PWM_CRC=y +CONFIG_PWM_CROS_EC=m CONFIG_PWM_LP3943=m CONFIG_PWM_LPSS=m CONFIG_PWM_LPSS_PCI=m @@ -7450,6 +7519,7 @@ CONFIG_IPACK_BUS=m # CONFIG_BOARD_TPCI200 is not set # CONFIG_SERIAL_IPOCTAL is not set CONFIG_RESET_CONTROLLER=y +CONFIG_TI_SYSCON_RESET=m CONFIG_FMC=m CONFIG_FMC_FAKEDEV=m CONFIG_FMC_TRIVIAL=m @@ -7554,8 +7624,7 @@ CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=m -CONFIG_EXT4_FS_ENCRYPTION=y +# CONFIG_EXT4_ENCRYPTION is not set # CONFIG_EXT4_DEBUG is not set CONFIG_JBD2=y # CONFIG_JBD2_DEBUG is not set @@ -7604,6 +7673,7 @@ CONFIG_F2FS_FS_SECURITY=y CONFIG_FS_DAX=y CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y +# CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_MANDATORY_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=m @@ -7743,6 +7813,9 @@ CONFIG_ROMFS_BACKED_BY_BOTH=y CONFIG_ROMFS_ON_BLOCK=y CONFIG_ROMFS_ON_MTD=y CONFIG_PSTORE=y +CONFIG_PSTORE_ZLIB_COMPRESS=y +# CONFIG_PSTORE_LZO_COMPRESS is not set +# CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_FTRACE is not set @@ -7791,6 +7864,7 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y # CONFIG_NFSD_BLOCKLAYOUT is not set # CONFIG_NFSD_SCSILAYOUT is not set +# 
CONFIG_NFSD_FLEXFILELAYOUT is not set # CONFIG_NFSD_V4_SECURITY_LABEL is not set # CONFIG_NFSD_FAULT_INJECTION is not set CONFIG_GRACE_PERIOD=m @@ -7817,8 +7891,8 @@ CONFIG_CIFS_DEBUG=y # CONFIG_CIFS_DEBUG2 is not set CONFIG_CIFS_DFS_UPCALL=y CONFIG_CIFS_SMB2=y -CONFIG_CIFS_FSCACHE=y # CONFIG_CIFS_SMB311 is not set +CONFIG_CIFS_FSCACHE=y CONFIG_NCP_FS=m CONFIG_NCPFS_PACKET_SIGNING=y # CONFIG_NCPFS_IOCTL_LOCKING is not set @@ -8132,6 +8206,7 @@ CONFIG_SECURITY_NETWORK_XFRM=y CONFIG_SECURITY_PATH=y CONFIG_INTEL_TXT=y CONFIG_LSM_MMAP_MIN_ADDR=65536 +# CONFIG_HARDENED_USERCOPY is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -8190,7 +8265,11 @@ CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_RNG_DEFAULT=m CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=m +CONFIG_CRYPTO_KPP2=y +CONFIG_CRYPTO_KPP=m CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_USER=m @@ -8256,6 +8335,7 @@ CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m diff --git a/kernel-nrj-desktop-i586.config b/kernel-nrj-desktop-i586.config index 2b087af..36aded3 100644 --- a/kernel-nrj-desktop-i586.config +++ b/kernel-nrj-desktop-i586.config @@ -135,3 +135,8 @@ CONFIG_INFINIBAND_ISERT=m CONFIG_STAGING_RDMA=m CONFIG_LNET_XPRT_IB=m CONFIG_SUNRPC_XPRT_RDMA=m +CONFIG_NVME_RDMA=m +CONFIG_NVME_TARGET=m +CONFIG_NVME_TARGET_LOOP=m +CONFIG_NVME_TARGET_RDMA=m +CONFIG_RDMA_RXE=m \ No newline at end of file diff --git a/kernel-nrj-desktop-x86_64.config b/kernel-nrj-desktop-x86_64.config index 9b5435e..49e6978 100644 --- a/kernel-nrj-desktop-x86_64.config +++ b/kernel-nrj-desktop-x86_64.config @@ -129,3 +129,8 @@ CONFIG_ISDN_DIVAS_PRIPCI=y CONFIG_ISDN_DIVAS_DIVACAPI=m CONFIG_ISDN_DIVAS_USERIDI=m CONFIG_ISDN_DIVAS_MAINT=m +CONFIG_NVME_RDMA=m +CONFIG_NVME_TARGET=m +CONFIG_NVME_TARGET_LOOP=m +CONFIG_NVME_TARGET_RDMA=m +CONFIG_RDMA_RXE=m diff --git a/kernel-nrj-laptop-i586.config b/kernel-nrj-laptop-i586.config index d1702be..a37581a 100644 --- a/kernel-nrj-laptop-i586.config +++ b/kernel-nrj-laptop-i586.config @@ -11,3 +11,6 @@ CONFIG_HZ=300 # CONFIG_CAPI_AVM is not set # CONFIG_CAPI_EICON is not set # CONFIG_INFINIBAND is not set +# CONFIG_NVME_RDMA is not set +# CONFIG_NVME_TARGET is not set +# CONFIG_RDMA_RXE is not set diff --git a/kernel-nrj-laptop-x86_64.config b/kernel-nrj-laptop-x86_64.config index 5b69945..7b43968 100644 --- a/kernel-nrj-laptop-x86_64.config +++ b/kernel-nrj-laptop-x86_64.config @@ -11,3 +11,6 @@ CONFIG_HZ=300 # CONFIG_WAN is not set # CONFIG_CAPI_AVM is not set # CONFIG_CAPI_EICON is not set +# CONFIG_NVME_RDMA is not set +# CONFIG_NVME_TARGET is not set +# CONFIG_RDMA_RXE is not set diff --git a/kernel-x86_64.config b/kernel-x86_64.config index b797099..a142e41 100644 --- a/kernel-x86_64.config +++ b/kernel-x86_64.config @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/x86 4.7.x-nrj-desktop Kernel Configuration +# Linux/x86 4.8.x-nrj-desktop Kernel Configuration # CONFIG_64BIT=y CONFIG_X86_64=y @@ -110,7 +110,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PREEMPT_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y -CONFIG_TASKS_RCU=y +# CONFIG_TASKS_RCU is not set CONFIG_RCU_STALL_COMMON=y # CONFIG_TREE_RCU_TRACE is not set # CONFIG_RCU_EXPEDITE_BOOT is not set @@ -206,6 +206,7 @@ CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set +# CONFIG_SLAB_FREELIST_RANDOM is not set CONFIG_SLUB_CPU_PARTIAL=y # CONFIG_SYSTEM_DATA_VERIFICATION is not set CONFIG_PROFILING=y @@ -223,6 +224,7 @@ CONFIG_KRETPROBES=y CONFIG_USER_RETURN_NOTIFIER=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_SECCOMP_FILTER=y +# CONFIG_GCC_PLUGINS is not set # CONFIG_CC_STACKPROTECTOR is not set CONFIG_CC_STACKPROTECTOR_NONE=y # CONFIG_CC_STACKPROTECTOR_REGULAR is not set @@ -492,6 +494,7 @@ CONFIG_RANDOMIZE_BASE=y CONFIG_RANDOMIZE_BASE_MAX_OFFSET=0x40000000 CONFIG_X86_NEED_RELOCS=y CONFIG_PHYSICAL_ALIGN=0x1000000 +# CONFIG_RANDOMIZE_MEMORY is not set CONFIG_HOTPLUG_CPU=y # CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set # CONFIG_DEBUG_HOTPLUG_CPU0 is not set @@ -573,9 +576,12 @@ CONFIG_ACPI_APEI_PCIEAER=y CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=m # CONFIG_ACPI_APEI_ERST_DEBUG is not set +CONFIG_DPTF_POWER=m CONFIG_ACPI_EXTLOG=m CONFIG_PMIC_OPREGION=y CONFIG_CRC_PMIC_OPREGION=y +# CONFIG_BXT_WC_PMIC_OPREGION is not set +CONFIG_ACPI_CONFIGFS=m CONFIG_SFI=y # @@ -584,8 +590,7 @@ CONFIG_SFI=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_GOV_COMMON=y -CONFIG_CPU_FREQ_STAT=m -CONFIG_CPU_FREQ_STAT_DETAILS=y +# CONFIG_CPU_FREQ_STAT is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set @@ -650,7 +655,7 @@ CONFIG_PCIEASPM_DEFAULT=y # CONFIG_PCIEASPM_POWERSAVE is not set # CONFIG_PCIEASPM_PERFORMANCE is not set CONFIG_PCIE_PME=y -CONFIG_PCIE_DPC=m +# CONFIG_PCIE_DPC is not set CONFIG_PCI_BUS_ADDR_T_64BIT=y CONFIG_PCI_MSI=y CONFIG_PCI_MSI_IRQ_DOMAIN=y @@ -704,6 +709,7 @@ CONFIG_RAPIDIO_DISC_TIMEOUT=30 CONFIG_RAPIDIO_DMA_ENGINE=y # CONFIG_RAPIDIO_DEBUG is not set CONFIG_RAPIDIO_ENUM_BASIC=m +CONFIG_RAPIDIO_CHMAN=m CONFIG_RAPIDIO_MPORT_CDEV=m # @@ -713,6 +719,7 @@ CONFIG_RAPIDIO_TSI57X=y CONFIG_RAPIDIO_CPS_XX=y CONFIG_RAPIDIO_TSI568=y CONFIG_RAPIDIO_CPS_GEN2=y +CONFIG_RAPIDIO_RXS_GEN3=m # CONFIG_X86_SYSFB is not set # @@ -799,6 +806,7 @@ CONFIG_TCP_CONG_HTCP=m CONFIG_TCP_CONG_HSTCP=m CONFIG_TCP_CONG_HYBLA=m CONFIG_TCP_CONG_VEGAS=m +CONFIG_TCP_CONG_NV=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m @@ -1313,6 +1321,7 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 CONFIG_NET_EMATCH_CMP=m @@ -1350,6 +1359,8 @@ CONFIG_OPENVSWITCH_VXLAN=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_VSOCKETS=m CONFIG_VMWARE_VMCI_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS_COMMON=m CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=y @@ -1358,6 +1369,7 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_HSR=m CONFIG_NET_SWITCHDEV=y CONFIG_NET_L3_MASTER_DEV=y +# CONFIG_NET_NCSI is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_XPS=y @@ -1800,6 +1812,7 @@ CONFIG_MTD_NAND_CAFE=m CONFIG_MTD_NAND_NANDSIM=m CONFIG_MTD_NAND_PLATFORM=m CONFIG_MTD_NAND_HISI504=m +CONFIG_MTD_NAND_MTK=m CONFIG_MTD_ONENAND=m 
CONFIG_MTD_ONENAND_VERIFY_WRITE=y CONFIG_MTD_ONENAND_GENERIC=m @@ -2013,6 +2026,7 @@ CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY=0 CONFIG_ECHO=m # CONFIG_CXL_BASE is not set # CONFIG_CXL_KERNEL_API is not set +# CONFIG_CXL_AFU_DRIVER_OPS is not set # CONFIG_CXL_EEH is not set CONFIG_IDE=m @@ -2162,7 +2176,9 @@ CONFIG_SCSI_MPT3SAS_MAX_SGE=128 CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_UFSHCD=m CONFIG_SCSI_UFSHCD_PCI=m +CONFIG_SCSI_UFS_DWC_TC_PCI=m CONFIG_SCSI_UFSHCD_PLATFORM=m +CONFIG_SCSI_UFS_DWC_TC_PLATFORM=m CONFIG_SCSI_HPTIOP=m CONFIG_SCSI_BUSLOGIC=m CONFIG_SCSI_FLASHPOINT=y @@ -2437,6 +2453,7 @@ CONFIG_CAIF_HSI=m CONFIG_CAIF_VIRTIO=m CONFIG_VHOST_NET=m CONFIG_VHOST_SCSI=m +CONFIG_VHOST_VSOCK=m CONFIG_VHOST_RING=m CONFIG_VHOST=m # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set @@ -2445,8 +2462,13 @@ CONFIG_VHOST=m # Distributed Switch Architecture drivers # CONFIG_NET_DSA_MV88E6060=m -CONFIG_NET_DSA_MV88E6XXX=m CONFIG_NET_DSA_BCM_SF2=m +CONFIG_B53=m +CONFIG_B53_SPI_DRIVER=m +CONFIG_B53_MDIO_DRIVER=m +CONFIG_B53_MMAP_DRIVER=m +CONFIG_B53_SRAB_DRIVER=m +CONFIG_NET_DSA_MV88E6XXX=m CONFIG_ETHERNET=y CONFIG_MDIO=m CONFIG_NET_VENDOR_3COM=y @@ -2508,6 +2530,7 @@ CONFIG_CHELSIO_T4_DCB=y CONFIG_CHELSIO_T4_UWIRE=y CONFIG_CHELSIO_T4_FCOE=y CONFIG_CHELSIO_T4VF=m +CONFIG_CHELSIO_LIB=m CONFIG_NET_VENDOR_CISCO=y CONFIG_ENIC=m CONFIG_CX_ECAT=m @@ -2734,6 +2757,7 @@ CONFIG_MDIO_CAVIUM=m CONFIG_MDIO_OCTEON=m CONFIG_MDIO_THUNDER=m CONFIG_MDIO_BCM_UNIMAC=m +CONFIG_INTEL_XWAY_PHY=m CONFIG_MICREL_KS8995MA=m CONFIG_PLIP=m CONFIG_PPP=m @@ -3292,6 +3316,7 @@ CONFIG_TABLET_USB_AIPTEK=m CONFIG_TABLET_USB_GTCO=m CONFIG_TABLET_USB_HANWANG=m CONFIG_TABLET_USB_KBTAB=m +CONFIG_TABLET_USB_PEGASUS=m CONFIG_TABLET_SERIAL_WACOM4=m CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_PROPERTIES=y @@ -3372,8 +3397,12 @@ CONFIG_TOUCHSCREEN_TSC2004=m CONFIG_TOUCHSCREEN_TSC2005=m CONFIG_TOUCHSCREEN_TSC2007=m CONFIG_TOUCHSCREEN_PCAP=m +CONFIG_TOUCHSCREEN_RM_TS=m +CONFIG_TOUCHSCREEN_SILEAD=m +CONFIG_TOUCHSCREEN_SIS_I2C=m CONFIG_TOUCHSCREEN_ST1232=m CONFIG_TOUCHSCREEN_SUR40=m +CONFIG_TOUCHSCREEN_SURFACE3_SPI=m CONFIG_TOUCHSCREEN_SX8654=m CONFIG_TOUCHSCREEN_TPS6507X=m CONFIG_TOUCHSCREEN_ZFORCE=m @@ -3500,6 +3529,7 @@ CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_PNP=y +CONFIG_SERIAL_8250_FINTEK=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_PCI=y @@ -3514,7 +3544,6 @@ CONFIG_SERIAL_8250_RSA=y # CONFIG_SERIAL_8250_FSL is not set CONFIG_SERIAL_8250_DW=m CONFIG_SERIAL_8250_RT288X=y -CONFIG_SERIAL_8250_FINTEK=y CONFIG_SERIAL_8250_MID=m CONFIG_SERIAL_8250_MOXA=m @@ -3586,7 +3615,9 @@ CONFIG_HPET_MMAP=y CONFIG_HPET_MMAP_DEFAULT=y CONFIG_HANGCHECK_TIMER=m CONFIG_TCG_TPM=y +CONFIG_TCG_TIS_CORE=y CONFIG_TCG_TIS=y +CONFIG_TCG_TIS_SPI=m CONFIG_TCG_TIS_I2C_ATMEL=m CONFIG_TCG_TIS_I2C_INFINEON=m CONFIG_TCG_TIS_I2C_NUVOTON=m @@ -3595,6 +3626,7 @@ CONFIG_TCG_ATMEL=m CONFIG_TCG_INFINEON=m CONFIG_TCG_XEN=m CONFIG_TCG_CRB=m +CONFIG_TCG_VTPM_PROXY=m CONFIG_TCG_TIS_ST33ZP24=m CONFIG_TCG_TIS_ST33ZP24_I2C=m CONFIG_TCG_TIS_ST33ZP24_SPI=m @@ -3996,6 +4028,7 @@ CONFIG_SENSORS_F71882FG=m CONFIG_SENSORS_F75375S=m CONFIG_SENSORS_MC13783_ADC=m CONFIG_SENSORS_FSCHMD=m +CONFIG_SENSORS_FTSTEUTATES=m CONFIG_SENSORS_GL518SM=m CONFIG_SENSORS_GL520SM=m # CONFIG_SENSORS_G760A is not set @@ -4074,6 +4107,7 @@ CONFIG_SENSORS_UCD9200=m CONFIG_SENSORS_ZL6100=m # CONFIG_SENSORS_SHT15 is not set # CONFIG_SENSORS_SHT21 is not set +CONFIG_SENSORS_SHT3x=m CONFIG_SENSORS_SHTC1=m CONFIG_SENSORS_SIS5595=m 
CONFIG_SENSORS_DME1737=m @@ -4094,6 +4128,7 @@ CONFIG_SENSORS_ADS7871=m # CONFIG_SENSORS_AMC6821 is not set CONFIG_SENSORS_INA209=m # CONFIG_SENSORS_INA2XX is not set +CONFIG_SENSORS_INA3221=m CONFIG_SENSORS_TC74=m CONFIG_SENSORS_THMC50=m CONFIG_SENSORS_TMP102=m @@ -4252,6 +4287,7 @@ CONFIG_BCMA_HOST_PCI_POSSIBLE=y CONFIG_BCMA_HOST_PCI=y CONFIG_BCMA_HOST_SOC=y CONFIG_BCMA_DRIVER_PCI=y +CONFIG_BCMA_SFLASH=y CONFIG_BCMA_DRIVER_GMAC_CMN=y CONFIG_BCMA_DRIVER_GPIO=y # CONFIG_BCMA_DEBUG is not set @@ -4955,6 +4991,8 @@ CONFIG_DVB_M88DS3103=m CONFIG_DVB_DRXK=m CONFIG_DVB_TDA18271C2DD=m CONFIG_DVB_SI2165=m +CONFIG_DVB_MN88472=m +CONFIG_DVB_MN88473=m # # DVB-S (satellite) frontends @@ -5080,6 +5118,7 @@ CONFIG_DVB_M88RS2000=m CONFIG_DVB_AF9033=m CONFIG_DVB_HORUS3A=m CONFIG_DVB_ASCOT2E=m +CONFIG_DVB_HELENE=m # # Tools to develop new frontends @@ -5135,6 +5174,7 @@ CONFIG_DRM_NOUVEAU_BACKLIGHT=y CONFIG_DRM_I915=m # CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT is not set CONFIG_DRM_I915_USERPTR=y +# CONFIG_DRM_I915_GVT is not set # # drm/i915 Debugging @@ -5562,9 +5602,14 @@ CONFIG_SND_SST_MFLD_PLATFORM=m CONFIG_SND_SST_IPC=m CONFIG_SND_SST_IPC_ACPI=m CONFIG_SND_SOC_INTEL_SST=m +CONFIG_SND_SOC_INTEL_SST_FIRMWARE=m CONFIG_SND_SOC_INTEL_SST_ACPI=m CONFIG_SND_SOC_INTEL_SST_MATCH=m +CONFIG_SND_SOC_INTEL_HASWELL=m +CONFIG_SND_SOC_INTEL_HASWELL_MACH=m +CONFIG_SND_SOC_INTEL_BXT_DA7219_MAX98357A_MACH=m CONFIG_SND_SOC_INTEL_BXT_RT298_MACH=m +CONFIG_SND_SOC_INTEL_BROADWELL_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5640_MACH=m CONFIG_SND_SOC_INTEL_BYTCR_RT5651_MACH=m CONFIG_SND_SOC_INTEL_CHT_BSW_RT5672_MACH=m @@ -5579,6 +5624,7 @@ CONFIG_SND_SOC_INTEL_SKL_NAU88L25_MAX98357A_MACH=m # Allwinner SoC Audio support # CONFIG_SND_SUN4I_CODEC=m +CONFIG_SND_SUN4I_I2S=m CONFIG_SND_SOC_XTFPGA_I2S=m CONFIG_SND_SOC_I2C_AND_SPI=m @@ -5587,13 +5633,16 @@ CONFIG_SND_SOC_I2C_AND_SPI=m # CONFIG_SND_SOC_AC97_CODEC=m CONFIG_SND_SOC_ADAU1701=m +CONFIG_SND_SOC_ADAU7002=m CONFIG_SND_SOC_AK4104=m CONFIG_SND_SOC_AK4554=m CONFIG_SND_SOC_AK4613=m CONFIG_SND_SOC_AK4642=m CONFIG_SND_SOC_AK5386=m CONFIG_SND_SOC_ALC5623=m +CONFIG_SND_SOC_BT_SCO=m CONFIG_SND_SOC_CS35L32=m +CONFIG_SND_SOC_CS35L33=m CONFIG_SND_SOC_CS42L51=m CONFIG_SND_SOC_CS42L51_I2C=m CONFIG_SND_SOC_CS42L52=m @@ -5607,6 +5656,8 @@ CONFIG_SND_SOC_CS4271_SPI=m CONFIG_SND_SOC_CS42XX8=m CONFIG_SND_SOC_CS42XX8_I2C=m CONFIG_SND_SOC_CS4349=m +CONFIG_SND_SOC_CS53L30=m +CONFIG_SND_SOC_DA7219=m CONFIG_SND_SOC_DMIC=m CONFIG_SND_SOC_ES8328=m CONFIG_SND_SOC_GTM601=m @@ -5614,6 +5665,8 @@ CONFIG_SND_SOC_HDAC_HDMI=m CONFIG_SND_SOC_INNO_RK3036=m CONFIG_SND_SOC_MAX98090=m CONFIG_SND_SOC_MAX98357A=m +CONFIG_SND_SOC_MAX98504=m +CONFIG_SND_SOC_MAX9860=m CONFIG_SND_SOC_PCM1681=m CONFIG_SND_SOC_PCM179X=m CONFIG_SND_SOC_PCM179X_I2C=m @@ -5678,8 +5731,10 @@ CONFIG_SND_SOC_WM8960=m CONFIG_SND_SOC_WM8962=m CONFIG_SND_SOC_WM8974=m CONFIG_SND_SOC_WM8978=m +CONFIG_SND_SOC_WM8985=m CONFIG_SND_SOC_NAU8825=m CONFIG_SND_SOC_TPA6130A2=m +CONFIG_SND_SIMPLE_CARD_UTILS=m CONFIG_SND_SIMPLE_CARD=m CONFIG_SOUND_PRIME=m CONFIG_SOUND_OSS=m @@ -5748,6 +5803,7 @@ CONFIG_HID_ICADE=m CONFIG_HID_TWINHAN=m CONFIG_HID_KENSINGTON=m CONFIG_HID_LCPOWER=m +CONFIG_HID_LED=m CONFIG_HID_LENOVO=m CONFIG_HID_LOGITECH=m CONFIG_HID_LOGITECH_DJ=m @@ -5801,6 +5857,7 @@ CONFIG_ZEROPLUS_FF=y CONFIG_HID_ZYDACRON=m CONFIG_HID_SENSOR_HUB=m # CONFIG_HID_SENSOR_CUSTOM_SENSOR is not set +CONFIG_HID_ALPS=m # # USB HID support @@ -6133,6 +6190,7 @@ CONFIG_LEDS_LM3642=m # CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_GPIO is not set # CONFIG_LEDS_LP3944 is not 
set +CONFIG_LEDS_LP3952=m CONFIG_LEDS_LP55XX_COMMON=m CONFIG_LEDS_LP5521=m CONFIG_LEDS_LP5523=m @@ -6173,7 +6231,7 @@ CONFIG_LEDS_BLINKM=m CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_ONESHOT=m -CONFIG_LEDS_TRIGGER_IDE_DISK=y +# CONFIG_LEDS_TRIGGER_DISK is not set # CONFIG_LEDS_TRIGGER_MTD is not set CONFIG_LEDS_TRIGGER_HEARTBEAT=m CONFIG_LEDS_TRIGGER_BACKLIGHT=m @@ -6210,7 +6268,9 @@ CONFIG_EDAC_I5000=m CONFIG_EDAC_I5100=m CONFIG_EDAC_I7300=m CONFIG_EDAC_SBRIDGE=m +CONFIG_EDAC_SKX=m CONFIG_RTC_LIB=y +CONFIG_RTC_MC146818_LIB=y CONFIG_RTC_CLASS=y CONFIG_RTC_HCTOSYS=y CONFIG_RTC_HCTOSYS_DEVICE="rtc0" @@ -6282,6 +6342,7 @@ CONFIG_RTC_DRV_DS1305=m CONFIG_RTC_DRV_DS1343=m CONFIG_RTC_DRV_DS1347=m CONFIG_RTC_DRV_DS1390=m +CONFIG_RTC_DRV_MAX6916=m CONFIG_RTC_DRV_R9701=m CONFIG_RTC_DRV_RX4581=m CONFIG_RTC_DRV_RX6110=m @@ -6706,8 +6767,8 @@ CONFIG_SPEAKUP_SYNTH_TXPRT=m CONFIG_SPEAKUP_SYNTH_DUMMY=m CONFIG_STAGING_MEDIA=y CONFIG_I2C_BCM2048=m +# CONFIG_MEDIA_CEC is not set CONFIG_DVB_CXD2099=m -CONFIG_DVB_MN88472=m CONFIG_LIRC_STAGING=y CONFIG_LIRC_BT829=m CONFIG_LIRC_IMON=m @@ -6760,6 +6821,7 @@ CONFIG_HDM_USB=m # # Old ISDN4Linux (deprecated) # +CONFIG_KS7010=m CONFIG_X86_PLATFORM_DEVICES=y CONFIG_ACER_WMI=m CONFIG_ACERHDF=m @@ -6807,6 +6869,7 @@ CONFIG_TOSHIBA_HAPS=m CONFIG_TOSHIBA_WMI=m CONFIG_ACPI_CMPC=m CONFIG_INTEL_HID_EVENT=m +CONFIG_INTEL_VBTN=m CONFIG_INTEL_IPS=m # CONFIG_INTEL_PMC_CORE is not set CONFIG_IBM_RTL=m @@ -6846,7 +6909,7 @@ CONFIG_COMMON_CLK_PALMAS=m CONFIG_COMMON_CLK_PWM=m # CONFIG_COMMON_CLK_PXA is not set # CONFIG_COMMON_CLK_PIC32 is not set -# CONFIG_COMMON_CLK_OXNAS is not set +# CONFIG_SUNXI_CCU is not set # # Hardware Spinlock drivers @@ -6893,6 +6956,10 @@ CONFIG_STE_MODEM_RPROC=m # # SOC (System On Chip) specific Drivers # + +# +# Broadcom SoC drivers +# # CONFIG_SUNXI_SRAM is not set CONFIG_SOC_TI=y CONFIG_PM_DEVFREQ=y @@ -6936,6 +7003,7 @@ CONFIG_IIO_TRIGGERED_BUFFER=m CONFIG_IIO_CONFIGFS=m CONFIG_IIO_TRIGGER=y CONFIG_IIO_CONSUMERS_PER_TRIGGER=2 +CONFIG_IIO_SW_DEVICE=m CONFIG_IIO_SW_TRIGGER=m CONFIG_IIO_TRIGGERED_EVENT=m @@ -6943,6 +7011,7 @@ CONFIG_IIO_TRIGGERED_EVENT=m # Accelerometers # CONFIG_BMA180=m +CONFIG_BMA220=m CONFIG_BMC150_ACCEL=m CONFIG_BMC150_ACCEL_I2C=m CONFIG_BMC150_ACCEL_SPI=m @@ -6955,6 +7024,7 @@ CONFIG_KXCJK1013=m CONFIG_MMA7455=m CONFIG_MMA7455_I2C=m CONFIG_MMA7455_SPI=m +CONFIG_MMA7660=m CONFIG_MMA8452=m # CONFIG_MMA9551 is not set # CONFIG_MMA9553 is not set @@ -7185,12 +7255,14 @@ CONFIG_HID_SENSOR_DEVICE_ROTATION=m # CONFIG_IIO_HRTIMER_TRIGGER=m CONFIG_IIO_INTERRUPT_TRIGGER=m +CONFIG_IIO_TIGHTLOOP_TRIGGER=m CONFIG_IIO_SYSFS_TRIGGER=m # # Digital potentiometers # CONFIG_DS1803=m +CONFIG_MAX5487=m CONFIG_MCP4131=m CONFIG_MCP4531=m CONFIG_TPL0102=m @@ -7258,6 +7330,7 @@ CONFIG_VME_PIO2=m CONFIG_PWM=y CONFIG_PWM_SYSFS=y CONFIG_PWM_CRC=y +CONFIG_PWM_CROS_EC=m CONFIG_PWM_LP3943=m CONFIG_PWM_LPSS=m CONFIG_PWM_LPSS_PCI=m @@ -7268,6 +7341,7 @@ CONFIG_PWM_TWL_LED=m CONFIG_ARM_GIC_MAX_NR=1 # CONFIG_IPACK_BUS is not set CONFIG_RESET_CONTROLLER=y +CONFIG_TI_SYSCON_RESET=m CONFIG_FMC=m CONFIG_FMC_FAKEDEV=m CONFIG_FMC_TRIVIAL=m @@ -7362,6 +7436,7 @@ CONFIG_UEFI_CPER=y # File systems # CONFIG_DCACHE_WORD_ACCESS=y +CONFIG_FS_IOMAP=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -7372,8 +7447,7 @@ CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_EXT4_ENCRYPTION=m -CONFIG_EXT4_FS_ENCRYPTION=y +# CONFIG_EXT4_ENCRYPTION is not set # CONFIG_EXT4_DEBUG is not set 
CONFIG_JBD2=y # CONFIG_JBD2_DEBUG is not set @@ -7422,6 +7496,7 @@ CONFIG_F2FS_FS_SECURITY=y CONFIG_FS_DAX=y CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y +# CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_MANDATORY_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=m @@ -7558,6 +7633,9 @@ CONFIG_ROMFS_BACKED_BY_BOTH=y CONFIG_ROMFS_ON_BLOCK=y CONFIG_ROMFS_ON_MTD=y CONFIG_PSTORE=y +CONFIG_PSTORE_ZLIB_COMPRESS=y +# CONFIG_PSTORE_LZO_COMPRESS is not set +# CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_FTRACE is not set @@ -7605,6 +7683,7 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y # CONFIG_NFSD_BLOCKLAYOUT is not set # CONFIG_NFSD_SCSILAYOUT is not set +# CONFIG_NFSD_FLEXFILELAYOUT is not set # CONFIG_NFSD_V4_SECURITY_LABEL is not set # CONFIG_NFSD_FAULT_INJECTION is not set CONFIG_GRACE_PERIOD=m @@ -7631,8 +7710,8 @@ CONFIG_CIFS_DEBUG=y # CONFIG_CIFS_DEBUG2 is not set CONFIG_CIFS_DFS_UPCALL=y CONFIG_CIFS_SMB2=y -CONFIG_CIFS_FSCACHE=y # CONFIG_CIFS_SMB311 is not set +CONFIG_CIFS_FSCACHE=y CONFIG_NCP_FS=m CONFIG_NCPFS_PACKET_SIGNING=y # CONFIG_NCPFS_IOCTL_LOCKING is not set @@ -7950,6 +8029,7 @@ CONFIG_SECURITY_NETWORK_XFRM=y CONFIG_SECURITY_PATH=y CONFIG_INTEL_TXT=y CONFIG_LSM_MMAP_MIN_ADDR=65536 +# CONFIG_HARDENED_USERCOPY is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -8009,7 +8089,11 @@ CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_RNG_DEFAULT=m CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=m +CONFIG_CRYPTO_KPP2=y +CONFIG_CRYPTO_KPP=m CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_USER=m @@ -8079,8 +8163,11 @@ CONFIG_CRYPTO_SHA1_SSSE3=m CONFIG_CRYPTO_SHA256_SSSE3=m CONFIG_CRYPTO_SHA512_SSSE3=m # CONFIG_CRYPTO_SHA1_MB is not set +CONFIG_CRYPTO_SHA256_MB=m +CONFIG_CRYPTO_SHA512_MB=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL=m diff --git a/kernel.spec b/kernel.spec index 2751077..020cb94 100644 --- a/kernel.spec +++ b/kernel.spec @@ -1,7 +1,7 @@ %define kernelversion 4 -%define patchlevel 7 +%define patchlevel 8 # sublevel is now used for -stable patches -%define sublevel 9 +%define sublevel 4 # Release number. Increase this before a rebuild. 
%define rpmrel 1 @@ -205,10 +205,10 @@ Patch108: ata-prefer-ata-drivers-over-ide-drivers-when-both-are-built.patch Patch109: fs-aufs4.patch # BFQ IO scheduler, http://algogroup.unimore.it/people/paolo/disk_sched/ -Patch111: 0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.7.0.patch -Patch112: 0002-block-introduce-the-BFQ-v7r11-I-O-sched-for-4.7.0.patch -Patch113: 0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-for.patch -Patch114: 0004-block-bfq-turn-BFQ-v7r11-for-4.7.0-into-BFQ-v8r3-for.patch +Patch111: 0001-block-cgroups-kconfig-build-bits-for-BFQ-v7r11-4.8.0.patch +Patch112: 0002-block-introduce-the-BFQ-v7r11-I-O-sched-to-be-ported.patch +Patch113: 0003-block-bfq-add-Early-Queue-Merge-EQM-to-BFQ-v7r11-to-.patch +Patch114: 0004-Turn-BFQ-v7r11-into-BFQ-v8r4-for-4.8.0.patch # Sanitizing kernel memory # We do not use "Patch:" here because apply_patched would always apply it diff --git a/linux-4.7.tar.sign b/linux-4.7.tar.sign deleted file mode 100644 index 7d4db9d..0000000 --- a/linux-4.7.tar.sign +++ /dev/null @@ -1,11 +0,0 @@ ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1 - -iQEcBAABAgAGBQJXlR20AAoJEHm+PkMAQRiGE28H/2prCblJSfXW40RNq3uQydhl -AT8Vo/VnsO3lIa41Py9Iet8ZXP+Wg5ed4nGNXs6myxwW/wxFDx1+peD1pJWWOqf9 -krt1FA7jB4gmqNSsd+AgiUy9ZaRFxTXFXqPdMbiwU8O+UEhYllMJGfobH1RMu4Ul -4uLszvNlppbYxQeB94Ft0cOGeRxJE5jBltc9KJvYOSog1upa+1vNiwHBD5BAOOUC -LJtpUbTr0p3D5/mpzhkGaam7hjRsgqsm6X84ebUdR9RqoMkYaGj2cT9n2NR1FS29 -kwH2rRB2VYIYkUGVb3ELHZTVFJyf4yQWo6/t6frrEmYi3n+Xrc6aNQ3gVKQyEH4= -=xcAK ------END PGP SIGNATURE----- diff --git a/linux-4.8.tar.sign b/linux-4.8.tar.sign new file mode 100644 index 0000000..8ba9f31 --- /dev/null +++ b/linux-4.8.tar.sign @@ -0,0 +1,10 @@ +-----BEGIN PGP SIGNATURE----- + +iQEcBAABAgAGBQJX8Zp6AAoJEHm+PkMAQRiGalEH/R1fgYvi/Miw5xbGH/H0jSBh +9EhHWoyd0H+WVMKGaOTtFPqM9qeoH6RSBl+EDShyRJTb8OOr16/3wMOQUCsgxQNo +CSXS5bRdDSAEik4hUlCWv5j21mZ0YyePFpT/539W4pHMeI7cxfvLoKBJYdSXMWD7 +hepbk5Zn31AJdK8ReUUBHHpCD4yId5YsG2NhHz6MLqZyIoKmi1WWEszzMfQzbgxa +fT45AuOXUPagVnpRDFz2MFftkvZ+LZPPmnOJxn2CedPdN3lUheHb0HdbYf670d4S +zf1s9sis2rQuZxIj70dNsh+fsB29UyUyUhXC9ZWDk0RzfAcnNLsxsIAbXuyp2BI= +=olJ5 +-----END PGP SIGNATURE----- diff --git a/patch-4.7.9.sign b/patch-4.7.9.sign deleted file mode 100644 index 35e7ecb..0000000 --- a/patch-4.7.9.sign +++ /dev/null @@ -1,16 +0,0 @@ ------BEGIN PGP SIGNATURE----- - -iQIcBAABCAAGBQJYCHoqAAoJEDjbvchgkmk+GNoP/ig4wic4ipvx56i2STZGu585 -WDlS+eRm1HXQL3pOkdL4cm2OzW4qbted0exFvHl39TXpdrnjAFd1kqW9pfc1MzQw -Vnsv6ZE+KoVBrCeEptGdV6h0+oXbeZejlEUSbZT8WYl/xSQ2RblGr2Ltv8dwB2gu -Qvpkk+RI/TH2hw8AeE2u9InZIxp3FYN60SRAqOk6OxABFuJd1Q2tYpaQuLUjPX7G -B1oUR/sqYbxEg1amtfxJ+4wZVO8w2Y1O1+87t6MCuQH1+ERbS96pKso5Z8bsaD7U -AHq6QPGch7Hd+dPWyXPgQmtj2dmnCqAgrEfTNuCrJ13xftWg7V5sYTN9Y3KnECm+ -+oEqupa9RBxx6VK92i8ANxbVXwDpEB+/s2ZH1GDgBSc5lJqHUrmM2TX6HBj8P8f+ -3itu8p6gs0RFJ8JsNxgc3QP+TEs9bQNuZYU51f+ER6UJJr92LOT18ypqNYY/cjhp -n+5lal2bLnR3T1wYGzP+2lfNNEcxa41zyfqEodiYiBJWaiEKqUFUCqW5rgvmbBV8 -QkPCRR82vmDG4+ARWFvBJwCbo68qcIHhWpCHp+nH4LC7R/Gi/OOIDus79uH2aZ/l -8jHzC6BHsBdHwnbIdBZVQesTnUFfwIml6yVQY9HNexV56/rxhqefGTdlpfkhUrqd -5ZebWo+OOnnm3BFXXXSu -=rRqk ------END PGP SIGNATURE----- diff --git a/patch-4.8.4.sign b/patch-4.8.4.sign new file mode 100644 index 0000000..ee48315 --- /dev/null +++ b/patch-4.8.4.sign @@ -0,0 +1,16 @@ +-----BEGIN PGP SIGNATURE----- + +iQIcBAABCAAGBQJYC0JLAAoJEDjbvchgkmk+BHMP/R0POh4PDO/L0XaMN011jslJ +a60kMBTGXWLLicyPvQkt30MuP/UyxNjduatJDTpoaxhLZeHBY5t7UBVPOWGLJVA6 +wZrz3Wbx4VP6OUNJlx3tIT2lCo+nkoq2xNzy1MKQL2xuiCFcXyKr/KOIJX0K0w5E 
++AOqDYnBOZMrAN73G4n5VZHU0usLPPdY+0DZdOKBjLDDN9+8RBLYZ5YReaVGsn4+ +zb/XuF6L06yi+P60UqcF5flcThwsrlsJbHxvk03XFK3KuGjKrldo644vAIdmq6sN +hFCYO1SYEwmwjSUCXFTxtzrVO72k2sMnxdIDXX/H6n3NmMIiGhfiphnU4UQXuZMy +31NwzlCmIdYsxRiA9NNpITBa8Y/QnKtZuUxVgvw4AWk2IOVwaZhIUBUtVI9ctipG +HYj7SRhi0V0MIN0k3YNiAfLo10IBTtEWls2mfShO59eMnoE1RXyinofXo2JxKCxl +gLrKWYSqKt1akthMM89y6D+rOkJYNu1cSGzMyRBgeNZIoEzpWK5LCUMyM9wFVw+g +qex2tIurNOigSbVW/hSM4mf7HxJ5qwTv+T2akUzUGhkfljeeP3mTyvnrzsFcG5XZ +86qlZDfOwiUtcpERUZwUcWJOWRFr3RAV0zH5Jh0dxsi5JF9sDVDhKeHazPye074h +jKpPWgz1zI4wKEuOksZ2 +=3em6 +-----END PGP SIGNATURE----- diff --git a/sanitize-memory.patch b/sanitize-memory.patch index f166334..18a2ac7 100644 --- a/sanitize-memory.patch +++ b/sanitize-memory.patch @@ -1,8 +1,8 @@ diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 82b42c9..090568e 100644 +index a4f4d69..4af52d6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt -@@ -2862,6 +2862,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. +@@ -2888,6 +2888,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the specified number of seconds. This is to be used if your oopses keep scrolling off the screen. @@ -14,10 +14,10 @@ index 82b42c9..090568e 100644 pcd. [PARIDE] diff --git a/fs/buffer.c b/fs/buffer.c -index 754813a..4c25e3c 100644 +index 9c8eb9b..236a1ca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c -@@ -3406,7 +3406,7 @@ void __init buffer_init(void) +@@ -3476,7 +3476,7 @@ void __init buffer_init(void) bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| @@ -27,10 +27,10 @@ index 754813a..4c25e3c 100644 /* diff --git a/fs/dcache.c b/fs/dcache.c -index 1ed81bb..30f6c6b 100644 +index 5c7cc95..4d1663e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -3709,7 +3709,8 @@ void __init vfs_caches_init_early(void) +@@ -3608,7 +3608,8 @@ void __init vfs_caches_init_early(void) void __init vfs_caches_init(void) { names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, @@ -41,7 +41,7 @@ index 1ed81bb..30f6c6b 100644 dcache_init(); inode_init(); diff --git a/include/linux/slab.h b/include/linux/slab.h -index aeb3e6d..df60597 100644 +index 4293808..70e883a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -23,6 +23,13 @@ @@ -59,10 +59,10 @@ index aeb3e6d..df60597 100644 #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ diff --git a/kernel/fork.c b/kernel/fork.c -index 4a7ec0c..49f43cb 100644 +index beb3172..5df26d4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1913,7 +1913,7 @@ void __init proc_caches_init(void) +@@ -1932,7 +1932,7 @@ void __init proc_caches_init(void) sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL); @@ -72,7 +72,7 @@ index 4a7ec0c..49f43cb 100644 nsproxy_cache_init(); } diff --git a/mm/rmap.c b/mm/rmap.c -index 701b93f..22ab5d9 100644 +index 1ef3640..aead057 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -429,10 +429,10 @@ static void anon_vma_ctor(void *data) @@ -89,10 +89,10 @@ index 701b93f..22ab5d9 100644 /* diff --git a/mm/slab.c b/mm/slab.c -index cc8bbc1..6bd0823 100644 +index b672710..570a6a5 100644 --- a/mm/slab.c +++ b/mm/slab.c -@@ -3560,6 +3560,17 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, +@@ -3524,6 +3524,17 @@ void ___cache_free(struct kmem_cache *cachep, void *objp, struct 
array_cache *ac = cpu_cache_get(cachep); check_irq_off(); @@ -111,10 +111,10 @@ index cc8bbc1..6bd0823 100644 objp = cache_free_debugcheck(cachep, objp, caller); diff --git a/mm/slab.h b/mm/slab.h -index dedb1a9..1d157d4 100644 +index 9653f2e..47a0f7f 100644 --- a/mm/slab.h +++ b/mm/slab.h -@@ -70,6 +70,15 @@ extern struct list_head slab_caches; +@@ -71,6 +71,15 @@ extern struct list_head slab_caches; /* The slab cache that manages slab cache information */ extern struct kmem_cache *kmem_cache; @@ -131,7 +131,7 @@ index dedb1a9..1d157d4 100644 unsigned long align, unsigned long size); diff --git a/mm/slab_common.c b/mm/slab_common.c -index 82317ab..a5e0b77 100644 +index 71f0b28..fd97b10 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -44,7 +44,11 @@ struct kmem_cache *kmem_cache; @@ -222,13 +222,14 @@ index 5ec1580..385cdbc 100644 /* This slob page is about to become partially free. Easy! */ sp->units = units; diff --git a/mm/slub.c b/mm/slub.c -index 825ff45..c4eb91d 100644 +index 9adae58..56e456c 100644 --- a/mm/slub.c +++ b/mm/slub.c -@@ -2778,6 +2778,22 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, - +@@ -2934,6 +2934,23 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, + unsigned long addr) + { slab_free_freelist_hook(s, head, tail); - ++ +#ifdef CONFIG_PAX_MEMORY_SANITIZE + if (pax_sanitize_slab && !(s->flags & SLAB_NO_SANITIZE)) { + int offset = s->offset ? 0 : sizeof(void *); @@ -245,10 +246,10 @@ index 825ff45..c4eb91d 100644 + } +#endif + - redo: /* - * Determine the currently cpus per cpu slab. -@@ -3291,6 +3307,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) + * slab_free_freelist_hook() could have put the items into quarantine. + * If so, no need to free them. +@@ -3431,6 +3448,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) s->inuse = size; if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || @@ -259,10 +260,10 @@ index 825ff45..c4eb91d 100644 /* * Relocate free pointer after the object if it is not diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index eb12d21..9d8f097 100644 +index 3864b4b6..d1dd10d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c -@@ -3426,12 +3426,14 @@ void __init skb_init(void) +@@ -3432,12 +3432,14 @@ void __init skb_init(void) skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, @@ -280,7 +281,7 @@ index eb12d21..9d8f097 100644 } diff --git a/security/Kconfig b/security/Kconfig -index 176758c..92b5346 100644 +index 118f454..e2a0281 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -6,6 +6,37 @@ menu "Security options"