Merge tag 'for-v3.17' of git://git.infradead.org/battery-2.6
[cascardo/linux.git] / kernel / time / timekeeping.c
index 7ca150a..f36b028 100644 (file)
@@ -44,6 +44,22 @@ static struct {
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static struct timekeeper shadow_timekeeper;
 
+/**
+ * struct tk_fast - NMI safe timekeeper
+ * @seq:       Sequence counter for protecting updates. The lowest bit
+ *             is the index for the tk_read_base array
+ * @base:      tk_read_base array. Access is indexed by the lowest bit of
+ *             @seq.
+ *
+ * See update_fast_timekeeper() below for the update and read protocol.
+ */
+struct tk_fast {
+       seqcount_t              seq;
+       struct tk_read_base     base[2];
+};
+
+static struct tk_fast tk_fast_mono ____cacheline_aligned;
+
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
@@ -52,8 +68,8 @@ bool __read_mostly persistent_clock_exist = false;
 
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
-       while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
-               tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
+       while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
+               tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
                tk->xtime_sec++;
        }
 }
@@ -63,20 +79,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
        struct timespec64 ts;
 
        ts.tv_sec = tk->xtime_sec;
-       ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
+       ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
        return ts;
 }
 
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec = ts->tv_sec;
-       tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
+       tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
 }
 
 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec += ts->tv_sec;
-       tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
+       tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
        tk_normalize_xtime(tk);
 }
 
@@ -97,13 +113,9 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
        tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
 }
 
-static void tk_set_sleep_time(struct timekeeper *tk, struct timespec64 t)
+static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 {
-       /* Verify consistency before modifying */
-       WARN_ON_ONCE(tk->offs_boot.tv64 != timespec64_to_ktime(tk->total_sleep_time).tv64);
-
-       tk->total_sleep_time    = t;
-       tk->offs_boot           = timespec64_to_ktime(t);
+       tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
 /**
@@ -123,9 +135,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        u64 tmp, ntpinterval;
        struct clocksource *old_clock;
 
-       old_clock = tk->clock;
-       tk->clock = clock;
-       tk->cycle_last = clock->cycle_last = clock->read(clock);
+       old_clock = tk->tkr.clock;
+       tk->tkr.clock = clock;
+       tk->tkr.read = clock->read;
+       tk->tkr.mask = clock->mask;
+       tk->tkr.cycle_last = tk->tkr.read(clock);
 
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
@@ -149,21 +163,23 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        if (old_clock) {
                int shift_change = clock->shift - old_clock->shift;
                if (shift_change < 0)
-                       tk->xtime_nsec >>= -shift_change;
+                       tk->tkr.xtime_nsec >>= -shift_change;
                else
-                       tk->xtime_nsec <<= shift_change;
+                       tk->tkr.xtime_nsec <<= shift_change;
        }
-       tk->shift = clock->shift;
+       tk->tkr.shift = clock->shift;
 
        tk->ntp_error = 0;
        tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
+       tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
 
        /*
         * The timekeeper keeps its own mult values for the currently
         * active clocksource. These value will be adjusted via NTP
         * to counteract clock drifting.
         */
-       tk->mult = clock->mult;
+       tk->tkr.mult = clock->mult;
+       tk->ntp_err_mult = 0;
 }
 
 /* Timekeeper helper functions. */
@@ -175,21 +191,19 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline s64 timekeeping_get_ns(struct timekeeper *tk)
+static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
-       cycle_t cycle_now, cycle_delta;
-       struct clocksource *clock;
+       cycle_t cycle_now, delta;
        s64 nsec;
 
        /* read clocksource: */
-       clock = tk->clock;
-       cycle_now = clock->read(clock);
+       cycle_now = tkr->read(tkr->clock);
 
        /* calculate the delta since the last update_wall_time: */
-       cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
 
-       nsec = cycle_delta * tk->mult + tk->xtime_nsec;
-       nsec >>= tk->shift;
+       nsec = delta * tkr->mult + tkr->xtime_nsec;
+       nsec >>= tkr->shift;
 
        /* If arch requires, add in get_arch_timeoffset() */
        return nsec + arch_gettimeoffset();
@@ -197,32 +211,138 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk)
 
 static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 {
-       cycle_t cycle_now, cycle_delta;
-       struct clocksource *clock;
+       struct clocksource *clock = tk->tkr.clock;
+       cycle_t cycle_now, delta;
        s64 nsec;
 
        /* read clocksource: */
-       clock = tk->clock;
-       cycle_now = clock->read(clock);
+       cycle_now = tk->tkr.read(clock);
 
        /* calculate the delta since the last update_wall_time: */
-       cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
 
        /* convert delta to nanoseconds. */
-       nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
 
        /* If arch requires, add in get_arch_timeoffset() */
        return nsec + arch_gettimeoffset();
 }
 
+/**
+ * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
+ * @tk:                The timekeeper from which we take the update
+ *
+ * Copies @tk->tkr into both slots of tk_fast_mono (called tkf below).
+ *
+ * We want to use this from any context including NMI and tracing /
+ * instrumenting the timekeeping code itself.
+ *
+ * So we handle this differently than the other timekeeping accessor
+ * functions which retry when the sequence count has changed. The
+ * update side does:
+ *
+ * smp_wmb();  <- Ensure that the last base[1] update is visible
+ * tkf->seq++;
+ * smp_wmb();  <- Ensure that the seqcount update is visible
+ * update(tkf->base[0], tk);
+ * smp_wmb();  <- Ensure that the base[0] update is visible
+ * tkf->seq++;
+ * smp_wmb();  <- Ensure that the seqcount update is visible
+ * update(tkf->base[1], tk);
+ *
+ * The reader side does:
+ *
+ * do {
+ *     seq = tkf->seq;
+ *     smp_rmb();
+ *     idx = seq & 0x01;
+ *     now = now(tkf->base[idx]);
+ *     smp_rmb();
+ * } while (seq != tkf->seq)
+ *
+ * As long as we update base[0] readers are forced off to
+ * base[1]. Once base[0] is updated readers are redirected to base[0]
+ * and the base[1] update takes place.
+ *
+ * So if a NMI hits the update of base[0] then it will use base[1]
+ * which is still consistent. In the worst case this can result in a
+ * slightly wrong timestamp (a few nanoseconds). See
+ * ktime_get_mono_fast_ns().
+ */
+static void update_fast_timekeeper(struct timekeeper *tk)
+{
+       struct tk_read_base *base = tk_fast_mono.base;
+
+       /* Force readers off to base[1] */
+       raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+       /* Update base[0] */
+       memcpy(base, &tk->tkr, sizeof(*base));
+
+       /* Force readers back to base[0] */
+       raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+       /* Update base[1] */
+       memcpy(base + 1, base, sizeof(*base));
+}
+
+/**
+ * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
+ *
+ * This timestamp is not guaranteed to be monotonic across an update.
+ * The timestamp is calculated by:
+ *
+ *     now = base_mono + clock_delta * slope
+ *
+ * So if the update lowers the slope, readers who are forced to the
+ * not yet updated second array are still using the old steeper slope.
+ *
+ * tmono
+ * ^
+ * |    o  n
+ * |   o n
+ * |  u
+ * | o
+ * |o
+ * |12345678---> reader order
+ *
+ * o = old slope
+ * u = update
+ * n = new slope
+ *
+ * So reader 6 will observe time going backwards versus reader 5.
+ *
+ * While other CPUs are likely to be able to observe that, the only way
+ * for a CPU local observation is when an NMI hits in the middle of
+ * the update. Timestamps taken from that NMI context might be ahead
+ * of the following timestamps. Callers need to be aware of that and
+ * deal with it.
+ */
+u64 notrace ktime_get_mono_fast_ns(void)
+{
+       struct tk_read_base *tkr;
+       unsigned int seq;
+       u64 now;
+
+       do {
+               seq = raw_read_seqcount(&tk_fast_mono.seq);
+               tkr = tk_fast_mono.base + (seq & 0x01);
+               now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+
+       } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
+       return now;
+}
+EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
 
 static inline void update_vsyscall(struct timekeeper *tk)
 {
        struct timespec xt;
 
-       xt = tk_xtime(tk);
-       update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+       xt = timespec64_to_timespec(tk_xtime(tk));
+       update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult,
+                           tk->tkr.cycle_last);
 }
 
 static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -239,11 +359,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
        * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
        * users are removed, this can be killed.
        */
-       remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
-       tk->xtime_nsec -= remainder;
-       tk->xtime_nsec += 1ULL << tk->shift;
+       remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
+       tk->tkr.xtime_nsec -= remainder;
+       tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
        tk->ntp_error += remainder << tk->ntp_error_shift;
-       tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift;
+       tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
 }
 #else
 #define old_vsyscall_fixup(tk)
@@ -291,6 +411,29 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 
+/*
+ * Refresh the ktime_t bases tkr.base_mono and base_raw of the timekeeper
+ */
+static inline void tk_update_ktime_data(struct timekeeper *tk)
+{
+       s64 nsec;
+
+       /*
+        * The xtime based monotonic readout is:
+        *      nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
+        * The ktime based monotonic readout is:
+        *      nsec = base_mono + now();
+        * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
+        */
+       nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
+       nsec *= NSEC_PER_SEC;
+       nsec += tk->wall_to_monotonic.tv_nsec;
+       tk->tkr.base_mono = ns_to_ktime(nsec);
+
+       /* Update the monotonic raw base */
+       tk->base_raw = timespec64_to_ktime(tk->raw_time);
+}
+
 /* must hold timekeeper_lock */
 static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 {
@@ -301,9 +444,13 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
        update_vsyscall(tk);
        update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 
+       tk_update_ktime_data(tk);
+
        if (action & TK_MIRROR)
                memcpy(&shadow_timekeeper, &tk_core.timekeeper,
                       sizeof(tk_core.timekeeper));
+
+       update_fast_timekeeper(tk);
 }
 
 /**
@@ -315,23 +462,22 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-       cycle_t cycle_now, cycle_delta;
-       struct clocksource *clock;
+       struct clocksource *clock = tk->tkr.clock;
+       cycle_t cycle_now, delta;
        s64 nsec;
 
-       clock = tk->clock;
-       cycle_now = clock->read(clock);
-       cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-       tk->cycle_last = clock->cycle_last = cycle_now;
+       cycle_now = tk->tkr.read(clock);
+       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
+       tk->tkr.cycle_last = cycle_now;
 
-       tk->xtime_nsec += cycle_delta * tk->mult;
+       tk->tkr.xtime_nsec += delta * tk->tkr.mult;
 
        /* If arch requires, add in get_arch_timeoffset() */
-       tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
+       tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
 
        tk_normalize_xtime(tk);
 
-       nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
        timespec64_add_ns(&tk->raw_time, nsec);
 }
 
@@ -352,7 +498,7 @@ int __getnstimeofday64(struct timespec64 *ts)
                seq = read_seqcount_begin(&tk_core.seq);
 
                ts->tv_sec = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(tk);
+               nsecs = timekeeping_get_ns(&tk->tkr);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -385,97 +531,120 @@ ktime_t ktime_get(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned int seq;
-       s64 secs, nsecs;
+       ktime_t base;
+       s64 nsecs;
 
        WARN_ON(timekeeping_suspended);
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-               nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
+               base = tk->tkr.base_mono;
+               nsecs = timekeeping_get_ns(&tk->tkr);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
-       return ktime_set(secs, nsecs);
+       return ktime_add_ns(base, nsecs);
 }
 EXPORT_SYMBOL_GPL(ktime_get);
 
-/**
- * ktime_get_ts64 - get the monotonic clock in timespec64 format
- * @ts:                pointer to timespec variable
- *
- * The function calculates the monotonic clock from the realtime
- * clock and the wall_to_monotonic offset and stores the result
- * in normalized timespec format in the variable pointed to by @ts.
- */
-void ktime_get_ts64(struct timespec64 *ts)
+static ktime_t *offsets[TK_OFFS_MAX] = {
+       [TK_OFFS_REAL]  = &tk_core.timekeeper.offs_real,
+       [TK_OFFS_BOOT]  = &tk_core.timekeeper.offs_boot,
+       [TK_OFFS_TAI]   = &tk_core.timekeeper.offs_tai,
+};
+
+ktime_t ktime_get_with_offset(enum tk_offsets offs)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 tomono;
-       s64 nsec;
        unsigned int seq;
+       ktime_t base, *offset = offsets[offs];
+       s64 nsecs;
 
        WARN_ON(timekeeping_suspended);
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               ts->tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(tk);
-               tomono = tk->wall_to_monotonic;
+               base = ktime_add(tk->tkr.base_mono, *offset);
+               nsecs = timekeeping_get_ns(&tk->tkr);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
-       ts->tv_sec += tomono.tv_sec;
-       ts->tv_nsec = 0;
-       timespec64_add_ns(ts, nsec + tomono.tv_nsec);
-}
-EXPORT_SYMBOL_GPL(ktime_get_ts64);
+       return ktime_add_ns(base, nsecs);
 
+}
+EXPORT_SYMBOL_GPL(ktime_get_with_offset);
 
 /**
- * timekeeping_clocktai - Returns the TAI time of day in a timespec
- * @ts:                pointer to the timespec to be set
- *
- * Returns the time of day in a timespec.
+ * ktime_mono_to_any() - convert monotonic time to any other time
+ * @tmono:     time to convert.
+ * @offs:      which offset to use
  */
-void timekeeping_clocktai(struct timespec *ts)
+ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
 {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 ts64;
+       ktime_t *offset = offsets[offs];
        unsigned long seq;
-       u64 nsecs;
-
-       WARN_ON(timekeeping_suspended);
+       ktime_t tconv;
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
+               tconv = ktime_add(tmono, *offset);
+       } while (read_seqcount_retry(&tk_core.seq, seq));
 
-               ts64.tv_sec = tk->xtime_sec + tk->tai_offset;
-               nsecs = timekeeping_get_ns(tk);
+       return tconv;
+}
+EXPORT_SYMBOL_GPL(ktime_mono_to_any);
 
-       } while (read_seqcount_retry(&tk_core.seq, seq));
+/**
+ * ktime_get_raw - Returns the raw monotonic time in ktime_t format
+ */
+ktime_t ktime_get_raw(void)
+{
+       struct timekeeper *tk = &tk_core.timekeeper;
+       unsigned int seq;
+       ktime_t base;
+       s64 nsecs;
 
-       ts64.tv_nsec = 0;
-       timespec64_add_ns(&ts64, nsecs);
-       *ts = timespec64_to_timespec(ts64);
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               base = tk->base_raw;
+               nsecs = timekeeping_get_ns_raw(tk);
 
-}
-EXPORT_SYMBOL(timekeeping_clocktai);
+       } while (read_seqcount_retry(&tk_core.seq, seq));
 
+       return ktime_add_ns(base, nsecs);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw);
 
 /**
- * ktime_get_clocktai - Returns the TAI time of day in a ktime
+ * ktime_get_ts64 - get the monotonic clock in timespec64 format
+ * @ts:                pointer to timespec variable
  *
- * Returns the time of day in a ktime.
+ * The function calculates the monotonic clock from the realtime
+ * clock and the wall_to_monotonic offset and stores the result
+ * in normalized timespec format in the variable pointed to by @ts.
  */
-ktime_t ktime_get_clocktai(void)
+void ktime_get_ts64(struct timespec64 *ts)
 {
-       struct timespec ts;
+       struct timekeeper *tk = &tk_core.timekeeper;
+       struct timespec64 tomono;
+       s64 nsec;
+       unsigned int seq;
+
+       WARN_ON(timekeeping_suspended);
 
-       timekeeping_clocktai(&ts);
-       return timespec_to_ktime(ts);
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               ts->tv_sec = tk->xtime_sec;
+               nsec = timekeeping_get_ns(&tk->tkr);
+               tomono = tk->wall_to_monotonic;
+
+       } while (read_seqcount_retry(&tk_core.seq, seq));
+
+       ts->tv_sec += tomono.tv_sec;
+       ts->tv_nsec = 0;
+       timespec64_add_ns(ts, nsec + tomono.tv_nsec);
 }
-EXPORT_SYMBOL(ktime_get_clocktai);
+EXPORT_SYMBOL_GPL(ktime_get_ts64);
 
 #ifdef CONFIG_NTP_PPS
 
@@ -504,7 +673,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                ts_real->tv_nsec = 0;
 
                nsecs_raw = timekeeping_get_ns_raw(tk);
-               nsecs_real = timekeeping_get_ns(tk);
+               nsecs_real = timekeeping_get_ns(&tk->tkr);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -688,7 +857,7 @@ static int change_clocksource(void *data)
         */
        if (try_module_get(new->owner)) {
                if (!new->enable || new->enable(new) == 0) {
-                       old = tk->clock;
+                       old = tk->tkr.clock;
                        tk_setup_internals(tk, new);
                        if (old->disable)
                                old->disable(old);
@@ -716,27 +885,12 @@ int timekeeping_notify(struct clocksource *clock)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
 
-       if (tk->clock == clock)
+       if (tk->tkr.clock == clock)
                return 0;
        stop_machine(change_clocksource, clock, NULL);
        tick_clock_notify();
-       return tk->clock == clock ? 0 : -1;
-}
-
-/**
- * ktime_get_real - get the real (wall-) time in ktime_t format
- *
- * returns the time in ktime_t format
- */
-ktime_t ktime_get_real(void)
-{
-       struct timespec64 now;
-
-       getnstimeofday64(&now);
-
-       return timespec64_to_ktime(now);
+       return tk->tkr.clock == clock ? 0 : -1;
 }
-EXPORT_SYMBOL_GPL(ktime_get_real);
 
 /**
  * getrawmonotonic - Returns the raw monotonic time in a timespec
@@ -775,7 +929,7 @@ int timekeeping_valid_for_hres(void)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+               ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -794,7 +948,7 @@ u64 timekeeping_max_deferment(void)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ret = tk->clock->max_idle_ns;
+               ret = tk->tkr.clock->max_idle_ns;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -873,18 +1027,14 @@ void __init timekeeping_init(void)
        tk_set_xtime(tk, &now);
        tk->raw_time.tv_sec = 0;
        tk->raw_time.tv_nsec = 0;
+       tk->base_raw.tv64 = 0;
        if (boot.tv_sec == 0 && boot.tv_nsec == 0)
                boot = tk_xtime(tk);
 
        set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
        tk_set_wall_to_mono(tk, tmp);
 
-       tmp.tv_sec = 0;
-       tmp.tv_nsec = 0;
-       tk_set_sleep_time(tk, tmp);
-
-       memcpy(&shadow_timekeeper, &tk_core.timekeeper,
-              sizeof(tk_core.timekeeper));
+       timekeeping_update(tk, TK_MIRROR);
 
        write_seqcount_end(&tk_core.seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -911,7 +1061,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
        }
        tk_xtime_add(tk, delta);
        tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
-       tk_set_sleep_time(tk, timespec64_add(tk->total_sleep_time, *delta));
+       tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
        tk_debug_account_sleep_time(delta);
 }
 
@@ -965,7 +1115,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 static void timekeeping_resume(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *clock = tk->clock;
+       struct clocksource *clock = tk->tkr.clock;
        unsigned long flags;
        struct timespec64 ts_new, ts_delta;
        struct timespec tmp;
@@ -993,15 +1143,16 @@ static void timekeeping_resume(void)
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = clock->read(clock);
+       cycle_now = tk->tkr.read(clock);
        if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-               cycle_now > clock->cycle_last) {
+               cycle_now > tk->tkr.cycle_last) {
                u64 num, max = ULLONG_MAX;
                u32 mult = clock->mult;
                u32 shift = clock->shift;
                s64 nsec = 0;
 
-               cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+               cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
+                                               tk->tkr.mask);
 
                /*
                 * "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1027,7 +1178,7 @@ static void timekeeping_resume(void)
                __timekeeping_inject_sleeptime(tk, &ts_delta);
 
        /* Re-base the last cycle value */
-       tk->cycle_last = clock->cycle_last = cycle_now;
+       tk->tkr.cycle_last = cycle_now;
        tk->ntp_error = 0;
        timekeeping_suspended = 0;
        timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1108,125 +1259,34 @@ static int __init timekeeping_init_ops(void)
        register_syscore_ops(&timekeeping_syscore_ops);
        return 0;
 }
-
 device_initcall(timekeeping_init_ops);
 
 /*
- * If the error is already larger, we look ahead even further
- * to compensate for late or lost adjustments.
+ * Apply a multiplier adjustment to the timekeeper
  */
-static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
-                                                s64 error, s64 *interval,
-                                                s64 *offset)
+static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
+                                                        s64 offset,
+                                                        bool negative,
+                                                        int adj_scale)
 {
-       s64 tick_error, i;
-       u32 look_ahead, adj;
-       s32 error2, mult;
+       s64 interval = tk->cycle_interval;
+       s32 mult_adj = 1;
 
-       /*
-        * Use the current error value to determine how much to look ahead.
-        * The larger the error the slower we adjust for it to avoid problems
-        * with losing too many ticks, otherwise we would overadjust and
-        * produce an even larger error.  The smaller the adjustment the
-        * faster we try to adjust for it, as lost ticks can do less harm
-        * here.  This is tuned so that an error of about 1 msec is adjusted
-        * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
-        */
-       error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
-       error2 = abs(error2);
-       for (look_ahead = 0; error2 > 0; look_ahead++)
-               error2 >>= 2;
-
-       /*
-        * Now calculate the error in (1 << look_ahead) ticks, but first
-        * remove the single look ahead already included in the error.
-        */
-       tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
-       tick_error -= tk->xtime_interval >> 1;
-       error = ((error - tick_error) >> look_ahead) + tick_error;
-
-       /* Finally calculate the adjustment shift value.  */
-       i = *interval;
-       mult = 1;
-       if (error < 0) {
-               error = -error;
-               *interval = -*interval;
-               *offset = -*offset;
-               mult = -1;
-       }
-       for (adj = 0; error > i; adj++)
-               error >>= 1;
-
-       *interval <<= adj;
-       *offset <<= adj;
-       return mult << adj;
-}
-
-/*
- * Adjust the multiplier to reduce the error value,
- * this is optimized for the most common adjustments of -1,0,1,
- * for other values we can do a bit more work.
- */
-static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
-{
-       s64 error, interval = tk->cycle_interval;
-       int adj;
-
-       /*
-        * The point of this is to check if the error is greater than half
-        * an interval.
-        *
-        * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
-        *
-        * Note we subtract one in the shift, so that error is really error*2.
-        * This "saves" dividing(shifting) interval twice, but keeps the
-        * (error > interval) comparison as still measuring if error is
-        * larger than half an interval.
-        *
-        * Note: It does not "save" on aggravation when reading the code.
-        */
-       error = tk->ntp_error >> (tk->ntp_error_shift - 1);
-       if (error > interval) {
-               /*
-                * We now divide error by 4(via shift), which checks if
-                * the error is greater than twice the interval.
-                * If it is greater, we need a bigadjust, if its smaller,
-                * we can adjust by 1.
-                */
-               error >>= 2;
-               if (likely(error <= interval))
-                       adj = 1;
-               else
-                       adj = timekeeping_bigadjust(tk, error, &interval, &offset);
-       } else {
-               if (error < -interval) {
-                       /* See comment above, this is just switched for the negative */
-                       error >>= 2;
-                       if (likely(error >= -interval)) {
-                               adj = -1;
-                               interval = -interval;
-                               offset = -offset;
-                       } else {
-                               adj = timekeeping_bigadjust(tk, error, &interval, &offset);
-                       }
-               } else {
-                       goto out_adjust;
-               }
+       if (negative) {
+               mult_adj = -mult_adj;
+               interval = -interval;
+               offset  = -offset;
        }
+       mult_adj <<= adj_scale;
+       interval <<= adj_scale;
+       offset <<= adj_scale;
 
-       if (unlikely(tk->clock->maxadj &&
-               (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
-               printk_deferred_once(KERN_WARNING
-                       "Adjusting %s more than 11%% (%ld vs %ld)\n",
-                       tk->clock->name, (long)tk->mult + adj,
-                       (long)tk->clock->mult + tk->clock->maxadj);
-       }
        /*
         * So the following can be confusing.
         *
-        * To keep things simple, lets assume adj == 1 for now.
+        * To keep things simple, lets assume mult_adj == 1 for now.
         *
-        * When adj != 1, remember that the interval and offset values
+        * When mult_adj != 1, remember that the interval and offset values
         * have been appropriately scaled so the math is the same.
         *
         * The basic idea here is that we're increasing the multiplier
@@ -1270,12 +1330,78 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
         *
         * XXX - TODO: Doc ntp_error calculation.
         */
-       tk->mult += adj;
+       tk->tkr.mult += mult_adj;
        tk->xtime_interval += interval;
-       tk->xtime_nsec -= offset;
+       tk->tkr.xtime_nsec -= offset;
        tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
+}
+
+/*
+ * Calculate the multiplier adjustment needed to match the frequency
+ * specified by NTP; @offset is passed to timekeeping_apply_adjustment()
+ */
+static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
+                                                       s64 offset)
+{
+       s64 interval = tk->cycle_interval;
+       s64 xinterval = tk->xtime_interval;
+       s64 tick_error;
+       bool negative;
+       u32 adj;
+
+       /* Remove any current error adj from freq calculation */
+       if (tk->ntp_err_mult)
+               xinterval -= tk->cycle_interval;
+
+       tk->ntp_tick = ntp_tick_length();
+
+       /* Calculate current error per tick */
+       tick_error = ntp_tick_length() >> tk->ntp_error_shift;
+       tick_error -= (xinterval + tk->xtime_remainder);
+
+       /* Don't worry about correcting it if it's small */
+       if (likely((tick_error >= 0) && (tick_error <= interval)))
+               return;
+
+       /* preserve the direction of correction */
+       negative = (tick_error < 0);
+
+       /* Magnitude: negate in s64, abs() may truncate 64-bit values to int */
+       tick_error = negative ? -tick_error : tick_error;
+       for (adj = 0; tick_error > interval; adj++)
+               tick_error >>= 1;
+
+       /* scale the corrections */
+       timekeeping_apply_adjustment(tk, offset, negative, adj);
+}
+
+/*
+ * Adjust the timekeeper's multiplier to the correct frequency
+ * and also to reduce the accumulated error value.
+ */
+static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
+{
+       /* Correct for the current frequency error */
+       timekeeping_freqadjust(tk, offset);
+
+       /* Next make a small adjustment to fix any cumulative error */
+       if (!tk->ntp_err_mult && (tk->ntp_error > 0)) {
+               tk->ntp_err_mult = 1;
+               timekeeping_apply_adjustment(tk, offset, 0, 0);
+       } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) {
+               /* Undo any existing error adjustment */
+               timekeeping_apply_adjustment(tk, offset, 1, 0);
+               tk->ntp_err_mult = 0;
+       }
+
+       if (unlikely(tk->tkr.clock->maxadj &&
+               (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) {
+               printk_once(KERN_WARNING
+                       "Adjusting %s more than 11%% (%ld vs %ld)\n",
+                       tk->tkr.clock->name, (long)tk->tkr.mult,
+                       (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+       }
 
-out_adjust:
        /*
         * It may be possible that when we entered this function, xtime_nsec
         * was very small.  Further, if we're slightly speeding the clocksource
@@ -1290,12 +1416,11 @@ out_adjust:
         * We'll correct this error next time through this function, when
         * xtime_nsec is not as small.
         */
-       if (unlikely((s64)tk->xtime_nsec < 0)) {
-               s64 neg = -(s64)tk->xtime_nsec;
-               tk->xtime_nsec = 0;
+       if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
+               s64 neg = -(s64)tk->tkr.xtime_nsec;
+               tk->tkr.xtime_nsec = 0;
                tk->ntp_error += neg << tk->ntp_error_shift;
        }
-
 }
 
 /**
@@ -1308,13 +1433,13 @@ out_adjust:
  */
 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
-       u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
+       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
        unsigned int clock_set = 0;
 
-       while (tk->xtime_nsec >= nsecps) {
+       while (tk->tkr.xtime_nsec >= nsecps) {
                int leap;
 
-               tk->xtime_nsec -= nsecps;
+               tk->tkr.xtime_nsec -= nsecps;
                tk->xtime_sec++;
 
                /* Figure out if its a leap sec and apply if needed */
@@ -1359,9 +1484,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 
        /* Accumulate one shifted interval */
        offset -= interval;
-       tk->cycle_last += interval;
+       tk->tkr.cycle_last += interval;
 
-       tk->xtime_nsec += tk->xtime_interval << shift;
+       tk->tkr.xtime_nsec += tk->xtime_interval << shift;
        *clock_set |= accumulate_nsecs_to_secs(tk);
 
        /* Accumulate raw time */
@@ -1375,7 +1500,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
        tk->raw_time.tv_nsec = raw_nsecs;
 
        /* Accumulate error between NTP and clock interval */
-       tk->ntp_error += ntp_tick_length() << shift;
+       tk->ntp_error += tk->ntp_tick << shift;
        tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
                                                (tk->ntp_error_shift + shift);
 
@@ -1388,7 +1513,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
  */
 void update_wall_time(void)
 {
-       struct clocksource *clock;
        struct timekeeper *real_tk = &tk_core.timekeeper;
        struct timekeeper *tk = &shadow_timekeeper;
        cycle_t offset;
@@ -1402,12 +1526,11 @@ void update_wall_time(void)
        if (unlikely(timekeeping_suspended))
                goto out;
 
-       clock = real_tk->clock;
-
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
        offset = real_tk->cycle_interval;
 #else
-       offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
+       offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
+                                  tk->tkr.cycle_last, tk->tkr.mask);
 #endif
 
        /* Check if there's really nothing to do */
@@ -1450,8 +1573,6 @@ void update_wall_time(void)
        clock_set |= accumulate_nsecs_to_secs(tk);
 
        write_seqcount_begin(&tk_core.seq);
-       /* Update clock->cycle_last with the new value */
-       clock->cycle_last = tk->cycle_last;
        /*
         * Update the real timekeeper.
         *
@@ -1486,82 +1607,11 @@ out:
 void getboottime(struct timespec *ts)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec boottime = {
-               .tv_sec = tk->wall_to_monotonic.tv_sec +
-                               tk->total_sleep_time.tv_sec,
-               .tv_nsec = tk->wall_to_monotonic.tv_nsec +
-                               tk->total_sleep_time.tv_nsec
-       };
-
-       set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
-}
-EXPORT_SYMBOL_GPL(getboottime);
-
-/**
- * get_monotonic_boottime - Returns monotonic time since boot
- * @ts:                pointer to the timespec to be set
- *
- * Returns the monotonic time since boot in a timespec.
- *
- * This is similar to CLOCK_MONTONIC/ktime_get_ts, but also
- * includes the time spent in suspend.
- */
-void get_monotonic_boottime(struct timespec *ts)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 tomono, sleep, ret;
-       s64 nsec;
-       unsigned int seq;
-
-       WARN_ON(timekeeping_suspended);
-
-       do {
-               seq = read_seqcount_begin(&tk_core.seq);
-               ret.tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(tk);
-               tomono = tk->wall_to_monotonic;
-               sleep = tk->total_sleep_time;
-
-       } while (read_seqcount_retry(&tk_core.seq, seq));
+       ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
 
-       ret.tv_sec += tomono.tv_sec + sleep.tv_sec;
-       ret.tv_nsec = 0;
-       timespec64_add_ns(&ret, nsec + tomono.tv_nsec + sleep.tv_nsec);
-       *ts = timespec64_to_timespec(ret);
+       *ts = ktime_to_timespec(t);
 }
-EXPORT_SYMBOL_GPL(get_monotonic_boottime);
-
-/**
- * ktime_get_boottime - Returns monotonic time since boot in a ktime
- *
- * Returns the monotonic time since boot in a ktime
- *
- * This is similar to CLOCK_MONTONIC/ktime_get, but also
- * includes the time spent in suspend.
- */
-ktime_t ktime_get_boottime(void)
-{
-       struct timespec ts;
-
-       get_monotonic_boottime(&ts);
-       return timespec_to_ktime(ts);
-}
-EXPORT_SYMBOL_GPL(ktime_get_boottime);
-
-/**
- * monotonic_to_bootbased - Convert the monotonic time to boot based.
- * @ts:                pointer to the timespec to be converted
- */
-void monotonic_to_bootbased(struct timespec *ts)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 ts64;
-
-       ts64 = timespec_to_timespec64(*ts);
-       ts64 = timespec64_add(ts64, tk->total_sleep_time);
-       *ts = timespec64_to_timespec(ts64);
-}
-EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
+EXPORT_SYMBOL_GPL(getboottime);
 
 unsigned long get_seconds(void)
 {
@@ -1634,22 +1684,22 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
                                                        ktime_t *offs_tai)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 ts;
-       ktime_t now;
        unsigned int seq;
+       ktime_t base;
+       u64 nsecs;
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ts = tk_xtime(tk);
+               base = tk->tkr.base_mono;
+               nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
                *offs_tai = tk->offs_tai;
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
-       now = ktime_set(ts.tv_sec, ts.tv_nsec);
-       now = ktime_sub(now, *offs_real);
-       return now;
+       return ktime_add_ns(base, nsecs);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1666,45 +1716,25 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
                                                        ktime_t *offs_tai)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       ktime_t now;
        unsigned int seq;
-       u64 secs, nsecs;
+       ktime_t base;
+       u64 nsecs;
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               secs = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(tk);
+               base = tk->tkr.base_mono;
+               nsecs = timekeeping_get_ns(&tk->tkr);
 
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
                *offs_tai = tk->offs_tai;
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
-       now = ktime_add_ns(ktime_set(secs, 0), nsecs);
-       now = ktime_sub(now, *offs_real);
-       return now;
+       return ktime_add_ns(base, nsecs);
 }
 #endif
 
-/**
- * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
- */
-ktime_t ktime_get_monotonic_offset(void)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned long seq;
-       struct timespec64 wtom;
-
-       do {
-               seq = read_seqcount_begin(&tk_core.seq);
-               wtom = tk->wall_to_monotonic;
-       } while (read_seqcount_retry(&tk_core.seq, seq));
-
-       return timespec64_to_ktime(wtom);
-}
-EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
-
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
  */