Merge tag 'for-v3.17' of git://git.infradead.org/battery-2.6

[cascardo/linux.git] / kernel / time / timekeeping.c
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c

index 7ca150a..f36b028 100644 (file)
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -44,6 +44,22 @@ static struct {
  static DEFINE_RAW_SPINLOCK(timekeeper_lock);
  static struct timekeeper shadow_timekeeper;
  
+/**
+ * struct tk_fast - NMI safe timekeeper
+ * @seq:       Sequence counter for protecting updates. The lowest bit
+ *             is the index for the tk_read_base array
+ * @base:      tk_read_base array. Access is indexed by the lowest bit of
+ *             @seq.
+ *
+ * See @update_fast_timekeeper() below.
+ */
+struct tk_fast {
+       seqcount_t              seq;
+       struct tk_read_base     base[2];
+};
+
+static struct tk_fast tk_fast_mono ____cacheline_aligned;
+
  /* flag for if timekeeping is suspended */
  int __read_mostly timekeeping_suspended;
  
@@ -52,8 +68,8 @@ bool __read_mostly persistent_clock_exist = false;
  
  static inline void tk_normalize_xtime(struct timekeeper *tk)
  {
-       while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
-               tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
+       while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
+               tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
                 tk->xtime_sec++;
         }
  }
@@ -63,20 +79,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
         struct timespec64 ts;
  
         ts.tv_sec = tk->xtime_sec;
-       ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
+       ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
         return ts;
  }
  
  static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
  {
         tk->xtime_sec = ts->tv_sec;
-       tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
+       tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
  }
  
  static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
  {
         tk->xtime_sec += ts->tv_sec;
-       tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
+       tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
         tk_normalize_xtime(tk);
  }
  
@@ -97,13 +113,9 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
         tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
  }
  
-static void tk_set_sleep_time(struct timekeeper *tk, struct timespec64 t)
+static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  {
-       /* Verify consistency before modifying */
-       WARN_ON_ONCE(tk->offs_boot.tv64 != timespec64_to_ktime(tk->total_sleep_time).tv64);
-
-       tk->total_sleep_time    = t;
-       tk->offs_boot           = timespec64_to_ktime(t);
+       tk->offs_boot = ktime_add(tk->offs_boot, delta);
  }
  
  /**
@@ -123,9 +135,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         u64 tmp, ntpinterval;
         struct clocksource *old_clock;
  
-       old_clock = tk->clock;
-       tk->clock = clock;
-       tk->cycle_last = clock->cycle_last = clock->read(clock);
+       old_clock = tk->tkr.clock;
+       tk->tkr.clock = clock;
+       tk->tkr.read = clock->read;
+       tk->tkr.mask = clock->mask;
+       tk->tkr.cycle_last = tk->tkr.read(clock);
  
         /* Do the ns -> cycle conversion first, using original mult */
         tmp = NTP_INTERVAL_LENGTH;
@@ -149,21 +163,23 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         if (old_clock) {
                 int shift_change = clock->shift - old_clock->shift;
                 if (shift_change < 0)
-                       tk->xtime_nsec >>= -shift_change;
+                       tk->tkr.xtime_nsec >>= -shift_change;
                 else
-                       tk->xtime_nsec <<= shift_change;
+                       tk->tkr.xtime_nsec <<= shift_change;
         }
-       tk->shift = clock->shift;
+       tk->tkr.shift = clock->shift;
  
         tk->ntp_error = 0;
         tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
+       tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
  
         /*
          * The timekeeper keeps its own mult values for the currently
          * active clocksource. These value will be adjusted via NTP
          * to counteract clock drifting.
          */
-       tk->mult = clock->mult;
+       tk->tkr.mult = clock->mult;
+       tk->ntp_err_mult = 0;
  }
  
  /* Timekeeper helper functions. */
@@ -175,21 +191,19 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
  static inline u32 arch_gettimeoffset(void) { return 0; }
  #endif
  
-static inline s64 timekeeping_get_ns(struct timekeeper *tk)
+static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
  {
-       cycle_t cycle_now, cycle_delta;
-       struct clocksource *clock;
+       cycle_t cycle_now, delta;
         s64 nsec;
  
         /* read clocksource: */
-       clock = tk->clock;
-       cycle_now = clock->read(clock);
+       cycle_now = tkr->read(tkr->clock);
  
         /* calculate the delta since the last update_wall_time: */
-       cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
  
-       nsec = cycle_delta * tk->mult + tk->xtime_nsec;
-       nsec >>= tk->shift;
+       nsec = delta * tkr->mult + tkr->xtime_nsec;
+       nsec >>= tkr->shift;
  
         /* If arch requires, add in get_arch_timeoffset() */
         return nsec + arch_gettimeoffset();
@@ -197,32 +211,138 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk)
  
  static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
  {
-       cycle_t cycle_now, cycle_delta;
-       struct clocksource *clock;
+       struct clocksource *clock = tk->tkr.clock;
+       cycle_t cycle_now, delta;
         s64 nsec;
  
         /* read clocksource: */
-       clock = tk->clock;
-       cycle_now = clock->read(clock);
+       cycle_now = tk->tkr.read(clock);
  
         /* calculate the delta since the last update_wall_time: */
-       cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
  
         /* convert delta to nanoseconds. */
-       nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
  
         /* If arch requires, add in get_arch_timeoffset() */
         return nsec + arch_gettimeoffset();
  }
  
+/**
+ * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
+ * @tk:                The timekeeper from which we take the update
+ * @tkf:       The fast timekeeper to update
+ * @tbase:     The time base for the fast timekeeper (mono/raw)
+ *
+ * We want to use this from any context including NMI and tracing /
+ * instrumenting the timekeeping code itself.
+ *
+ * So we handle this differently than the other timekeeping accessor
+ * functions which retry when the sequence count has changed. The
+ * update side does:
+ *
+ * smp_wmb();  <- Ensure that the last base[1] update is visible
+ * tkf->seq++;
+ * smp_wmb();  <- Ensure that the seqcount update is visible
+ * update(tkf->base[0], tk);
+ * smp_wmb();  <- Ensure that the base[0] update is visible
+ * tkf->seq++;
+ * smp_wmb();  <- Ensure that the seqcount update is visible
+ * update(tkf->base[1], tk);
+ *
+ * The reader side does:
+ *
+ * do {
+ *     seq = tkf->seq;
+ *     smp_rmb();
+ *     idx = seq & 0x01;
+ *     now = now(tkf->base[idx]);
+ *     smp_rmb();
+ * } while (seq != tkf->seq)
+ *
+ * As long as we update base[0] readers are forced off to
+ * base[1]. Once base[0] is updated readers are redirected to base[0]
+ * and the base[1] update takes place.
+ *
+ * So if a NMI hits the update of base[0] then it will use base[1]
+ * which is still consistent. In the worst case this can result is a
+ * slightly wrong timestamp (a few nanoseconds). See
+ * @ktime_get_mono_fast_ns.
+ */
+static void update_fast_timekeeper(struct timekeeper *tk)
+{
+       struct tk_read_base *base = tk_fast_mono.base;
+
+       /* Force readers off to base[1] */
+       raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+       /* Update base[0] */
+       memcpy(base, &tk->tkr, sizeof(*base));
+
+       /* Force readers back to base[0] */
+       raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+       /* Update base[1] */
+       memcpy(base + 1, base, sizeof(*base));
+}
+
+/**
+ * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
+ *
+ * This timestamp is not guaranteed to be monotonic across an update.
+ * The timestamp is calculated by:
+ *
+ *     now = base_mono + clock_delta * slope
+ *
+ * So if the update lowers the slope, readers who are forced to the
+ * not yet updated second array are still using the old steeper slope.
+ *
+ * tmono
+ * ^
+ * |    o  n
+ * |   o n
+ * |  u
+ * | o
+ * |o
+ * |12345678---> reader order
+ *
+ * o = old slope
+ * u = update
+ * n = new slope
+ *
+ * So reader 6 will observe time going backwards versus reader 5.
+ *
+ * While other CPUs are likely to be able observe that, the only way
+ * for a CPU local observation is when an NMI hits in the middle of
+ * the update. Timestamps taken from that NMI context might be ahead
+ * of the following timestamps. Callers need to be aware of that and
+ * deal with it.
+ */
+u64 notrace ktime_get_mono_fast_ns(void)
+{
+       struct tk_read_base *tkr;
+       unsigned int seq;
+       u64 now;
+
+       do {
+               seq = raw_read_seqcount(&tk_fast_mono.seq);
+               tkr = tk_fast_mono.base + (seq & 0x01);
+               now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+
+       } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
+       return now;
+}
+EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+
  #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
  
  static inline void update_vsyscall(struct timekeeper *tk)
  {
         struct timespec xt;
  
-       xt = tk_xtime(tk);
-       update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+       xt = timespec64_to_timespec(tk_xtime(tk));
+       update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult,
+                           tk->tkr.cycle_last);
  }
  
  static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -239,11 +359,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
         * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
         * users are removed, this can be killed.
         */
-       remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
-       tk->xtime_nsec -= remainder;
-       tk->xtime_nsec += 1ULL << tk->shift;
+       remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
+       tk->tkr.xtime_nsec -= remainder;
+       tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
         tk->ntp_error += remainder << tk->ntp_error_shift;
-       tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift;
+       tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
  }
  #else
  #define old_vsyscall_fixup(tk)
@@ -291,6 +411,29 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
  }
  EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
  
+/*
+ * Update the ktime_t based scalar nsec members of the timekeeper
+ */
+static inline void tk_update_ktime_data(struct timekeeper *tk)
+{
+       s64 nsec;
+
+       /*
+        * The xtime based monotonic readout is:
+        *      nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
+        * The ktime based monotonic readout is:
+        *      nsec = base_mono + now();
+        * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
+        */
+       nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
+       nsec *= NSEC_PER_SEC;
+       nsec += tk->wall_to_monotonic.tv_nsec;
+       tk->tkr.base_mono = ns_to_ktime(nsec);
+
+       /* Update the monotonic raw base */
+       tk->base_raw = timespec64_to_ktime(tk->raw_time);
+}
+
  /* must hold timekeeper_lock */
  static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  {
@@ -301,9 +444,13 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
         update_vsyscall(tk);
         update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
  
+       tk_update_ktime_data(tk);
+
         if (action & TK_MIRROR)
                 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
                        sizeof(tk_core.timekeeper));
+
+       update_fast_timekeeper(tk);
  }
  
  /**
@@ -315,23 +462,22 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
   */
  static void timekeeping_forward_now(struct timekeeper *tk)
  {
-       cycle_t cycle_now, cycle_delta;
-       struct clocksource *clock;
+       struct clocksource *clock = tk->tkr.clock;
+       cycle_t cycle_now, delta;
         s64 nsec;
  
-       clock = tk->clock;
-       cycle_now = clock->read(clock);
-       cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-       tk->cycle_last = clock->cycle_last = cycle_now;
+       cycle_now = tk->tkr.read(clock);
+       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
+       tk->tkr.cycle_last = cycle_now;
  
-       tk->xtime_nsec += cycle_delta * tk->mult;
+       tk->tkr.xtime_nsec += delta * tk->tkr.mult;
  
         /* If arch requires, add in get_arch_timeoffset() */
-       tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
+       tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
  
         tk_normalize_xtime(tk);
  
-       nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
         timespec64_add_ns(&tk->raw_time, nsec);
  }
  
@@ -352,7 +498,7 @@ int __getnstimeofday64(struct timespec64 *ts)
                 seq = read_seqcount_begin(&tk_core.seq);
  
                 ts->tv_sec = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(tk);
+               nsecs = timekeeping_get_ns(&tk->tkr);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -385,97 +531,120 @@ ktime_t ktime_get(void)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
         unsigned int seq;
-       s64 secs, nsecs;
+       ktime_t base;
+       s64 nsecs;
  
         WARN_ON(timekeeping_suspended);
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-               nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
+               base = tk->tkr.base_mono;
+               nsecs = timekeeping_get_ns(&tk->tkr);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
-       return ktime_set(secs, nsecs);
+       return ktime_add_ns(base, nsecs);
  }
  EXPORT_SYMBOL_GPL(ktime_get);
  
-/**
- * ktime_get_ts64 - get the monotonic clock in timespec64 format
- * @ts:                pointer to timespec variable
- *
- * The function calculates the monotonic clock from the realtime
- * clock and the wall_to_monotonic offset and stores the result
- * in normalized timespec format in the variable pointed to by @ts.
- */
-void ktime_get_ts64(struct timespec64 *ts)
+static ktime_t *offsets[TK_OFFS_MAX] = {
+       [TK_OFFS_REAL]  = &tk_core.timekeeper.offs_real,
+       [TK_OFFS_BOOT]  = &tk_core.timekeeper.offs_boot,
+       [TK_OFFS_TAI]   = &tk_core.timekeeper.offs_tai,
+};
+
+ktime_t ktime_get_with_offset(enum tk_offsets offs)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 tomono;
-       s64 nsec;
         unsigned int seq;
+       ktime_t base, *offset = offsets[offs];
+       s64 nsecs;
  
         WARN_ON(timekeeping_suspended);
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               ts->tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(tk);
-               tomono = tk->wall_to_monotonic;
+               base = ktime_add(tk->tkr.base_mono, *offset);
+               nsecs = timekeeping_get_ns(&tk->tkr);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
-       ts->tv_sec += tomono.tv_sec;
-       ts->tv_nsec = 0;
-       timespec64_add_ns(ts, nsec + tomono.tv_nsec);
-}
-EXPORT_SYMBOL_GPL(ktime_get_ts64);
+       return ktime_add_ns(base, nsecs);
  
+}
+EXPORT_SYMBOL_GPL(ktime_get_with_offset);
  
  /**
- * timekeeping_clocktai - Returns the TAI time of day in a timespec
- * @ts:                pointer to the timespec to be set
- *
- * Returns the time of day in a timespec.
+ * ktime_mono_to_any() - convert mononotic time to any other time
+ * @tmono:     time to convert.
+ * @offs:      which offset to use
   */
-void timekeeping_clocktai(struct timespec *ts)
+ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
  {
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 ts64;
+       ktime_t *offset = offsets[offs];
         unsigned long seq;
-       u64 nsecs;
-
-       WARN_ON(timekeeping_suspended);
+       ktime_t tconv;
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
+               tconv = ktime_add(tmono, *offset);
+       } while (read_seqcount_retry(&tk_core.seq, seq));
  
-               ts64.tv_sec = tk->xtime_sec + tk->tai_offset;
-               nsecs = timekeeping_get_ns(tk);
+       return tconv;
+}
+EXPORT_SYMBOL_GPL(ktime_mono_to_any);
  
-       } while (read_seqcount_retry(&tk_core.seq, seq));
+/**
+ * ktime_get_raw - Returns the raw monotonic time in ktime_t format
+ */
+ktime_t ktime_get_raw(void)
+{
+       struct timekeeper *tk = &tk_core.timekeeper;
+       unsigned int seq;
+       ktime_t base;
+       s64 nsecs;
  
-       ts64.tv_nsec = 0;
-       timespec64_add_ns(&ts64, nsecs);
-       *ts = timespec64_to_timespec(ts64);
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               base = tk->base_raw;
+               nsecs = timekeeping_get_ns_raw(tk);
  
-}
-EXPORT_SYMBOL(timekeeping_clocktai);
+       } while (read_seqcount_retry(&tk_core.seq, seq));
  
+       return ktime_add_ns(base, nsecs);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw);
  
  /**
- * ktime_get_clocktai - Returns the TAI time of day in a ktime
+ * ktime_get_ts64 - get the monotonic clock in timespec64 format
+ * @ts:                pointer to timespec variable
   *
- * Returns the time of day in a ktime.
+ * The function calculates the monotonic clock from the realtime
+ * clock and the wall_to_monotonic offset and stores the result
+ * in normalized timespec format in the variable pointed to by @ts.
   */
-ktime_t ktime_get_clocktai(void)
+void ktime_get_ts64(struct timespec64 *ts)
  {
-       struct timespec ts;
+       struct timekeeper *tk = &tk_core.timekeeper;
+       struct timespec64 tomono;
+       s64 nsec;
+       unsigned int seq;
+
+       WARN_ON(timekeeping_suspended);
  
-       timekeeping_clocktai(&ts);
-       return timespec_to_ktime(ts);
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               ts->tv_sec = tk->xtime_sec;
+               nsec = timekeeping_get_ns(&tk->tkr);
+               tomono = tk->wall_to_monotonic;
+
+       } while (read_seqcount_retry(&tk_core.seq, seq));
+
+       ts->tv_sec += tomono.tv_sec;
+       ts->tv_nsec = 0;
+       timespec64_add_ns(ts, nsec + tomono.tv_nsec);
  }
-EXPORT_SYMBOL(ktime_get_clocktai);
+EXPORT_SYMBOL_GPL(ktime_get_ts64);
  
  #ifdef CONFIG_NTP_PPS
  
@@ -504,7 +673,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                 ts_real->tv_nsec = 0;
  
                 nsecs_raw = timekeeping_get_ns_raw(tk);
-               nsecs_real = timekeeping_get_ns(tk);
+               nsecs_real = timekeeping_get_ns(&tk->tkr);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -688,7 +857,7 @@ static int change_clocksource(void *data)
          */
         if (try_module_get(new->owner)) {
                 if (!new->enable || new->enable(new) == 0) {
-                       old = tk->clock;
+                       old = tk->tkr.clock;
                         tk_setup_internals(tk, new);
                         if (old->disable)
                                 old->disable(old);
@@ -716,27 +885,12 @@ int timekeeping_notify(struct clocksource *clock)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
  
-       if (tk->clock == clock)
+       if (tk->tkr.clock == clock)
                 return 0;
         stop_machine(change_clocksource, clock, NULL);
         tick_clock_notify();
-       return tk->clock == clock ? 0 : -1;
-}
-
-/**
- * ktime_get_real - get the real (wall-) time in ktime_t format
- *
- * returns the time in ktime_t format
- */
-ktime_t ktime_get_real(void)
-{
-       struct timespec64 now;
-
-       getnstimeofday64(&now);
-
-       return timespec64_to_ktime(now);
+       return tk->tkr.clock == clock ? 0 : -1;
  }
-EXPORT_SYMBOL_GPL(ktime_get_real);
  
  /**
   * getrawmonotonic - Returns the raw monotonic time in a timespec
@@ -775,7 +929,7 @@ int timekeeping_valid_for_hres(void)
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+               ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -794,7 +948,7 @@ u64 timekeeping_max_deferment(void)
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               ret = tk->clock->max_idle_ns;
+               ret = tk->tkr.clock->max_idle_ns;
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -873,18 +1027,14 @@ void __init timekeeping_init(void)
         tk_set_xtime(tk, &now);
         tk->raw_time.tv_sec = 0;
         tk->raw_time.tv_nsec = 0;
+       tk->base_raw.tv64 = 0;
         if (boot.tv_sec == 0 && boot.tv_nsec == 0)
                 boot = tk_xtime(tk);
  
         set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
         tk_set_wall_to_mono(tk, tmp);
  
-       tmp.tv_sec = 0;
-       tmp.tv_nsec = 0;
-       tk_set_sleep_time(tk, tmp);
-
-       memcpy(&shadow_timekeeper, &tk_core.timekeeper,
-              sizeof(tk_core.timekeeper));
+       timekeeping_update(tk, TK_MIRROR);
  
         write_seqcount_end(&tk_core.seq);
         raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -911,7 +1061,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
         }
         tk_xtime_add(tk, delta);
         tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
-       tk_set_sleep_time(tk, timespec64_add(tk->total_sleep_time, *delta));
+       tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
         tk_debug_account_sleep_time(delta);
  }
  
@@ -965,7 +1115,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
  static void timekeeping_resume(void)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *clock = tk->clock;
+       struct clocksource *clock = tk->tkr.clock;
         unsigned long flags;
         struct timespec64 ts_new, ts_delta;
         struct timespec tmp;
@@ -993,15 +1143,16 @@ static void timekeeping_resume(void)
          * The less preferred source will only be tried if there is no better
          * usable source. The rtc part is handled separately in rtc core code.
          */
-       cycle_now = clock->read(clock);
+       cycle_now = tk->tkr.read(clock);
         if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-               cycle_now > clock->cycle_last) {
+               cycle_now > tk->tkr.cycle_last) {
                 u64 num, max = ULLONG_MAX;
                 u32 mult = clock->mult;
                 u32 shift = clock->shift;
                 s64 nsec = 0;
  
-               cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
+               cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
+                                               tk->tkr.mask);
  
                 /*
                  * "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1027,7 +1178,7 @@ static void timekeeping_resume(void)
                 __timekeeping_inject_sleeptime(tk, &ts_delta);
  
         /* Re-base the last cycle value */
-       tk->cycle_last = clock->cycle_last = cycle_now;
+       tk->tkr.cycle_last = cycle_now;
         tk->ntp_error = 0;
         timekeeping_suspended = 0;
         timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1108,125 +1259,34 @@ static int __init timekeeping_init_ops(void)
         register_syscore_ops(&timekeeping_syscore_ops);
         return 0;
  }
-
  device_initcall(timekeeping_init_ops);
  
  /*
- * If the error is already larger, we look ahead even further
- * to compensate for late or lost adjustments.
+ * Apply a multiplier adjustment to the timekeeper
   */
-static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
-                                                s64 error, s64 *interval,
-                                                s64 *offset)
+static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
+                                                        s64 offset,
+                                                        bool negative,
+                                                        int adj_scale)
  {
-       s64 tick_error, i;
-       u32 look_ahead, adj;
-       s32 error2, mult;
+       s64 interval = tk->cycle_interval;
+       s32 mult_adj = 1;
  
-       /*
-        * Use the current error value to determine how much to look ahead.
-        * The larger the error the slower we adjust for it to avoid problems
-        * with losing too many ticks, otherwise we would overadjust and
-        * produce an even larger error.  The smaller the adjustment the
-        * faster we try to adjust for it, as lost ticks can do less harm
-        * here.  This is tuned so that an error of about 1 msec is adjusted
-        * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
-        */
-       error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
-       error2 = abs(error2);
-       for (look_ahead = 0; error2 > 0; look_ahead++)
-               error2 >>= 2;
-
-       /*
-        * Now calculate the error in (1 << look_ahead) ticks, but first
-        * remove the single look ahead already included in the error.
-        */
-       tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
-       tick_error -= tk->xtime_interval >> 1;
-       error = ((error - tick_error) >> look_ahead) + tick_error;
-
-       /* Finally calculate the adjustment shift value.  */
-       i = *interval;
-       mult = 1;
-       if (error < 0) {
-               error = -error;
-               *interval = -*interval;
-               *offset = -*offset;
-               mult = -1;
-       }
-       for (adj = 0; error > i; adj++)
-               error >>= 1;
-
-       *interval <<= adj;
-       *offset <<= adj;
-       return mult << adj;
-}
-
-/*
- * Adjust the multiplier to reduce the error value,
- * this is optimized for the most common adjustments of -1,0,1,
- * for other values we can do a bit more work.
- */
-static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
-{
-       s64 error, interval = tk->cycle_interval;
-       int adj;
-
-       /*
-        * The point of this is to check if the error is greater than half
-        * an interval.
-        *
-        * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
-        *
-        * Note we subtract one in the shift, so that error is really error*2.
-        * This "saves" dividing(shifting) interval twice, but keeps the
-        * (error > interval) comparison as still measuring if error is
-        * larger than half an interval.
-        *
-        * Note: It does not "save" on aggravation when reading the code.
-        */
-       error = tk->ntp_error >> (tk->ntp_error_shift - 1);
-       if (error > interval) {
-               /*
-                * We now divide error by 4(via shift), which checks if
-                * the error is greater than twice the interval.
-                * If it is greater, we need a bigadjust, if its smaller,
-                * we can adjust by 1.
-                */
-               error >>= 2;
-               if (likely(error <= interval))
-                       adj = 1;
-               else
-                       adj = timekeeping_bigadjust(tk, error, &interval, &offset);
-       } else {
-               if (error < -interval) {
-                       /* See comment above, this is just switched for the negative */
-                       error >>= 2;
-                       if (likely(error >= -interval)) {
-                               adj = -1;
-                               interval = -interval;
-                               offset = -offset;
-                       } else {
-                               adj = timekeeping_bigadjust(tk, error, &interval, &offset);
-                       }
-               } else {
-                       goto out_adjust;
-               }
+       if (negative) {
+               mult_adj = -mult_adj;
+               interval = -interval;
+               offset  = -offset;
         }
+       mult_adj <<= adj_scale;
+       interval <<= adj_scale;
+       offset <<= adj_scale;
  
-       if (unlikely(tk->clock->maxadj &&
-               (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
-               printk_deferred_once(KERN_WARNING
-                       "Adjusting %s more than 11%% (%ld vs %ld)\n",
-                       tk->clock->name, (long)tk->mult + adj,
-                       (long)tk->clock->mult + tk->clock->maxadj);
-       }
         /*
          * So the following can be confusing.
          *
-        * To keep things simple, lets assume adj == 1 for now.
+        * To keep things simple, lets assume mult_adj == 1 for now.
          *
-        * When adj != 1, remember that the interval and offset values
+        * When mult_adj != 1, remember that the interval and offset values
          * have been appropriately scaled so the math is the same.
          *
          * The basic idea here is that we're increasing the multiplier
@@ -1270,12 +1330,78 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
          *
          * XXX - TODO: Doc ntp_error calculation.
          */
-       tk->mult += adj;
+       tk->tkr.mult += mult_adj;
         tk->xtime_interval += interval;
-       tk->xtime_nsec -= offset;
+       tk->tkr.xtime_nsec -= offset;
         tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
+}
+
+/*
+ * Calculate the multiplier adjustment needed to match the frequency
+ * specified by NTP
+ */
+static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
+                                                       s64 offset)
+{
+       s64 interval = tk->cycle_interval;
+       s64 xinterval = tk->xtime_interval;
+       s64 tick_error;
+       bool negative;
+       u32 adj;
+
+       /* Remove any current error adj from freq calculation */
+       if (tk->ntp_err_mult)
+               xinterval -= tk->cycle_interval;
+
+       tk->ntp_tick = ntp_tick_length();
+
+       /* Calculate current error per tick */
+       tick_error = ntp_tick_length() >> tk->ntp_error_shift;
+       tick_error -= (xinterval + tk->xtime_remainder);
+
+       /* Don't worry about correcting it if its small */
+       if (likely((tick_error >= 0) && (tick_error <= interval)))
+               return;
+
+       /* preserve the direction of correction */
+       negative = (tick_error < 0);
+
+       /* Sort out the magnitude of the correction */
+       tick_error = abs(tick_error);
+       for (adj = 0; tick_error > interval; adj++)
+               tick_error >>= 1;
+
+       /* scale the corrections */
+       timekeeping_apply_adjustment(tk, offset, negative, adj);
+}
+
+/*
+ * Adjust the timekeeper's multiplier to the correct frequency
+ * and also to reduce the accumulated error value.
+ */
+static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
+{
+       /* Correct for the current frequency error */
+       timekeeping_freqadjust(tk, offset);
+
+       /* Next make a small adjustment to fix any cumulative error */
+       if (!tk->ntp_err_mult && (tk->ntp_error > 0)) {
+               tk->ntp_err_mult = 1;
+               timekeeping_apply_adjustment(tk, offset, 0, 0);
+       } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) {
+               /* Undo any existing error adjustment */
+               timekeeping_apply_adjustment(tk, offset, 1, 0);
+               tk->ntp_err_mult = 0;
+       }
+
+       if (unlikely(tk->tkr.clock->maxadj &&
+               (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) {
+               printk_once(KERN_WARNING
+                       "Adjusting %s more than 11%% (%ld vs %ld)\n",
+                       tk->tkr.clock->name, (long)tk->tkr.mult,
+                       (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+       }
  
-out_adjust:
         /*
          * It may be possible that when we entered this function, xtime_nsec
          * was very small.  Further, if we're slightly speeding the clocksource
@@ -1290,12 +1416,11 @@ out_adjust:
          * We'll correct this error next time through this function, when
          * xtime_nsec is not as small.
          */
-       if (unlikely((s64)tk->xtime_nsec < 0)) {
-               s64 neg = -(s64)tk->xtime_nsec;
-               tk->xtime_nsec = 0;
+       if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
+               s64 neg = -(s64)tk->tkr.xtime_nsec;
+               tk->tkr.xtime_nsec = 0;
                 tk->ntp_error += neg << tk->ntp_error_shift;
         }
-
  }
  
  /**
@@ -1308,13 +1433,13 @@ out_adjust:
   */
  static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  {
-       u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
+       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
         unsigned int clock_set = 0;
  
-       while (tk->xtime_nsec >= nsecps) {
+       while (tk->tkr.xtime_nsec >= nsecps) {
                 int leap;
  
-               tk->xtime_nsec -= nsecps;
+               tk->tkr.xtime_nsec -= nsecps;
                 tk->xtime_sec++;
  
                 /* Figure out if its a leap sec and apply if needed */
@@ -1359,9 +1484,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
  
         /* Accumulate one shifted interval */
         offset -= interval;
-       tk->cycle_last += interval;
+       tk->tkr.cycle_last += interval;
  
-       tk->xtime_nsec += tk->xtime_interval << shift;
+       tk->tkr.xtime_nsec += tk->xtime_interval << shift;
         *clock_set |= accumulate_nsecs_to_secs(tk);
  
         /* Accumulate raw time */
@@ -1375,7 +1500,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
         tk->raw_time.tv_nsec = raw_nsecs;
  
         /* Accumulate error between NTP and clock interval */
-       tk->ntp_error += ntp_tick_length() << shift;
+       tk->ntp_error += tk->ntp_tick << shift;
         tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
                                                 (tk->ntp_error_shift + shift);
  
@@ -1388,7 +1513,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
   */
  void update_wall_time(void)
  {
-       struct clocksource *clock;
         struct timekeeper *real_tk = &tk_core.timekeeper;
         struct timekeeper *tk = &shadow_timekeeper;
         cycle_t offset;
@@ -1402,12 +1526,11 @@ void update_wall_time(void)
         if (unlikely(timekeeping_suspended))
                 goto out;
  
-       clock = real_tk->clock;
-
  #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
         offset = real_tk->cycle_interval;
  #else
-       offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
+       offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
+                                  tk->tkr.cycle_last, tk->tkr.mask);
  #endif
  
         /* Check if there's really nothing to do */
@@ -1450,8 +1573,6 @@ void update_wall_time(void)
         clock_set |= accumulate_nsecs_to_secs(tk);
  
         write_seqcount_begin(&tk_core.seq);
-       /* Update clock->cycle_last with the new value */
-       clock->cycle_last = tk->cycle_last;
         /*
          * Update the real timekeeper.
          *
@@ -1486,82 +1607,11 @@ out:
  void getboottime(struct timespec *ts)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec boottime = {
-               .tv_sec = tk->wall_to_monotonic.tv_sec +
-                               tk->total_sleep_time.tv_sec,
-               .tv_nsec = tk->wall_to_monotonic.tv_nsec +
-                               tk->total_sleep_time.tv_nsec
-       };
-
-       set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
-}
-EXPORT_SYMBOL_GPL(getboottime);
-
-/**
- * get_monotonic_boottime - Returns monotonic time since boot
- * @ts:                pointer to the timespec to be set
- *
- * Returns the monotonic time since boot in a timespec.
- *
- * This is similar to CLOCK_MONTONIC/ktime_get_ts, but also
- * includes the time spent in suspend.
- */
-void get_monotonic_boottime(struct timespec *ts)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 tomono, sleep, ret;
-       s64 nsec;
-       unsigned int seq;
-
-       WARN_ON(timekeeping_suspended);
-
-       do {
-               seq = read_seqcount_begin(&tk_core.seq);
-               ret.tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(tk);
-               tomono = tk->wall_to_monotonic;
-               sleep = tk->total_sleep_time;
-
-       } while (read_seqcount_retry(&tk_core.seq, seq));
+       ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
  
-       ret.tv_sec += tomono.tv_sec + sleep.tv_sec;
-       ret.tv_nsec = 0;
-       timespec64_add_ns(&ret, nsec + tomono.tv_nsec + sleep.tv_nsec);
-       *ts = timespec64_to_timespec(ret);
+       *ts = ktime_to_timespec(t);
  }
-EXPORT_SYMBOL_GPL(get_monotonic_boottime);
-
-/**
- * ktime_get_boottime - Returns monotonic time since boot in a ktime
- *
- * Returns the monotonic time since boot in a ktime
- *
- * This is similar to CLOCK_MONTONIC/ktime_get, but also
- * includes the time spent in suspend.
- */
-ktime_t ktime_get_boottime(void)
-{
-       struct timespec ts;
-
-       get_monotonic_boottime(&ts);
-       return timespec_to_ktime(ts);
-}
-EXPORT_SYMBOL_GPL(ktime_get_boottime);
-
-/**
- * monotonic_to_bootbased - Convert the monotonic time to boot based.
- * @ts:                pointer to the timespec to be converted
- */
-void monotonic_to_bootbased(struct timespec *ts)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 ts64;
-
-       ts64 = timespec_to_timespec64(*ts);
-       ts64 = timespec64_add(ts64, tk->total_sleep_time);
-       *ts = timespec64_to_timespec(ts64);
-}
-EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
+EXPORT_SYMBOL_GPL(getboottime);
  
  unsigned long get_seconds(void)
  {
@@ -1634,22 +1684,22 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
                                                         ktime_t *offs_tai)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
-       struct timespec64 ts;
-       ktime_t now;
         unsigned int seq;
+       ktime_t base;
+       u64 nsecs;
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               ts = tk_xtime(tk);
+               base = tk->tkr.base_mono;
+               nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+
                 *offs_real = tk->offs_real;
                 *offs_boot = tk->offs_boot;
                 *offs_tai = tk->offs_tai;
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
-       now = ktime_set(ts.tv_sec, ts.tv_nsec);
-       now = ktime_sub(now, *offs_real);
-       return now;
+       return ktime_add_ns(base, nsecs);
  }
  
  #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1666,45 +1716,25 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
                                                         ktime_t *offs_tai)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
-       ktime_t now;
         unsigned int seq;
-       u64 secs, nsecs;
+       ktime_t base;
+       u64 nsecs;
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               secs = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(tk);
+               base = tk->tkr.base_mono;
+               nsecs = timekeeping_get_ns(&tk->tkr);
  
                 *offs_real = tk->offs_real;
                 *offs_boot = tk->offs_boot;
                 *offs_tai = tk->offs_tai;
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
-       now = ktime_add_ns(ktime_set(secs, 0), nsecs);
-       now = ktime_sub(now, *offs_real);
-       return now;
+       return ktime_add_ns(base, nsecs);
  }
  #endif
  
-/**
- * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
- */
-ktime_t ktime_get_monotonic_offset(void)
-{
-       struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned long seq;
-       struct timespec64 wtom;
-
-       do {
-               seq = read_seqcount_begin(&tk_core.seq);
-               wtom = tk->wall_to_monotonic;
-       } while (read_seqcount_retry(&tk_core.seq, seq));
-
-       return timespec64_to_ktime(wtom);
-}
-EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
-
  /**
   * do_adjtimex() - Accessor function to NTP __do_adjtimex function
   */