drm/i915: Flush the RPS bottom-half when the GPU idles
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 3cf36dc..814b0df 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/cpufreq.h>
+#include <drm/drm_plane_helper.h>
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "../../../platform/x86/intel_ips.h"
 #define INTEL_RC6p_ENABLE                      (1<<1)
 #define INTEL_RC6pp_ENABLE                     (1<<2)
 
-static void bxt_init_clock_gating(struct drm_device *dev)
+static void gen9_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       /* See Bspec note for PSR2_CTL bit 31, Wa#828:bxt */
+       /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
        I915_WRITE(CHICKEN_PAR1_1,
                   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
 
+       I915_WRITE(GEN8_CONFIG0,
+                  I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
+
+       /* WaEnableChickenDCPR:skl,bxt,kbl */
+       I915_WRITE(GEN8_CHICKEN_DCPR_1,
+                  I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+
+       /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */
+       /* WaFbcWakeMemOn:skl,bxt,kbl */
+       I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
+                  DISP_FBC_WM_DIS |
+                  DISP_FBC_MEMORY_WAKE);
+
+       /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl */
+       I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+                  ILK_DPFC_DISABLE_DUMMY0);
+}
+
+static void bxt_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       gen9_init_clock_gating(dev);
+
        /* WaDisableSDEUnitClockGating:bxt */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
@@ -2949,6 +2974,46 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
        }
 }
 
+/*
+ * Determines the downscale amount of a plane for the purposes of watermark
+ * calculations. The bspec defines downscale amount as:
+ *
+ * """
+ * Horizontal down scale amount = maximum[1, Horizontal source size /
+ *                                           Horizontal destination size]
+ * Vertical down scale amount = maximum[1, Vertical source size /
+ *                                         Vertical destination size]
+ * Total down scale amount = Horizontal down scale amount *
+ *                           Vertical down scale amount
+ * """
+ *
+ * Return value is provided in 16.16 fixed point form to retain the
+ * fractional part. Caller should take care of dividing & rounding it off.
+ */
+static uint32_t
+skl_plane_downscale_amount(const struct intel_plane_state *pstate)
+{
+       uint32_t downscale_h, downscale_w;
+       uint32_t src_w, src_h, dst_w, dst_h;
+
+       if (WARN_ON(!pstate->visible))
+               return DRM_PLANE_HELPER_NO_SCALING;
+
+       /* n.b., src is 16.16 fixed point, dst is whole integer */
+       src_w = drm_rect_width(&pstate->src);
+       src_h = drm_rect_height(&pstate->src);
+       dst_w = drm_rect_width(&pstate->dst);
+       dst_h = drm_rect_height(&pstate->dst);
+       if (intel_rotation_90_or_270(pstate->base.rotation))
+               swap(dst_w, dst_h);
+
+       downscale_h = max(src_h / dst_h, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
+       downscale_w = max(src_w / dst_w, (uint32_t)DRM_PLANE_HELPER_NO_SCALING);
+
+       /* Provide result in 16.16 fixed point */
+       return (uint64_t)downscale_w * downscale_h >> 16;
+}
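
For reference, here is a minimal userspace sketch of the 16.16 fixed-point arithmetic used by skl_plane_downscale_amount(); it is not driver code, the plane sizes are hypothetical and DRM_PLANE_HELPER_NO_SCALING is open-coded as 1 << 16:

#include <stdint.h>
#include <stdio.h>

#define NO_SCALING (1u << 16) /* 1.0 in 16.16 fixed point */

int main(void)
{
        /* hypothetical plane: 3840x2160 source scanned out at 1920x1080 */
        uint32_t src_w = 3840u << 16, src_h = 2160u << 16; /* 16.16 */
        uint32_t dst_w = 1920, dst_h = 1080;               /* whole integer */
        uint32_t downscale_w = src_w / dst_w;              /* 0x20000 == 2.0 */
        uint32_t downscale_h = src_h / dst_h;              /* 0x20000 == 2.0 */

        if (downscale_w < NO_SCALING)
                downscale_w = NO_SCALING;
        if (downscale_h < NO_SCALING)
                downscale_h = NO_SCALING;

        /* total downscale stays in 16.16: 0x40000 == 4.0 */
        printf("0x%x\n", (uint32_t)((uint64_t)downscale_w * downscale_h >> 16));
        return 0;
}
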
+
 static unsigned int
 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
                             const struct drm_plane_state *pstate,
@@ -2956,6 +3021,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 {
        struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
        struct drm_framebuffer *fb = pstate->fb;
+       uint32_t down_scale_amount, data_rate;
        uint32_t width = 0, height = 0;
        unsigned format = fb ? fb->pixel_format : DRM_FORMAT_XRGB8888;
 
@@ -2975,15 +3041,19 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
        /* for planar format */
        if (format == DRM_FORMAT_NV12) {
                if (y)  /* y-plane data rate */
-                       return width * height *
+                       data_rate = width * height *
                                drm_format_plane_cpp(format, 0);
                else    /* uv-plane data rate */
-                       return (width / 2) * (height / 2) *
+                       data_rate = (width / 2) * (height / 2) *
                                drm_format_plane_cpp(format, 1);
+       } else {
+               /* for packed formats */
+               data_rate = width * height * drm_format_plane_cpp(format, 0);
        }
 
-       /* for packed formats */
-       return width * height * drm_format_plane_cpp(format, 0);
+       down_scale_amount = skl_plane_downscale_amount(intel_pstate);
+
+       return (uint64_t)data_rate * down_scale_amount >> 16;
 }
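
As a quick sanity check of the data-rate computation above, a standalone sketch with a hypothetical 1920x1080 NV12 plane and no downscaling (made-up values, not part of the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t width = 1920, height = 1080;
        uint32_t downscale = 1u << 16; /* 1.0 in 16.16: no downscaling */

        /* NV12: 1 byte per pixel for Y, 2 bytes per 2x2 block for CbCr */
        uint32_t y_rate  = width * height * 1;
        uint32_t uv_rate = (width / 2) * (height / 2) * 2;

        printf("y  = %u\n", (uint32_t)((uint64_t)y_rate * downscale >> 16));  /* 2073600 */
        printf("uv = %u\n", (uint32_t)((uint64_t)uv_rate * downscale >> 16)); /* 1036800 */
        return 0;
}
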
 
 /*
@@ -3042,6 +3112,69 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
        return total_data_rate;
 }
 
+static uint16_t
+skl_ddb_min_alloc(const struct drm_plane_state *pstate,
+                 const int y)
+{
+       struct drm_framebuffer *fb = pstate->fb;
+       struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
+       uint32_t src_w, src_h;
+       uint32_t min_scanlines = 8;
+       uint8_t plane_bpp;
+
+       if (WARN_ON(!fb))
+               return 0;
+
+       /* For packed formats, no y-plane, return 0 */
+       if (y && fb->pixel_format != DRM_FORMAT_NV12)
+               return 0;
+
+       /* For non-Y-tiled formats, return 8 blocks */
+       if (fb->modifier[0] != I915_FORMAT_MOD_Y_TILED &&
+           fb->modifier[0] != I915_FORMAT_MOD_Yf_TILED)
+               return 8;
+
+       src_w = drm_rect_width(&intel_pstate->src) >> 16;
+       src_h = drm_rect_height(&intel_pstate->src) >> 16;
+
+       if (intel_rotation_90_or_270(pstate->rotation))
+               swap(src_w, src_h);
+
+       /* Halve UV plane width and height for NV12 */
+       if (fb->pixel_format == DRM_FORMAT_NV12 && !y) {
+               src_w /= 2;
+               src_h /= 2;
+       }
+
+       if (fb->pixel_format == DRM_FORMAT_NV12 && !y)
+               plane_bpp = drm_format_plane_cpp(fb->pixel_format, 1);
+       else
+               plane_bpp = drm_format_plane_cpp(fb->pixel_format, 0);
+
+       if (intel_rotation_90_or_270(pstate->rotation)) {
+               switch (plane_bpp) {
+               case 1:
+                       min_scanlines = 32;
+                       break;
+               case 2:
+                       min_scanlines = 16;
+                       break;
+               case 4:
+                       min_scanlines = 8;
+                       break;
+               case 8:
+                       min_scanlines = 4;
+                       break;
+               default:
+                       WARN(1, "Unsupported pixel depth %u for rotation",
+                            plane_bpp);
+                       min_scanlines = 32;
+               }
+       }
+
+       return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3;
+}
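
And a small sketch of the minimum-allocation formula at the end of skl_ddb_min_alloc(), evaluated for a hypothetical un-rotated, Y-tiled, 32bpp plane (the 512 divisor and min_scanlines value mirror the code above; the numbers are made up):

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
        uint32_t src_w = 1920;          /* hypothetical plane width */
        uint8_t plane_bpp = 4;          /* 32bpp packed format */
        uint32_t min_scanlines = 8;     /* un-rotated Y-tile default */

        /* DIV_ROUND_UP(30720, 512) = 60; 60 * 8 / 4 + 3 = 123 blocks */
        printf("%u blocks\n",
               DIV_ROUND_UP(4 * src_w * plane_bpp, 512) * min_scanlines / 4 + 3);
        return 0;
}
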
+
 static int
 skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
                      struct skl_ddb_allocation *ddb /* out */)
@@ -3104,11 +3237,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
                        continue;
                }
 
-               minimum[id] = 8;
-               if (pstate->fb->pixel_format == DRM_FORMAT_NV12)
-                       y_minimum[id] = 8;
-               else
-                       y_minimum[id] = 0;
+               minimum[id] = skl_ddb_min_alloc(pstate, 0);
+               y_minimum[id] = skl_ddb_min_alloc(pstate, 1);
        }
 
        for (i = 0; i < PLANE_CURSOR; i++) {
@@ -3225,6 +3355,30 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
        return ret;
 }
 
+static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cstate,
+                                             struct intel_plane_state *pstate)
+{
+       uint64_t adjusted_pixel_rate;
+       uint64_t downscale_amount;
+       uint64_t pixel_rate;
+
+       /* Shouldn't reach here on disabled planes... */
+       if (WARN_ON(!pstate->visible))
+               return 0;
+
+       /*
+        * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
+        * with additional adjustments for plane-specific scaling.
+        */
+       adjusted_pixel_rate = skl_pipe_pixel_rate(cstate);
+       downscale_amount = skl_plane_downscale_amount(pstate);
+
+       pixel_rate = adjusted_pixel_rate * downscale_amount >> 16;
+       WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0));
+
+       return pixel_rate;
+}
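
To illustrate the scaling above with made-up numbers (a sketch, not driver code): with a pipe pixel rate of 148500 kHz and a total downscale of 2.0, the adjusted plane pixel rate doubles:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t pipe_pixel_rate = 148500;      /* kHz, hypothetical */
        uint64_t downscale = 2ull << 16;        /* 2.0 in 16.16 fixed point */

        /* plane rate = pipe rate * downscale, dropping the fractional bits */
        printf("%llu kHz\n",
               (unsigned long long)(pipe_pixel_rate * downscale >> 16)); /* 297000 */
        return 0;
}
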
+
 static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
                                struct intel_crtc_state *cstate,
                                struct intel_plane_state *intel_pstate,
@@ -3243,6 +3397,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
        uint32_t selected_result;
        uint8_t cpp;
        uint32_t width = 0, height = 0;
+       uint32_t plane_pixel_rate;
 
        if (latency == 0 || !cstate->base.active || !intel_pstate->visible) {
                *enabled = false;
@@ -3256,9 +3411,10 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
                swap(width, height);
 
        cpp = drm_format_plane_cpp(fb->pixel_format, 0);
-       method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate),
-                                cpp, latency);
-       method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
+       plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate);
+
+       method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
+       method2 = skl_wm_method2(plane_pixel_rate,
                                 cstate->base.adjusted_mode.crtc_htotal,
                                 width,
                                 cpp,
@@ -3723,6 +3879,19 @@ static int skl_update_pipe_wm(struct drm_crtc_state *cstate,
        return 0;
 }
 
+static uint32_t
+pipes_modified(struct drm_atomic_state *state)
+{
+       struct drm_crtc *crtc;
+       struct drm_crtc_state *cstate;
+       uint32_t i, ret = 0;
+
+       for_each_crtc_in_state(state, crtc, cstate, i)
+               ret |= drm_crtc_mask(crtc);
+
+       return ret;
+}
+
 static int
 skl_compute_ddb(struct drm_atomic_state *state)
 {
@@ -3731,7 +3900,7 @@ skl_compute_ddb(struct drm_atomic_state *state)
        struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
        struct intel_crtc *intel_crtc;
        struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
-       unsigned realloc_pipes = dev_priv->active_crtcs;
+       uint32_t realloc_pipes = pipes_modified(state);
        int ret;
 
        /*
@@ -4498,19 +4667,23 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
        new_power = dev_priv->rps.power;
        switch (dev_priv->rps.power) {
        case LOW_POWER:
-               if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
+               if (val > dev_priv->rps.efficient_freq + 1 &&
+                   val > dev_priv->rps.cur_freq)
                        new_power = BETWEEN;
                break;
 
        case BETWEEN:
-               if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
+               if (val <= dev_priv->rps.efficient_freq &&
+                   val < dev_priv->rps.cur_freq)
                        new_power = LOW_POWER;
-               else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
+               else if (val >= dev_priv->rps.rp0_freq &&
+                        val > dev_priv->rps.cur_freq)
                        new_power = HIGH_POWER;
                break;
 
        case HIGH_POWER:
-               if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
+               if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 &&
+                   val < dev_priv->rps.cur_freq)
                        new_power = BETWEEN;
                break;
        }
@@ -4556,22 +4729,24 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
        }
 
        I915_WRITE(GEN6_RP_UP_EI,
-               GT_INTERVAL_FROM_US(dev_priv, ei_up));
+                  GT_INTERVAL_FROM_US(dev_priv, ei_up));
        I915_WRITE(GEN6_RP_UP_THRESHOLD,
-               GT_INTERVAL_FROM_US(dev_priv, (ei_up * threshold_up / 100)));
+                  GT_INTERVAL_FROM_US(dev_priv,
+                                      ei_up * threshold_up / 100));
 
        I915_WRITE(GEN6_RP_DOWN_EI,
-               GT_INTERVAL_FROM_US(dev_priv, ei_down));
+                  GT_INTERVAL_FROM_US(dev_priv, ei_down));
        I915_WRITE(GEN6_RP_DOWN_THRESHOLD,
-               GT_INTERVAL_FROM_US(dev_priv, (ei_down * threshold_down / 100)));
+                  GT_INTERVAL_FROM_US(dev_priv,
+                                      ei_down * threshold_down / 100));
 
-        I915_WRITE(GEN6_RP_CONTROL,
-                   GEN6_RP_MEDIA_TURBO |
-                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                   GEN6_RP_MEDIA_IS_GFX |
-                   GEN6_RP_ENABLE |
-                   GEN6_RP_UP_BUSY_AVG |
-                   GEN6_RP_DOWN_IDLE_AVG);
+       I915_WRITE(GEN6_RP_CONTROL,
+                  GEN6_RP_MEDIA_TURBO |
+                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                  GEN6_RP_MEDIA_IS_GFX |
+                  GEN6_RP_ENABLE |
+                  GEN6_RP_UP_BUSY_AVG |
+                  GEN6_RP_DOWN_IDLE_AVG);
 
        dev_priv->rps.power = new_power;
        dev_priv->rps.up_threshold = threshold_up;
@@ -4688,12 +4863,27 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
                        gen6_rps_reset_ei(dev_priv);
                I915_WRITE(GEN6_PMINTRMSK,
                           gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+
+               gen6_enable_rps_interrupts(dev_priv);
+
+               /* Ensure we start at the user's desired frequency */
+               intel_set_rps(dev_priv,
+                             clamp(dev_priv->rps.cur_freq,
+                                   dev_priv->rps.min_freq_softlimit,
+                                   dev_priv->rps.max_freq_softlimit));
        }
        mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
+       /* Flush our bottom-half so that it does not race with us
+        * setting the idle frequency and so that it is bounded by
+        * our rpm wakeref. And then disable the interrupts to stop any
+        * further RPS reclocking whilst we are asleep.
+        */
+       gen6_disable_rps_interrupts(dev_priv);
+
        mutex_lock(&dev_priv->rps.hw_lock);
        if (dev_priv->rps.enabled) {
                if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
@@ -4718,7 +4908,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv,
        /* This is intentionally racy! We peek at the state here, then
         * validate inside the RPS worker.
         */
-       if (!(dev_priv->mm.busy &&
+       if (!(dev_priv->gt.awake &&
              dev_priv->rps.enabled &&
              dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
                return;
@@ -4734,7 +4924,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv,
                spin_lock_irq(&dev_priv->irq_lock);
                if (dev_priv->rps.interrupts_enabled) {
                        dev_priv->rps.client_boost = true;
-                       queue_work(dev_priv->wq, &dev_priv->rps.work);
+                       schedule_work(&dev_priv->rps.work);
                }
                spin_unlock_irq(&dev_priv->irq_lock);
 
@@ -4798,14 +4988,15 @@ static void intel_print_rc6_info(struct drm_i915_private *dev_priv, u32 mode)
                        mode = 0;
        }
        if (HAS_RC6p(dev_priv))
-               DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
-                             onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
-                             onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
-                             onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
+               DRM_DEBUG_DRIVER("Enabling RC6 states: "
+                                "RC6 %s RC6p %s RC6pp %s\n",
+                                onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
+                                onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
+                                onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
 
        else
-               DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
-                             onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
+               DRM_DEBUG_DRIVER("Enabling RC6 states: RC6 %s\n",
+                                onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
 }
 
 static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
@@ -4813,9 +5004,20 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        bool enable_rc6 = true;
        unsigned long rc6_ctx_base;
+       u32 rc_ctl;
+       int rc_sw_target;
+
+       rc_ctl = I915_READ(GEN6_RC_CONTROL);
+       rc_sw_target = (I915_READ(GEN6_RC_STATE) & RC_SW_TARGET_STATE_MASK) >>
+                      RC_SW_TARGET_STATE_SHIFT;
+       DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+                        "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+                        onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+                        onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+                        rc_sw_target);
 
        if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
-               DRM_DEBUG_KMS("RC6 Base location not set properly.\n");
+               DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
                enable_rc6 = false;
        }
 
@@ -4827,7 +5029,7 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
        if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) &&
              (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base +
                                        ggtt->stolen_reserved_size))) {
-               DRM_DEBUG_KMS("RC6 Base address not as expected.\n");
+               DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
                enable_rc6 = false;
        }
 
@@ -4835,15 +5037,24 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv)
              ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
              ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
              ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
-               DRM_DEBUG_KMS("Engine Idle wait time not set properly.\n");
+               DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+               enable_rc6 = false;
+       }
+
+       if (!I915_READ(GEN8_PUSHBUS_CONTROL) ||
+           !I915_READ(GEN8_PUSHBUS_ENABLE) ||
+           !I915_READ(GEN8_PUSHBUS_SHIFT)) {
+               DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+               enable_rc6 = false;
+       }
+
+       if (!I915_READ(GEN6_GFXPAUSE)) {
+               DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
                enable_rc6 = false;
        }
 
-       if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE |
-                                           GEN6_RC_CTL_HW_ENABLE)) &&
-           ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) ||
-            !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) {
-               DRM_DEBUG_KMS("HW/SW RC6 is not enabled by BIOS.\n");
+       if (!I915_READ(GEN8_MISC_CTRL0)) {
+               DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
                enable_rc6 = false;
        }
 
@@ -4875,8 +5086,9 @@ int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6)
                        mask = INTEL_RC6_ENABLE;
 
                if ((enable_rc6 & mask) != enable_rc6)
-                       DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
-                                     enable_rc6 & mask, enable_rc6, mask);
+                       DRM_DEBUG_DRIVER("Adjusting RC6 mask to %d "
+                                        "(requested %d, valid %d)\n",
+                                        enable_rc6 & mask, enable_rc6, mask);
 
                return enable_rc6 & mask;
        }
@@ -6831,13 +7043,40 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
        I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 }
 
+static void kabylake_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       gen9_init_clock_gating(dev);
+
+       /* WaDisableSDEUnitClockGating:kbl */
+       if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+               I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                          GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+       /* WaDisableGamClockGating:kbl */
+       if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+               I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
+                          GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
+
+       /* WaFbcNukeOnHostModify:kbl */
+       I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+                  ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+}
+
 static void skylake_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,kbl */
-       I915_WRITE(CHICKEN_PAR1_1,
-                  I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+       gen9_init_clock_gating(dev);
+
+       /* WAC6entrylatency:skl */
+       I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
+                  FBC_LLC_FULLY_OPEN);
+
+       /* WaFbcNukeOnHostModify:skl */
+       I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
+                  ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
 }
 
 static void broadwell_init_clock_gating(struct drm_device *dev)
@@ -6884,6 +7123,10 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
         */
        I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
 
+       /* WaKVMNotificationOnConfigChange:bdw */
+       I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
+                  | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
+
        lpt_init_clock_gating(dev);
 }
 
@@ -7301,7 +7544,7 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
        if (IS_SKYLAKE(dev_priv))
                dev_priv->display.init_clock_gating = skylake_init_clock_gating;
        else if (IS_KABYLAKE(dev_priv))
-               dev_priv->display.init_clock_gating = skylake_init_clock_gating;
+               dev_priv->display.init_clock_gating = kabylake_init_clock_gating;
        else if (IS_BROXTON(dev_priv))
                dev_priv->display.init_clock_gating = bxt_init_clock_gating;
        else if (IS_BROADWELL(dev_priv))
@@ -7417,46 +7660,59 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 {
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+       /* GEN6_PCODE_* are outside of the forcewake domain, so we can
+        * use the I915_READ_FW variants to reduce the amount of work
+        * required when reading/writing.
+        */
+
+       if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
                DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
                return -EAGAIN;
        }
 
-       I915_WRITE(GEN6_PCODE_DATA, *val);
-       I915_WRITE(GEN6_PCODE_DATA1, 0);
-       I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+       I915_WRITE_FW(GEN6_PCODE_DATA, *val);
+       I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
+       I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
-       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-                    500)) {
+       if (intel_wait_for_register_fw(dev_priv,
+                                      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+                                      500)) {
                DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
                return -ETIMEDOUT;
        }
 
-       *val = I915_READ(GEN6_PCODE_DATA);
-       I915_WRITE(GEN6_PCODE_DATA, 0);
+       *val = I915_READ_FW(GEN6_PCODE_DATA);
+       I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
        return 0;
 }
 
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
+int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
+                              u32 mbox, u32 val)
 {
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
+       /* GEN6_PCODE_* are outside of the forcewake domain, so we can
+        * use the I915_READ_FW variants to reduce the amount of work
+        * required when reading/writing.
+        */
+
+       if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
                DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
                return -EAGAIN;
        }
 
-       I915_WRITE(GEN6_PCODE_DATA, val);
-       I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+       I915_WRITE_FW(GEN6_PCODE_DATA, val);
+       I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
-       if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
-                    500)) {
+       if (intel_wait_for_register_fw(dev_priv,
+                                      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
+                                      500)) {
                DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
                return -ETIMEDOUT;
        }
 
-       I915_WRITE(GEN6_PCODE_DATA, 0);
+       I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
        return 0;
 }
@@ -7526,7 +7782,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
        struct request_boost *boost = container_of(work, struct request_boost, work);
        struct drm_i915_gem_request *req = boost->req;
 
-       if (!i915_gem_request_completed(req, true))
+       if (!i915_gem_request_completed(req))
                gen6_rps_boost(req->i915, NULL, req->emitted_jiffies);
 
        i915_gem_request_unreference(req);
@@ -7540,7 +7796,7 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req)
        if (req == NULL || INTEL_GEN(req->i915) < 6)
                return;
 
-       if (i915_gem_request_completed(req, true))
+       if (i915_gem_request_completed(req))
                return;
 
        boost = kmalloc(sizeof(*boost), GFP_ATOMIC);