greybus: bootrom: Implement timeouts to detect Module failures
authorViresh Kumar <viresh.kumar@linaro.org>
Mon, 9 May 2016 05:29:00 +0000 (10:59 +0530)
committerGreg Kroah-Hartman <gregkh@google.com>
Mon, 9 May 2016 06:48:10 +0000 (08:48 +0200)
Its possible that the Module may fail to download the next stage
firmware, or to jump into it and boot into the new personality.

We have already seen examples of both of these cases on EVT 1.5.

This patch implements timeouts in the bootrom bundle driver, which now
expects the next request from the Module to be received at the AP within
1 second of the previous request/response. The time interval can be
increased later if required.

The timeouts are added between:
- AP_READY and FIRMWARE_SIZE operations
- FIRMWARE_SIZE and GET_FIRMWARE operations
- Two GET_FIRMWARE operations
- GET_FIRMWARE and READY_TO_BOOT operations
- READY_TO_BOOT operation and the call to the ->disconnect() event of
  the bootrom bundle (once the new hotplug request is received).

The timeout for the last case is kept at 5 seconds right now (random
value), as it may take a bit longer.

Because 'bootrom->fw' can be accessed simultaneously (from timeout
handler and incoming requests) and one of them can potentially free the
'->fw' structure, a mutex is also added to take care of such races while
accessing 'bootrom->fw' structure.

Also note that the '!bootrom->fw' check is moved to free_firmware()
routine.

Note that this version uses delayed-work (instead of timers used in
earlier attempt), as we need to call routines that can sleep from the
timeout handler.

Tested on EVT 1.5, by faking errors on certain requests, so that the
bootrom doesn't send any more requests. Normal case is working just fine
for audio and GP modules. This is tested with build arche_440.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
drivers/staging/greybus/bootrom.c

index cf750be..f375a87 100644 (file)
@@ -8,24 +8,49 @@
  */
 
 #include <linux/firmware.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
 
 #include "bootrom.h"
 #include "greybus.h"
 
+/* Timeout, in jiffies, within which the next request must be received */
+#define NEXT_REQ_TIMEOUT_J     msecs_to_jiffies(1000)
 
 struct gb_bootrom {
        struct gb_connection    *connection;
        const struct firmware   *fw;
        u8                      protocol_major;
        u8                      protocol_minor;
+       struct delayed_work     dwork;
+       struct mutex            mutex; /* Protects bootrom->fw */
 };
 
 static void free_firmware(struct gb_bootrom *bootrom)
 {
+       if (!bootrom->fw)
+               return;
+
        release_firmware(bootrom->fw);
        bootrom->fw = NULL;
 }
 
+static void gb_bootrom_timedout(struct work_struct *work)
+{
+       struct delayed_work *dwork = to_delayed_work(work);
+       struct gb_bootrom *bootrom = container_of(dwork, struct gb_bootrom, dwork);
+       struct device *dev = &bootrom->connection->bundle->dev;
+
+       dev_err(dev, "Timed out waiting for request from the Module\n");
+
+       mutex_lock(&bootrom->mutex);
+       free_firmware(bootrom);
+       mutex_unlock(&bootrom->mutex);
+
+       /* TODO: Power-off Module ? */
+}
+
 /*
  * The es2 chip doesn't have VID/PID programmed into the hardware and we need to
  * hack that up to distinguish different modules and their firmware blobs.
@@ -77,8 +102,7 @@ static int download_firmware(struct gb_bootrom *bootrom, u8 stage)
        int rc;
 
        /* Already have a firmware, free it */
-       if (bootrom->fw)
-               free_firmware(bootrom);
+       free_firmware(bootrom);
 
        /*
         * Create firmware name
@@ -114,25 +138,32 @@ static int gb_bootrom_firmware_size_request(struct gb_operation *op)
        struct device *dev = &op->connection->bundle->dev;
        int ret;
 
+       /* Disable timeouts */
+       cancel_delayed_work_sync(&bootrom->dwork);
+
        if (op->request->payload_size != sizeof(*size_request)) {
                dev_err(dev, "%s: illegal size of firmware size request (%zu != %zu)\n",
                        __func__, op->request->payload_size,
                        sizeof(*size_request));
-               return -EINVAL;
+               ret = -EINVAL;
+               goto queue_work;
        }
 
+       mutex_lock(&bootrom->mutex);
+
        ret = download_firmware(bootrom, size_request->stage);
        if (ret) {
                dev_err(dev, "%s: failed to download firmware (%d)\n", __func__,
                        ret);
-               return ret;
+               goto unlock;
        }
 
        if (!gb_operation_response_alloc(op, sizeof(*size_response),
                                         GFP_KERNEL)) {
                dev_err(dev, "%s: error allocating response\n", __func__);
                free_firmware(bootrom);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unlock;
        }
 
        size_response = op->response->payload;
@@ -140,28 +171,44 @@ static int gb_bootrom_firmware_size_request(struct gb_operation *op)
 
        dev_dbg(dev, "%s: firmware size %d bytes\n", __func__, size_response->size);
 
-       return 0;
+unlock:
+       mutex_unlock(&bootrom->mutex);
+
+queue_work:
+       /* Refresh timeout */
+       schedule_delayed_work(&bootrom->dwork, NEXT_REQ_TIMEOUT_J);
+
+       return ret;
 }
 
 static int gb_bootrom_get_firmware(struct gb_operation *op)
 {
        struct gb_bootrom *bootrom = gb_connection_get_data(op->connection);
-       const struct firmware *fw = bootrom->fw;
+       const struct firmware *fw;
        struct gb_bootrom_get_firmware_request *firmware_request;
        struct gb_bootrom_get_firmware_response *firmware_response;
        struct device *dev = &op->connection->bundle->dev;
        unsigned int offset, size;
+       int ret = 0;
+
+       /* Disable timeouts */
+       cancel_delayed_work_sync(&bootrom->dwork);
 
        if (op->request->payload_size != sizeof(*firmware_request)) {
                dev_err(dev, "%s: Illegal size of get firmware request (%zu %zu)\n",
                        __func__, op->request->payload_size,
                        sizeof(*firmware_request));
-               return -EINVAL;
+               ret = -EINVAL;
+               goto queue_work;
        }
 
+       mutex_lock(&bootrom->mutex);
+
+       fw = bootrom->fw;
        if (!fw) {
                dev_err(dev, "%s: firmware not available\n", __func__);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto unlock;
        }
 
        firmware_request = op->request->payload;
@@ -171,13 +218,15 @@ static int gb_bootrom_get_firmware(struct gb_operation *op)
        if (offset >= fw->size || size > fw->size - offset) {
                dev_warn(dev, "bad firmware request (offs = %u, size = %u)\n",
                                offset, size);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto unlock;
        }
 
        if (!gb_operation_response_alloc(op, sizeof(*firmware_response) + size,
                                         GFP_KERNEL)) {
                dev_err(dev, "%s: error allocating response\n", __func__);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto unlock;
        }
 
        firmware_response = op->response->payload;
@@ -186,36 +235,59 @@ static int gb_bootrom_get_firmware(struct gb_operation *op)
        dev_dbg(dev, "responding with firmware (offs = %u, size = %u)\n", offset,
                size);
 
-       return 0;
+unlock:
+       mutex_unlock(&bootrom->mutex);
+
+queue_work:
+       /* Refresh timeout */
+       schedule_delayed_work(&bootrom->dwork, NEXT_REQ_TIMEOUT_J);
+
+       return ret;
 }
 
 static int gb_bootrom_ready_to_boot(struct gb_operation *op)
 {
        struct gb_connection *connection = op->connection;
+       struct gb_bootrom *bootrom = gb_connection_get_data(connection);
        struct gb_bootrom_ready_to_boot_request *rtb_request;
        struct device *dev = &connection->bundle->dev;
        u8 status;
+       int ret = 0;
+
+       /* Disable timeouts */
+       cancel_delayed_work_sync(&bootrom->dwork);
 
        if (op->request->payload_size != sizeof(*rtb_request)) {
                dev_err(dev, "%s: Illegal size of ready to boot request (%zu %zu)\n",
                        __func__, op->request->payload_size,
                        sizeof(*rtb_request));
-               return -EINVAL;
+               ret = -EINVAL;
+               goto queue_work;
        }
 
        rtb_request = op->request->payload;
        status = rtb_request->status;
 
        /* Return error if the blob was invalid */
-       if (status == GB_BOOTROM_BOOT_STATUS_INVALID)
-               return -EINVAL;
+       if (status == GB_BOOTROM_BOOT_STATUS_INVALID) {
+               ret = -EINVAL;
+               goto queue_work;
+       }
 
        /*
         * XXX Should we return error for insecure firmware?
         */
        dev_dbg(dev, "ready to boot: 0x%x, 0\n", status);
 
-       return 0;
+queue_work:
+       /*
+        * Refresh timeout, the Interface shall load the new personality and
+        * send a new hotplug request, which shall get rid of the bootrom
+        * connection. As that can take some time, increase the timeout a bit.
+        */
+       schedule_delayed_work(&bootrom->dwork, 5 * NEXT_REQ_TIMEOUT_J);
+
+       return ret;
 }
 
 static int gb_bootrom_request_handler(struct gb_operation *op)
@@ -304,6 +376,8 @@ static int gb_bootrom_probe(struct gb_bundle *bundle,
 
        bootrom->connection = connection;
 
+       mutex_init(&bootrom->mutex);
+       INIT_DELAYED_WORK(&bootrom->dwork, gb_bootrom_timedout);
        greybus_set_drvdata(bundle, bootrom);
 
        ret = gb_connection_enable_tx(connection);
@@ -329,6 +403,9 @@ static int gb_bootrom_probe(struct gb_bundle *bundle,
                goto err_connection_disable;
        }
 
+       /* Refresh timeout */
+       schedule_delayed_work(&bootrom->dwork, NEXT_REQ_TIMEOUT_J);
+
        dev_dbg(&bundle->dev, "AP_READY sent\n");
 
        return 0;
@@ -351,9 +428,16 @@ static void gb_bootrom_disconnect(struct gb_bundle *bundle)
 
        gb_connection_disable(bootrom->connection);
 
-       /* Release firmware */
-       if (bootrom->fw)
-               free_firmware(bootrom);
+       /* Disable timeouts */
+       cancel_delayed_work_sync(&bootrom->dwork);
+
+       /*
+        * Release firmware:
+        *
+        * As the connection and the delayed work are already disabled, we don't
+        * need to lock access to bootrom->fw here.
+        */
+       free_firmware(bootrom);
 
        gb_connection_destroy(bootrom->connection);
        kfree(bootrom);