summaryrefslogtreecommitdiffstats
path: root/drivers/soundwire/stream.c
diff options
context:
space:
mode:
authorPierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>2022-06-22 00:56:38 +0200
committerVinod Koul <vkoul@kernel.org>2022-07-06 09:04:21 +0200
commitbd29c00edd0a5dac8b6e7332bb470cd50f92e893 (patch)
treeccc271bbe2966489d79222fa4e5b5747fe399c3b /drivers/soundwire/stream.c
parentsoundwire: bus_type: fix remove and shutdown support (diff)
downloadlinux-bd29c00edd0a5dac8b6e7332bb470cd50f92e893.tar.xz
linux-bd29c00edd0a5dac8b6e7332bb470cd50f92e893.zip
soundwire: revisit driver bind/unbind and callbacks
In the SoundWire probe, we store a pointer from the driver ops into the 'slave' structure. This can lead to kernel oopses when unbinding codec drivers, e.g. with the following sequence to remove machine driver and codec driver. /sbin/modprobe -r snd_soc_sof_sdw /sbin/modprobe -r snd_soc_rt711 The full details can be found in the BugLink below, for reference the two following examples show different cases of driver ops/callbacks being invoked after the driver .remove(). kernel: BUG: kernel NULL pointer dereference, address: 0000000000000150 kernel: Workqueue: events cdns_update_slave_status_work [soundwire_cadence] kernel: RIP: 0010:mutex_lock+0x19/0x30 kernel: Call Trace: kernel: ? sdw_handle_slave_status+0x426/0xe00 [soundwire_bus 94ff184bf398570c3f8ff7efe9e32529f532e4ae] kernel: ? newidle_balance+0x26a/0x400 kernel: ? cdns_update_slave_status_work+0x1e9/0x200 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: BUG: unable to handle page fault for address: ffffffffc07654c8 kernel: Workqueue: pm pm_runtime_work kernel: RIP: 0010:sdw_bus_prep_clk_stop+0x6f/0x160 [soundwire_bus] kernel: Call Trace: kernel: <TASK> kernel: sdw_cdns_clock_stop+0xb5/0x1b0 [soundwire_cadence 1bcf98eebe5ba9833cd433323769ac923c9c6f82] kernel: intel_suspend_runtime+0x5f/0x120 [soundwire_intel aca858f7c87048d3152a4a41bb68abb9b663a1dd] kernel: ? dpm_sysfs_remove+0x60/0x60 This was not detected earlier in Intel tests since the tests first remove the parent PCI device and shut down the bus. The sequence above is a corner case which keeps the bus operational but without a driver bound. While trying to solve this kernel oopses, it became clear that the existing SoundWire bus does not deal well with the unbind case. Commit 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") added a 'probed' status variable and a 'probe_complete' struct completion. This status is however not reset on remove and likewise the 'probe complete' is not re-initialized, so the bind/unbind/bind test cases would fail. The timeout used before the 'update_status' callback was also a bad idea in hindsight, there should really be no timing assumption as to if and when a driver is bound to a device. An initial draft was based on device_lock() and device_unlock() was tested. This proved too complicated, with deadlocks created during the suspend-resume sequences, which also use the same device_lock/unlock() as the bind/unbind sequences. On a CometLake device, a bad DSDT/BIOS caused spurious resumes and the use of device_lock() caused hangs during suspend. After multiple weeks or testing and painful reverse-engineering of deadlocks on different devices, we looked for alternatives that did not interfere with the device core. A bus notifier was used successfully to keep track of DRIVER_BOUND and DRIVER_UNBIND events. This solved the bind-unbind-bind case in tests, but it can still be defeated with a theoretical corner case where the memory is freed by a .remove while the callback is in use. The notifier only helps make sure the driver callbacks are valid, but not that the memory allocated in probe remains valid while the callbacks are invoked. This patch suggests the introduction of a new 'sdw_dev_lock' mutex protecting probe/remove and all driver callbacks. Since this mutex is 'local' to SoundWire only, it does not interfere with existing locks and does not create deadlocks. In addition, this patch removes the 'probe_complete' completion, instead we directly invoke the 'update_status' from the probe routine. That removes any sort of timing dependency and a much better support for the device/driver model, the driver could be bound before the bus started, or eons after the bus started and the hardware would be properly initialized in all cases. BugLink: https://github.com/thesofproject/linux/issues/3531 Fixes: 56d4fe31af77 ("soundwire: Add MIPI DisCo property helpers") Fixes: 528be501b7d4a ("soundwire: sdw_slave: add probe_complete structure and new fields") Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Reviewed-by: Rander Wang <rander.wang@intel.com> Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com> Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com> Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com> Link: https://lore.kernel.org/r/20220621225641.221170-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul <vkoul@kernel.org>
Diffstat (limited to 'drivers/soundwire/stream.c')
-rw-r--r--drivers/soundwire/stream.c53
1 files changed, 35 insertions, 18 deletions
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index d34150559142..bd502368339e 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/soundwire/sdw.h>
+#include <linux/soundwire/sdw_type.h>
#include <sound/soc.h>
#include "bus.h"
@@ -401,20 +402,26 @@ static int sdw_do_port_prep(struct sdw_slave_runtime *s_rt,
struct sdw_prepare_ch prep_ch,
enum sdw_port_prep_ops cmd)
{
- const struct sdw_slave_ops *ops = s_rt->slave->ops;
- int ret;
+ int ret = 0;
+ struct sdw_slave *slave = s_rt->slave;
- if (ops->port_prep) {
- ret = ops->port_prep(s_rt->slave, &prep_ch, cmd);
- if (ret < 0) {
- dev_err(&s_rt->slave->dev,
- "Slave Port Prep cmd %d failed: %d\n",
- cmd, ret);
- return ret;
+ mutex_lock(&slave->sdw_dev_lock);
+
+ if (slave->probed) {
+ struct device *dev = &slave->dev;
+ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
+
+ if (drv->ops && drv->ops->port_prep) {
+ ret = drv->ops->port_prep(slave, &prep_ch, cmd);
+ if (ret < 0)
+ dev_err(dev, "Slave Port Prep cmd %d failed: %d\n",
+ cmd, ret);
}
}
- return 0;
+ mutex_unlock(&slave->sdw_dev_lock);
+
+ return ret;
}
static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus,
@@ -578,7 +585,7 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
struct sdw_slave_runtime *s_rt;
struct sdw_bus *bus = m_rt->bus;
struct sdw_slave *slave;
- int ret = 0;
+ int ret;
if (bus->ops->set_bus_conf) {
ret = bus->ops->set_bus_conf(bus, &bus->params);
@@ -589,17 +596,27 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt)
list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
slave = s_rt->slave;
- if (slave->ops->bus_config) {
- ret = slave->ops->bus_config(slave, &bus->params);
- if (ret < 0) {
- dev_err(bus->dev, "Notify Slave: %d failed\n",
- slave->dev_num);
- return ret;
+ mutex_lock(&slave->sdw_dev_lock);
+
+ if (slave->probed) {
+ struct device *dev = &slave->dev;
+ struct sdw_driver *drv = drv_to_sdw_driver(dev->driver);
+
+ if (drv->ops && drv->ops->bus_config) {
+ ret = drv->ops->bus_config(slave, &bus->params);
+ if (ret < 0) {
+ dev_err(dev, "Notify Slave: %d failed\n",
+ slave->dev_num);
+ mutex_unlock(&slave->sdw_dev_lock);
+ return ret;
+ }
}
}
+
+ mutex_unlock(&slave->sdw_dev_lock);
}
- return ret;
+ return 0;
}
/**