summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 20:17:00 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 20:17:00 +0200
commita771ea6413c00cf4af0570745f2e27084d7e2376 (patch)
tree780a2ca2b837d0de3c1ac8b2f0d3e6475f1ee817 /drivers
parentMerge tag 'acpi-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ra... (diff)
parentMerge branches 'pm-devfreq', 'pm-qos', 'pm-tools' and 'pm-docs' (diff)
downloadlinux-a771ea6413c00cf4af0570745f2e27084d7e2376.tar.xz
linux-a771ea6413c00cf4af0570745f2e27084d7e2376.zip
Merge tag 'pm-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "These are mostly minor improvements all over including new CPU IDs for the Intel RAPL driver, an Energy Model rework to use micro-Watt as the power unit, cpufreq fixes and cleanus, cpuidle updates, devfreq updates, documentation cleanups and a new version of the pm-graph suite of utilities. Specifics: - Make cpufreq_show_cpus() more straightforward (Viresh Kumar). - Drop unnecessary CPU hotplug locking from store() used by cpufreq sysfs attributes (Viresh Kumar). - Make the ACPI cpufreq driver support the boost control interface on Zhaoxin/Centaur processors (Tony W Wang-oc). - Print a warning message on attempts to free an active cpufreq policy which should never happen (Viresh Kumar). - Fix grammar in the Kconfig help text for the loongson2 cpufreq driver (Randy Dunlap). - Use cpumask_var_t for an on-stack CPU mask in the ondemand cpufreq governor (Zhao Liu). - Add trace points for guest_halt_poll_ns grow/shrink to the haltpoll cpuidle driver (Eiichi Tsukata). - Modify intel_idle to treat C1 and C1E as independent idle states on Sapphire Rapids (Artem Bityutskiy). - Extend support for wakeirq to callback wrappers used during system suspend and resume (Ulf Hansson). - Defer waiting for device probe before loading a hibernation image till the first actual device access to avoid possible deadlocks reported by syzbot (Tetsuo Handa). - Unify device_init_wakeup() for PM_SLEEP and !PM_SLEEP (Bjorn Helgaas). - Add Raptor Lake-P to the list of processors supported by the Intel RAPL driver (George D Sworo). - Add Alder Lake-N and Raptor Lake-P to the list of processors for which Power Limit4 is supported in the Intel RAPL driver (Sumeet Pawnikar). - Make pm_genpd_remove() check genpd_debugfs_dir against NULL before attempting to remove it (Hsin-Yi Wang). - Change the Energy Model code to represent power in micro-Watts and adjust its users accordingly (Lukasz Luba). - Add new devfreq driver for Mediatek CCI (Cache Coherent Interconnect) (Johnson Wang). - Convert the Samsung Exynos SoC Bus bindings to DT schema of exynos-bus.c (Krzysztof Kozlowski). - Address kernel-doc warnings by adding the description for unused function parameters in devfreq core (Mauro Carvalho Chehab). - Use NULL to pass a null pointer rather than zero according to the function propotype in imx-bus.c (Colin Ian King). - Print error message instead of error interger value in tegra30-devfreq.c (Dmitry Osipenko). - Add checks to prevent setting negative frequency QoS limits for CPUs (Shivnandan Kumar). - Update the pm-graph suite of utilities to the latest revision 5.9 including multiple improvements (Todd Brandt). - Drop pme_interrupt reference from the PCI power management documentation (Mario Limonciello)" * tag 'pm-5.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (27 commits) powercap: RAPL: Add Power Limit4 support for Alder Lake-N and Raptor Lake-P PM: QoS: Add check to make sure CPU freq is non-negative PM: hibernate: defer device probing when resuming from hibernation intel_idle: make SPR C1 and C1E be independent cpufreq: ondemand: Use cpumask_var_t for on-stack cpu mask cpufreq: loongson2: fix Kconfig "its" grammar pm-graph v5.9 cpufreq: Warn users while freeing active policy cpufreq: scmi: Support the power scale in micro-Watts in SCMI v3.1 firmware: arm_scmi: Get detailed power scale from perf Documentation: EM: Switch to micro-Watts scale PM: EM: convert power field to micro-Watts precision and align drivers PM / devfreq: tegra30: Add error message for devm_devfreq_add_device() PM / devfreq: imx-bus: use NULL to pass a null pointer rather than zero PM / devfreq: shut up kernel-doc warnings dt-bindings: interconnect: samsung,exynos-bus: convert to dtschema PM / devfreq: mediatek: Introduce MediaTek CCI devfreq driver dt-bindings: interconnect: Add MediaTek CCI dt-bindings PM: domains: Ensure genpd_debugfs_dir exists before remove PM: runtime: Extend support for wakeirq for force_suspend|resume ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/base/power/domain.c3
-rw-r--r--drivers/base/power/runtime.c6
-rw-r--r--drivers/base/power/wakeup.c30
-rw-r--r--drivers/cpufreq/Kconfig2
-rw-r--r--drivers/cpufreq/acpi-cpufreq.c4
-rw-r--r--drivers/cpufreq/cpufreq.c37
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c13
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c7
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c15
-rw-r--r--drivers/cpuidle/governors/haltpoll.c3
-rw-r--r--drivers/devfreq/Kconfig10
-rw-r--r--drivers/devfreq/Makefile1
-rw-r--r--drivers/devfreq/devfreq.c4
-rw-r--r--drivers/devfreq/imx-bus.c2
-rw-r--r--drivers/devfreq/mtk-cci-devfreq.c440
-rw-r--r--drivers/devfreq/tegra30-devfreq.c4
-rw-r--r--drivers/firmware/arm_scmi/perf.c18
-rw-r--r--drivers/idle/intel_idle.c24
-rw-r--r--drivers/opp/of.c15
-rw-r--r--drivers/powercap/dtpm_cpu.c5
-rw-r--r--drivers/powercap/intel_rapl_common.c1
-rw-r--r--drivers/powercap/intel_rapl_msr.c2
-rw-r--r--drivers/thermal/cpufreq_cooling.c13
-rw-r--r--drivers/thermal/devfreq_cooling.c19
24 files changed, 570 insertions, 108 deletions
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 739e52cd4aba..55a10e6d4e2a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -222,6 +222,9 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd)
{
struct dentry *d;
+ if (!genpd_debugfs_dir)
+ return;
+
d = debugfs_lookup(genpd->name, genpd_debugfs_dir);
debugfs_remove(d);
}
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 949907e2e242..997be3ac20a7 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1862,10 +1862,13 @@ int pm_runtime_force_suspend(struct device *dev)
callback = RPM_GET_CALLBACK(dev, runtime_suspend);
+ dev_pm_enable_wake_irq_check(dev, true);
ret = callback ? callback(dev) : 0;
if (ret)
goto err;
+ dev_pm_enable_wake_irq_complete(dev);
+
/*
* If the device can stay in suspend after the system-wide transition
* to the working state that will follow, drop the children counter of
@@ -1882,6 +1885,7 @@ int pm_runtime_force_suspend(struct device *dev)
return 0;
err:
+ dev_pm_disable_wake_irq_check(dev, true);
pm_runtime_enable(dev);
return ret;
}
@@ -1915,9 +1919,11 @@ int pm_runtime_force_resume(struct device *dev)
callback = RPM_GET_CALLBACK(dev, runtime_resume);
+ dev_pm_disable_wake_irq_check(dev, false);
ret = callback ? callback(dev) : 0;
if (ret) {
pm_runtime_set_suspended(dev);
+ dev_pm_enable_wake_irq_check(dev, false);
goto out;
}
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 11a4ffe91367..e3befa2c1b66 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -501,36 +501,6 @@ void device_set_wakeup_capable(struct device *dev, bool capable)
EXPORT_SYMBOL_GPL(device_set_wakeup_capable);
/**
- * device_init_wakeup - Device wakeup initialization.
- * @dev: Device to handle.
- * @enable: Whether or not to enable @dev as a wakeup device.
- *
- * By default, most devices should leave wakeup disabled. The exceptions are
- * devices that everyone expects to be wakeup sources: keyboards, power buttons,
- * possibly network interfaces, etc. Also, devices that don't generate their
- * own wakeup requests but merely forward requests from one bus to another
- * (like PCI bridges) should have wakeup enabled by default.
- */
-int device_init_wakeup(struct device *dev, bool enable)
-{
- int ret = 0;
-
- if (!dev)
- return -EINVAL;
-
- if (enable) {
- device_set_wakeup_capable(dev, true);
- ret = device_wakeup_enable(dev);
- } else {
- device_wakeup_disable(dev);
- device_set_wakeup_capable(dev, false);
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(device_init_wakeup);
-
-/**
* device_set_wakeup_enable - Enable or disable a device to wake up the system.
* @dev: Device to handle.
* @enable: enable/disable flag
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index c3038cdc6865..2a84fc63371e 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -268,7 +268,7 @@ config LOONGSON2_CPUFREQ
This option adds a CPUFreq driver for loongson processors which
support software configurable cpu frequency.
- Loongson2F and it's successors support this feature.
+ Loongson2F and its successors support this feature.
If in doubt, say N.
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 3d514b82d055..1bb2b90ebb21 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -78,6 +78,8 @@ static bool boost_state(unsigned int cpu)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
+ case X86_VENDOR_CENTAUR:
+ case X86_VENDOR_ZHAOXIN:
rdmsr_on_cpu(cpu, MSR_IA32_MISC_ENABLE, &lo, &hi);
msr = lo | ((u64)hi << 32);
return !(msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
@@ -97,6 +99,8 @@ static int boost_set_msr(bool enable)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
+ case X86_VENDOR_CENTAUR:
+ case X86_VENDOR_ZHAOXIN:
msr_addr = MSR_IA32_MISC_ENABLE;
msr_mask = MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
break;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2cad42774164..954eef26685f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -843,12 +843,14 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf)
unsigned int cpu;
for_each_cpu(cpu, mask) {
- if (i)
- i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
- i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
+ i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u ", cpu);
if (i >= (PAGE_SIZE - 5))
break;
}
+
+ /* Remove the extra space at the end */
+ i--;
+
i += sprintf(&buf[i], "\n");
return i;
}
@@ -971,21 +973,10 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
if (!fattr->store)
return -EIO;
- /*
- * cpus_read_trylock() is used here to work around a circular lock
- * dependency problem with respect to the cpufreq_register_driver().
- */
- if (!cpus_read_trylock())
- return -EBUSY;
-
- if (cpu_online(policy->cpu)) {
- down_write(&policy->rwsem);
- if (likely(!policy_is_inactive(policy)))
- ret = fattr->store(policy, buf, count);
- up_write(&policy->rwsem);
- }
-
- cpus_read_unlock();
+ down_write(&policy->rwsem);
+ if (likely(!policy_is_inactive(policy)))
+ ret = fattr->store(policy, buf, count);
+ up_write(&policy->rwsem);
return ret;
}
@@ -1282,6 +1273,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
unsigned long flags;
int cpu;
+ /*
+ * The callers must ensure the policy is inactive by now, to avoid any
+ * races with show()/store() callbacks.
+ */
+ if (unlikely(!policy_is_inactive(policy)))
+ pr_warn("%s: Freeing active policy\n", __func__);
+
/* Remove policy from list */
write_lock_irqsave(&cpufreq_driver_lock, flags);
list_del(&policy->policy_list);
@@ -1536,8 +1534,6 @@ out_destroy_policy:
for_each_cpu(j, policy->real_cpus)
remove_cpu_dev_symlink(policy, j, get_cpu_device(j));
- cpumask_clear(policy->cpus);
-
out_offline_policy:
if (cpufreq_driver->offline)
cpufreq_driver->offline(policy);
@@ -1547,6 +1543,7 @@ out_exit_policy:
cpufreq_driver->exit(policy);
out_free_policy:
+ cpumask_clear(policy->cpus);
up_write(&policy->rwsem);
cpufreq_policy_free(policy);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index e8fbf970ff07..c52d19d67557 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -416,10 +416,13 @@ static struct dbs_governor od_dbs_gov = {
static void od_set_powersave_bias(unsigned int powersave_bias)
{
unsigned int cpu;
- cpumask_t done;
+ cpumask_var_t done;
+
+ if (!alloc_cpumask_var(&done, GFP_KERNEL))
+ return;
default_powersave_bias = powersave_bias;
- cpumask_clear(&done);
+ cpumask_clear(done);
cpus_read_lock();
for_each_online_cpu(cpu) {
@@ -428,7 +431,7 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
struct dbs_data *dbs_data;
struct od_dbs_tuners *od_tuners;
- if (cpumask_test_cpu(cpu, &done))
+ if (cpumask_test_cpu(cpu, done))
continue;
policy = cpufreq_cpu_get_raw(cpu);
@@ -439,13 +442,15 @@ static void od_set_powersave_bias(unsigned int powersave_bias)
if (!policy_dbs)
continue;
- cpumask_or(&done, &done, policy->cpus);
+ cpumask_or(done, done, policy->cpus);
dbs_data = policy_dbs->dbs_data;
od_tuners = dbs_data->tuners;
od_tuners->powersave_bias = default_powersave_bias;
}
cpus_read_unlock();
+
+ free_cpumask_var(done);
}
void od_register_powersave_bias_handler(unsigned int (*f)
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index 813cccbfe934..f0e0a35c7f21 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -51,7 +51,7 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};
static int __maybe_unused
-mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
+mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW,
unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
@@ -71,8 +71,9 @@ mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
i--;
*KHz = data->table[i].frequency;
- *mW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] +
- i * LUT_ROW_SIZE) / 1000;
+ /* Provide micro-Watts value to the Energy Model */
+ *uW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] +
+ i * LUT_ROW_SIZE);
return 0;
}
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 6d2a4cf46db7..513a071845c2 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/scmi_protocol.h>
#include <linux/types.h>
+#include <linux/units.h>
struct scmi_data {
int domain_id;
@@ -99,6 +100,7 @@ static int __maybe_unused
scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
unsigned long *KHz)
{
+ enum scmi_power_scale power_scale = perf_ops->power_scale_get(ph);
unsigned long Hz;
int ret, domain;
@@ -112,6 +114,10 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
if (ret)
return ret;
+ /* Convert the power to uW if it is mW (ignore bogoW) */
+ if (power_scale == SCMI_POWER_MILLIWATTS)
+ *power *= MICROWATT_PER_MILLIWATT;
+
/* The EM framework specifies the frequency in KHz. */
*KHz = Hz / 1000;
@@ -249,8 +255,9 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy)
static void scmi_cpufreq_register_em(struct cpufreq_policy *policy)
{
struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
- bool power_scale_mw = perf_ops->power_scale_mw_get(ph);
+ enum scmi_power_scale power_scale = perf_ops->power_scale_get(ph);
struct scmi_data *priv = policy->driver_data;
+ bool em_power_scale = false;
/*
* This callback will be called for each policy, but we don't need to
@@ -262,9 +269,13 @@ static void scmi_cpufreq_register_em(struct cpufreq_policy *policy)
if (!priv->nr_opp)
return;
+ if (power_scale == SCMI_POWER_MILLIWATTS
+ || power_scale == SCMI_POWER_MICROWATTS)
+ em_power_scale = true;
+
em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp,
&em_cb, priv->opp_shared_cpus,
- power_scale_mw);
+ em_power_scale);
}
static struct cpufreq_driver scmi_cpufreq_driver = {
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index cb2a96eafc02..1dff3a52917d 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/kvm_para.h>
+#include <trace/events/power.h>
static unsigned int guest_halt_poll_ns __read_mostly = 200000;
module_param(guest_halt_poll_ns, uint, 0644);
@@ -90,6 +91,7 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
if (val > guest_halt_poll_ns)
val = guest_halt_poll_ns;
+ trace_guest_halt_poll_ns_grow(val, dev->poll_limit_ns);
dev->poll_limit_ns = val;
} else if (block_ns > guest_halt_poll_ns &&
guest_halt_poll_allow_shrink) {
@@ -100,6 +102,7 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
val = 0;
else
val /= shrink;
+ trace_guest_halt_poll_ns_shrink(val, dev->poll_limit_ns);
dev->poll_limit_ns = val;
}
}
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 87eb2b837e68..9754d8b31621 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -120,6 +120,16 @@ config ARM_TEGRA_DEVFREQ
It reads ACTMON counters of memory controllers and adjusts the
operating frequencies and voltages with OPP support.
+config ARM_MEDIATEK_CCI_DEVFREQ
+ tristate "MEDIATEK CCI DEVFREQ Driver"
+ depends on ARM_MEDIATEK_CPUFREQ || COMPILE_TEST
+ select DEVFREQ_GOV_PASSIVE
+ help
+ This adds a devfreq driver for MediaTek Cache Coherent Interconnect
+ which is shared the same regulators with the cpu cluster. It can track
+ buck voltages and update a proper CCI frequency. Use the notification
+ to get the regulator status.
+
config ARM_RK3399_DMC_DEVFREQ
tristate "ARM RK3399 DMC DEVFREQ Driver"
depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 0b6be92a25d9..bf40d04928d0 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o
obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o
obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o
+obj-$(CONFIG_ARM_MEDIATEK_CCI_DEVFREQ) += mtk-cci-devfreq.o
obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ) += rk3399_dmc.o
obj-$(CONFIG_ARM_SUN8I_A33_MBUS_DEVFREQ) += sun8i-a33-mbus.o
obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra30-devfreq.o
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 9602141bb8ec..63347a5ae599 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -696,6 +696,8 @@ static int qos_notifier_call(struct devfreq *devfreq)
/**
* qos_min_notifier_call() - Callback for QoS min_freq changes.
* @nb: Should be devfreq->nb_min
+ * @val: not used
+ * @ptr: not used
*/
static int qos_min_notifier_call(struct notifier_block *nb,
unsigned long val, void *ptr)
@@ -706,6 +708,8 @@ static int qos_min_notifier_call(struct notifier_block *nb,
/**
* qos_max_notifier_call() - Callback for QoS max_freq changes.
* @nb: Should be devfreq->nb_max
+ * @val: not used
+ * @ptr: not used
*/
static int qos_max_notifier_call(struct notifier_block *nb,
unsigned long val, void *ptr)
diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c
index f3f6e25053ed..f87067fc574d 100644
--- a/drivers/devfreq/imx-bus.c
+++ b/drivers/devfreq/imx-bus.c
@@ -59,7 +59,7 @@ static int imx_bus_init_icc(struct device *dev)
struct imx_bus *priv = dev_get_drvdata(dev);
const char *icc_driver_name;
- if (!of_get_property(dev->of_node, "#interconnect-cells", 0))
+ if (!of_get_property(dev->of_node, "#interconnect-cells", NULL))
return 0;
if (!IS_ENABLED(CONFIG_INTERCONNECT_IMX)) {
dev_warn(dev, "imx interconnect drivers disabled\n");
diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c
new file mode 100644
index 000000000000..71abb3fbd042
--- /dev/null
+++ b/drivers/devfreq/mtk-cci-devfreq.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 MediaTek Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+
+struct mtk_ccifreq_platform_data {
+ int min_volt_shift;
+ int max_volt_shift;
+ int proc_max_volt;
+ int sram_min_volt;
+ int sram_max_volt;
+};
+
+struct mtk_ccifreq_drv {
+ struct device *dev;
+ struct devfreq *devfreq;
+ struct regulator *proc_reg;
+ struct regulator *sram_reg;
+ struct clk *cci_clk;
+ struct clk *inter_clk;
+ int inter_voltage;
+ unsigned long pre_freq;
+ /* Avoid race condition for regulators between notify and policy */
+ struct mutex reg_lock;
+ struct notifier_block opp_nb;
+ const struct mtk_ccifreq_platform_data *soc_data;
+ int vtrack_max;
+};
+
+static int mtk_ccifreq_set_voltage(struct mtk_ccifreq_drv *drv, int new_voltage)
+{
+ const struct mtk_ccifreq_platform_data *soc_data = drv->soc_data;
+ struct device *dev = drv->dev;
+ int pre_voltage, pre_vsram, new_vsram, vsram, voltage, ret;
+ int retry_max = drv->vtrack_max;
+
+ if (!drv->sram_reg) {
+ ret = regulator_set_voltage(drv->proc_reg, new_voltage,
+ drv->soc_data->proc_max_volt);
+ return ret;
+ }
+
+ pre_voltage = regulator_get_voltage(drv->proc_reg);
+ if (pre_voltage < 0) {
+ dev_err(dev, "invalid vproc value: %d\n", pre_voltage);
+ return pre_voltage;
+ }
+
+ pre_vsram = regulator_get_voltage(drv->sram_reg);
+ if (pre_vsram < 0) {
+ dev_err(dev, "invalid vsram value: %d\n", pre_vsram);
+ return pre_vsram;
+ }
+
+ new_vsram = clamp(new_voltage + soc_data->min_volt_shift,
+ soc_data->sram_min_volt, soc_data->sram_max_volt);
+
+ do {
+ if (pre_voltage <= new_voltage) {
+ vsram = clamp(pre_voltage + soc_data->max_volt_shift,
+ soc_data->sram_min_volt, new_vsram);
+ ret = regulator_set_voltage(drv->sram_reg, vsram,
+ soc_data->sram_max_volt);
+ if (ret)
+ return ret;
+
+ if (vsram == soc_data->sram_max_volt ||
+ new_vsram == soc_data->sram_min_volt)
+ voltage = new_voltage;
+ else
+ voltage = vsram - soc_data->min_volt_shift;
+
+ ret = regulator_set_voltage(drv->proc_reg, voltage,
+ soc_data->proc_max_volt);
+ if (ret) {
+ regulator_set_voltage(drv->sram_reg, pre_vsram,
+ soc_data->sram_max_volt);
+ return ret;
+ }
+ } else if (pre_voltage > new_voltage) {
+ voltage = max(new_voltage,
+ pre_vsram - soc_data->max_volt_shift);
+ ret = regulator_set_voltage(drv->proc_reg, voltage,
+ soc_data->proc_max_volt);
+ if (ret)
+ return ret;
+
+ if (voltage == new_voltage)
+ vsram = new_vsram;
+ else
+ vsram = max(new_vsram,
+ voltage + soc_data->min_volt_shift);
+
+ ret = regulator_set_voltage(drv->sram_reg, vsram,
+ soc_data->sram_max_volt);
+ if (ret) {
+ regulator_set_voltage(drv->proc_reg, pre_voltage,
+ soc_data->proc_max_volt);
+ return ret;
+ }
+ }
+
+ pre_voltage = voltage;
+ pre_vsram = vsram;
+
+ if (--retry_max < 0) {
+ dev_err(dev,
+ "over loop count, failed to set voltage\n");
+ return -EINVAL;
+ }
+ } while (voltage != new_voltage || vsram != new_vsram);
+
+ return 0;
+}
+
+static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev);
+ struct clk *cci_pll = clk_get_parent(drv->cci_clk);
+ struct dev_pm_opp *opp;
+ unsigned long opp_rate;
+ int voltage, pre_voltage, inter_voltage, target_voltage, ret;
+
+ if (!drv)
+ return -EINVAL;
+
+ if (drv->pre_freq == *freq)
+ return 0;
+
+ inter_voltage = drv->inter_voltage;
+
+ opp_rate = *freq;
+ opp = devfreq_recommended_opp(dev, &opp_rate, 1);
+ if (IS_ERR(opp)) {
+ dev_err(dev, "failed to find opp for freq: %ld\n", opp_rate);
+ return PTR_ERR(opp);
+ }
+
+ mutex_lock(&drv->reg_lock);
+
+ voltage = dev_pm_opp_get_voltage(opp);
+ dev_pm_opp_put(opp);
+
+ pre_voltage = regulator_get_voltage(drv->proc_reg);
+ if (pre_voltage < 0) {
+ dev_err(dev, "invalid vproc value: %d\n", pre_voltage);
+ ret = pre_voltage;
+ goto out_unlock;
+ }
+
+ /* scale up: set voltage first then freq. */
+ target_voltage = max(inter_voltage, voltage);
+ if (pre_voltage <= target_voltage) {
+ ret = mtk_ccifreq_set_voltage(drv, target_voltage);
+ if (ret) {
+ dev_err(dev, "failed to scale up voltage\n");
+ goto out_restore_voltage;
+ }
+ }
+
+ /* switch the cci clock to intermediate clock source. */
+ ret = clk_set_parent(drv->cci_clk, drv->inter_clk);
+ if (ret) {
+ dev_err(dev, "failed to re-parent cci clock\n");
+ goto out_restore_voltage;
+ }
+
+ /* set the original clock to target rate. */
+ ret = clk_set_rate(cci_pll, *freq);
+ if (ret) {
+ dev_err(dev, "failed to set cci pll rate: %d\n", ret);
+ clk_set_parent(drv->cci_clk, cci_pll);
+ goto out_restore_voltage;
+ }
+
+ /* switch the cci clock back to the original clock source. */
+ ret = clk_set_parent(drv->cci_clk, cci_pll);
+ if (ret) {
+ dev_err(dev, "failed to re-parent cci clock\n");
+ mtk_ccifreq_set_voltage(drv, inter_voltage);
+ goto out_unlock;
+ }
+
+ /*
+ * If the new voltage is lower than the intermediate voltage or the
+ * original voltage, scale down to the new voltage.
+ */
+ if (voltage < inter_voltage || voltage < pre_voltage) {
+ ret = mtk_ccifreq_set_voltage(drv, voltage);
+ if (ret) {
+ dev_err(dev, "failed to scale down voltage\n");
+ goto out_unlock;
+ }
+ }
+
+ drv->pre_freq = *freq;
+ mutex_unlock(&drv->reg_lock);
+
+ return 0;
+
+out_restore_voltage:
+ mtk_ccifreq_set_voltage(drv, pre_voltage);
+
+out_unlock:
+ mutex_unlock(&drv->reg_lock);
+ return ret;
+}
+
+static int mtk_ccifreq_opp_notifier(struct notifier_block *nb,
+ unsigned long event, void *data)
+{
+ struct dev_pm_opp *opp = data;
+ struct mtk_ccifreq_drv *drv;
+ unsigned long freq, volt;
+
+ drv = container_of(nb, struct mtk_ccifreq_drv, opp_nb);
+
+ if (event == OPP_EVENT_ADJUST_VOLTAGE) {
+ freq = dev_pm_opp_get_freq(opp);
+
+ mutex_lock(&drv->reg_lock);
+ /* current opp item is changed */
+ if (freq == drv->pre_freq) {
+ volt = dev_pm_opp_get_voltage(opp);
+ mtk_ccifreq_set_voltage(drv, volt);
+ }
+ mutex_unlock(&drv->reg_lock);
+ }
+
+ return 0;
+}
+
+static struct devfreq_dev_profile mtk_ccifreq_profile = {
+ .target = mtk_ccifreq_target,
+};
+
+static int mtk_ccifreq_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct mtk_ccifreq_drv *drv;
+ struct devfreq_passive_data *passive_data;
+ struct dev_pm_opp *opp;
+ unsigned long rate, opp_volt;
+ int ret;
+
+ drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
+ if (!drv)
+ return -ENOMEM;
+
+ drv->dev = dev;
+ drv->soc_data = (const struct mtk_ccifreq_platform_data *)
+ of_device_get_match_data(&pdev->dev);
+ mutex_init(&drv->reg_lock);
+ platform_set_drvdata(pdev, drv);
+
+ drv->cci_clk = devm_clk_get(dev, "cci");
+ if (IS_ERR(drv->cci_clk)) {
+ ret = PTR_ERR(drv->cci_clk);
+ return dev_err_probe(dev, ret, "failed to get cci clk\n");
+ }
+
+ drv->inter_clk = devm_clk_get(dev, "intermediate");
+ if (IS_ERR(drv->inter_clk)) {
+ ret = PTR_ERR(drv->inter_clk);
+ return dev_err_probe(dev, ret,
+ "failed to get intermediate clk\n");
+ }
+
+ drv->proc_reg = devm_regulator_get_optional(dev, "proc");
+ if (IS_ERR(drv->proc_reg)) {
+ ret = PTR_ERR(drv->proc_reg);
+ return dev_err_probe(dev, ret,
+ "failed to get proc regulator\n");
+ }
+
+ ret = regulator_enable(drv->proc_reg);
+ if (ret) {
+ dev_err(dev, "failed to enable proc regulator\n");
+ return ret;
+ }
+
+ drv->sram_reg = devm_regulator_get_optional(dev, "sram");
+ if (IS_ERR(drv->sram_reg))
+ drv->sram_reg = NULL;
+ else {
+ ret = regulator_enable(drv->sram_reg);
+ if (ret) {
+ dev_err(dev, "failed to enable sram regulator\n");
+ goto out_free_resources;
+ }
+ }
+
+ /*
+ * We assume min voltage is 0 and tracking target voltage using
+ * min_volt_shift for each iteration.
+ * The retry_max is 3 times of expected iteration count.
+ */
+ drv->vtrack_max = 3 * DIV_ROUND_UP(max(drv->soc_data->sram_max_volt,
+ drv->soc_data->proc_max_volt),
+ drv->soc_data->min_volt_shift);
+
+ ret = clk_prepare_enable(drv->cci_clk);
+ if (ret)
+ goto out_free_resources;
+
+ ret = dev_pm_opp_of_add_table(dev);
+ if (ret) {
+ dev_err(dev, "failed to add opp table: %d\n", ret);
+ goto out_disable_cci_clk;
+ }
+
+ rate = clk_get_rate(drv->inter_clk);
+ opp = dev_pm_opp_find_freq_ceil(dev, &rate);
+ if (IS_ERR(opp)) {
+ ret = PTR_ERR(opp);
+ dev_err(dev, "failed to get intermediate opp: %d\n", ret);
+ goto out_remove_opp_table;
+ }
+ drv->inter_voltage = dev_pm_opp_get_voltage(opp);
+ dev_pm_opp_put(opp);
+
+ rate = U32_MAX;
+ opp = dev_pm_opp_find_freq_floor(drv->dev, &rate);
+ if (IS_ERR(opp)) {
+ dev_err(dev, "failed to get opp\n");
+ ret = PTR_ERR(opp);
+ goto out_remove_opp_table;
+ }
+
+ opp_volt = dev_pm_opp_get_voltage(opp);
+ dev_pm_opp_put(opp);
+ ret = mtk_ccifreq_set_voltage(drv, opp_volt);
+ if (ret) {
+ dev_err(dev, "failed to scale to highest voltage %lu in proc_reg\n",
+ opp_volt);
+ goto out_remove_opp_table;
+ }
+
+ passive_data = devm_kzalloc(dev, sizeof(*passive_data), GFP_KERNEL);
+ if (!passive_data) {
+ ret = -ENOMEM;
+ goto out_remove_opp_table;
+ }
+
+ passive_data->parent_type = CPUFREQ_PARENT_DEV;
+ drv->devfreq = devm_devfreq_add_device(dev, &mtk_ccifreq_profile,
+ DEVFREQ_GOV_PASSIVE,
+ passive_data);
+ if (IS_ERR(drv->devfreq)) {
+ ret = -EPROBE_DEFER;
+ dev_err(dev, "failed to add devfreq device: %ld\n",
+ PTR_ERR(drv->devfreq));
+ goto out_remove_opp_table;
+ }
+
+ drv->opp_nb.notifier_call = mtk_ccifreq_opp_notifier;
+ ret = dev_pm_opp_register_notifier(dev, &drv->opp_nb);
+ if (ret) {
+ dev_err(dev, "failed to register opp notifier: %d\n", ret);
+ goto out_remove_opp_table;
+ }
+ return 0;
+
+out_remove_opp_table:
+ dev_pm_opp_of_remove_table(dev);
+
+out_disable_cci_clk:
+ clk_disable_unprepare(drv->cci_clk);
+
+out_free_resources:
+ if (regulator_is_enabled(drv->proc_reg))
+ regulator_disable(drv->proc_reg);
+ if (drv->sram_reg && regulator_is_enabled(drv->sram_reg))
+ regulator_disable(drv->sram_reg);
+
+ return ret;
+}
+
+static int mtk_ccifreq_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct mtk_ccifreq_drv *drv;
+
+ drv = platform_get_drvdata(pdev);
+
+ dev_pm_opp_unregister_notifier(dev, &drv->opp_nb);
+ dev_pm_opp_of_remove_table(dev);
+ clk_disable_unprepare(drv->cci_clk);
+ regulator_disable(drv->proc_reg);
+ if (drv->sram_reg)
+ regulator_disable(drv->sram_reg);
+
+ return 0;
+}
+
+static const struct mtk_ccifreq_platform_data mt8183_platform_data = {
+ .min_volt_shift = 100000,
+ .max_volt_shift = 200000,
+ .proc_max_volt = 1150000,
+};
+
+static const struct mtk_ccifreq_platform_data mt8186_platform_data = {
+ .min_volt_shift = 100000,
+ .max_volt_shift = 250000,
+ .proc_max_volt = 1118750,
+ .sram_min_volt = 850000,
+ .sram_max_volt = 1118750,
+};
+
+static const struct of_device_id mtk_ccifreq_machines[] = {
+ { .compatible = "mediatek,mt8183-cci", .data = &mt8183_platform_data },
+ { .compatible = "mediatek,mt8186-cci", .data = &mt8186_platform_data },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mtk_ccifreq_machines);
+
+static struct platform_driver mtk_ccifreq_platdrv = {
+ .probe = mtk_ccifreq_probe,
+ .remove = mtk_ccifreq_remove,
+ .driver = {
+ .name = "mtk-ccifreq",
+ .of_match_table = mtk_ccifreq_machines,
+ },
+};
+module_platform_driver(mtk_ccifreq_platdrv);
+
+MODULE_DESCRIPTION("MediaTek CCI devfreq driver");
+MODULE_AUTHOR("Jia-Wei Chang <jia-wei.chang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index 65ecf17a36f4..585a95fe2bd6 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -922,8 +922,10 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
devfreq = devm_devfreq_add_device(&pdev->dev, &tegra_devfreq_profile,
"tegra_actmon", NULL);
- if (IS_ERR(devfreq))
+ if (IS_ERR(devfreq)) {
+ dev_err(&pdev->dev, "Failed to add device: %pe\n", devfreq);
return PTR_ERR(devfreq);
+ }
return 0;
}
diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 64ea2d2f2875..ecf5c4de851b 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -140,8 +140,7 @@ struct perf_dom_info {
struct scmi_perf_info {
u32 version;
int num_domains;
- bool power_scale_mw;
- bool power_scale_uw;
+ enum scmi_power_scale power_scale;
u64 stats_addr;
u32 stats_size;
struct perf_dom_info *dom_info;
@@ -171,9 +170,13 @@ static int scmi_perf_attributes_get(const struct scmi_protocol_handle *ph,
u16 flags = le16_to_cpu(attr->flags);
pi->num_domains = le16_to_cpu(attr->num_domains);
- pi->power_scale_mw = POWER_SCALE_IN_MILLIWATT(flags);
+
+ if (POWER_SCALE_IN_MILLIWATT(flags))
+ pi->power_scale = SCMI_POWER_MILLIWATTS;
if (PROTOCOL_REV_MAJOR(pi->version) >= 0x3)
- pi->power_scale_uw = POWER_SCALE_IN_MICROWATT(flags);
+ if (POWER_SCALE_IN_MICROWATT(flags))
+ pi->power_scale = SCMI_POWER_MICROWATTS;
+
pi->stats_addr = le32_to_cpu(attr->stats_addr_low) |
(u64)le32_to_cpu(attr->stats_addr_high) << 32;
pi->stats_size = le32_to_cpu(attr->stats_size);
@@ -675,11 +678,12 @@ static bool scmi_fast_switch_possible(const struct scmi_protocol_handle *ph,
return dom->fc_info && dom->fc_info[PERF_FC_LEVEL].set_addr;
}
-static bool scmi_power_scale_mw_get(const struct scmi_protocol_handle *ph)
+static enum scmi_power_scale
+scmi_power_scale_get(const struct scmi_protocol_handle *ph)
{
struct scmi_perf_info *pi = ph->get_priv(ph);
- return pi->power_scale_mw;
+ return pi->power_scale;
}
static const struct scmi_perf_proto_ops perf_proto_ops = {
@@ -694,7 +698,7 @@ static const struct scmi_perf_proto_ops perf_proto_ops = {
.freq_get = scmi_dvfs_freq_get,
.est_power_get = scmi_dvfs_est_power_get,
.fast_switch_possible = scmi_fast_switch_possible,
- .power_scale_mw_get = scmi_power_scale_mw_get,
+ .power_scale_get = scmi_power_scale_get,
};
static int scmi_perf_set_notify_enabled(const struct scmi_protocol_handle *ph,
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 445b19d20b9a..3e101719689a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -928,16 +928,6 @@ static struct cpuidle_state adl_l_cstates[] __initdata = {
.enter = NULL }
};
-/*
- * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
- * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
- * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
- * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
- * both C1 and C1E requests end up with C1, so there is effectively no C1E.
- *
- * By default we enable C1 and disable C1E by marking it with
- * 'CPUIDLE_FLAG_UNUSABLE'.
- */
static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C1",
@@ -950,8 +940,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C1E",
.desc = "MWAIT 0x01",
- .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
- CPUIDLE_FLAG_UNUSABLE,
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 2,
.target_residency = 4,
.enter = &intel_idle,
@@ -1774,17 +1763,6 @@ static void __init spr_idle_state_table_update(void)
{
unsigned long long msr;
- /* Check if user prefers C1E over C1. */
- if ((preferred_states_mask & BIT(2)) &&
- !(preferred_states_mask & BIT(1))) {
- /* Disable C1 and enable C1E. */
- spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
- spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
-
- /* Enable C1E using the "C1E promotion" bit. */
- c1e_promotion = C1E_PROMOTION_ENABLE;
- }
-
/*
* By default, the C6 state assumes the worst-case scenario of package
* C6. However, if PC6 is disabled, we update the numbers to match
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 30394929d700..eb89c9a75985 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -1443,12 +1443,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* It provides the power used by @dev at @kHz if it is the frequency of an
* existing OPP, or at the frequency of the first OPP above @kHz otherwise
* (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled
- * frequency and @mW to the associated power.
+ * frequency and @uW to the associated power.
*
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
-_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
+_get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
@@ -1465,7 +1465,7 @@ _get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
return -EINVAL;
*kHz = opp_freq / 1000;
- *mW = opp_power / 1000;
+ *uW = opp_power;
return 0;
}
@@ -1475,14 +1475,14 @@ _get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
* This computes the power estimated by @dev at @kHz if it is the frequency
* of an existing OPP, or at the frequency of the first OPP above @kHz otherwise
* (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled
- * frequency and @mW to the associated power. The power is estimated as
+ * frequency and @uW to the associated power. The power is estimated as
* P = C * V^2 * f with C being the device's capacitance and V and f
* respectively the voltage and frequency of the OPP.
*
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
-static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
+static int __maybe_unused _get_power(struct device *dev, unsigned long *uW,
unsigned long *kHz)
{
struct dev_pm_opp *opp;
@@ -1512,9 +1512,10 @@ static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
return -EINVAL;
tmp = (u64)cap * mV * mV * (Hz / 1000000);
- do_div(tmp, 1000000000);
+ /* Provide power in micro-Watts */
+ do_div(tmp, 1000000);
- *mW = (unsigned long)tmp;
+ *uW = (unsigned long)tmp;
*kHz = Hz / 1000;
return 0;
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 6a88eb7e9f75..2ff7717530bf 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -53,7 +53,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
for (i = 0; i < pd->nr_perf_states; i++) {
- power = pd->table[i].power * MICROWATT_PER_MILLIWATT * nr_cpus;
+ power = pd->table[i].power * nr_cpus;
if (power > power_limit)
break;
@@ -63,8 +63,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
freq_qos_update_request(&dtpm_cpu->qos_req, freq);
- power_limit = pd->table[i - 1].power *
- MICROWATT_PER_MILLIWATT * nr_cpus;
+ power_limit = pd->table[i - 1].power * nr_cpus;
return power_limit;
}
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index a9c99d9e8b42..21d624f9f5fb 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1109,6 +1109,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 9d23984d8931..bc6adda58883 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -140,7 +140,9 @@ static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_N, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE_P, X86_FEATURE_ANY },
{}
};
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index b263b0fde03c..cb75f1365615 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -21,6 +21,7 @@
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/thermal.h>
+#include <linux/units.h>
#include <trace/events/thermal.h>
@@ -101,6 +102,7 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
u32 freq)
{
+ unsigned long power_mw;
int i;
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
@@ -108,16 +110,23 @@ static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
break;
}
- return cpufreq_cdev->em->table[i + 1].power;
+ power_mw = cpufreq_cdev->em->table[i + 1].power;
+ power_mw /= MICROWATT_PER_MILLIWATT;
+
+ return power_mw;
}
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
u32 power)
{
+ unsigned long em_power_mw;
int i;
for (i = cpufreq_cdev->max_level; i > 0; i--) {
- if (power >= cpufreq_cdev->em->table[i].power)
+ /* Convert EM power to milli-Watts to make safe comparison */
+ em_power_mw = cpufreq_cdev->em->table[i].power;
+ em_power_mw /= MICROWATT_PER_MILLIWATT;
+ if (power >= em_power_mw)
break;
}
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 8c76f9655e57..8d1260f65061 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -200,7 +200,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
res = dfc->power_ops->get_real_power(df, power, freq, voltage);
if (!res) {
state = dfc->capped_state;
+
+ /* Convert EM power into milli-Watts first */
dfc->res_util = dfc->em_pd->table[state].power;
+ dfc->res_util /= MICROWATT_PER_MILLIWATT;
+
dfc->res_util *= SCALE_ERROR_MITIGATION;
if (*power > 1)
@@ -218,8 +222,10 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
_normalize_load(&status);
- /* Scale power for utilization */
+ /* Convert EM power into milli-Watts first */
*power = dfc->em_pd->table[perf_idx].power;
+ *power /= MICROWATT_PER_MILLIWATT;
+ /* Scale power for utilization */
*power *= status.busy_time;
*power >>= 10;
}
@@ -244,6 +250,7 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev,
perf_idx = dfc->max_state - state;
*power = dfc->em_pd->table[perf_idx].power;
+ *power /= MICROWATT_PER_MILLIWATT;
return 0;
}
@@ -254,7 +261,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
struct devfreq_cooling_device *dfc = cdev->devdata;
struct devfreq *df = dfc->devfreq;
struct devfreq_dev_status status;
- unsigned long freq;
+ unsigned long freq, em_power_mw;
s32 est_power;
int i;
@@ -279,9 +286,13 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev,
* Find the first cooling state that is within the power
* budget. The EM power table is sorted ascending.
*/
- for (i = dfc->max_state; i > 0; i--)
- if (est_power >= dfc->em_pd->table[i].power)
+ for (i = dfc->max_state; i > 0; i--) {
+ /* Convert EM power to milli-Watts to make safe comparison */
+ em_power_mw = dfc->em_pd->table[i].power;
+ em_power_mw /= MICROWATT_PER_MILLIWATT;
+ if (est_power >= em_power_mw)
break;
+ }
*state = dfc->max_state - i;
dfc->capped_state = *state;