From 2a8b80d0c151d1994e748b4777ac0576ea4a5693 Mon Sep 17 00:00:00 2001
From: Junjie Mao <junjie.mao@intel.com>
Date: Tue, 2 Aug 2022 01:12:26 +0800
Subject: [PATCH] config_tools: board_inspector: guess L3 CAT parameters if not
 reported via CPUID

On some platforms the L3 CAT capabilities are not reported via CPUID even
though they are present. The public real-time tuning guide suggests
accessing the MSRs directly to detect whether L3 CAT is available.

This patch implements such guessing logic in the board inspector in order
to enable CAT for users with those kinds of platforms.

Tracked-On: #7948
Signed-off-by: Junjie Mao <junjie.mao@intel.com>
---
 .../board_inspector/cpuparser/msr.py          |  15 +++
 .../board_inspector/extractors/20-cache.py    | 113 +++++++++++++++---
 2 files changed, 113 insertions(+), 15 deletions(-)

diff --git a/misc/config_tools/board_inspector/cpuparser/msr.py b/misc/config_tools/board_inspector/cpuparser/msr.py
index 21d2ba6b4..0d482f877 100644
--- a/misc/config_tools/board_inspector/cpuparser/msr.py
+++ b/misc/config_tools/board_inspector/cpuparser/msr.py
@@ -255,3 +255,18 @@ class MSR_IA32_VMX_ENTRY_CTLS(VMXCapabilityReportingMSR):
         "vmx_entry_ctls_load_pat",
         "vmx_entry_ctls_ia32e_mode",
     ]
+
+class MSR_IA32_L3_QOS_CFG(MSR):
+    addr = 0x00000c81  # MSR address of IA32_L3_QOS_CFG
+    cdp_enable = msrfield(0, 0, doc="L3 CDP enable")  # bit 0 of the register
+
+def MSR_IA32_L3_MASK_n(n):  # returns an MSR class describing IA32_L3_MASK_<n> (address C90H + n)
+    if not 0 <= n < 128:  # only n = 0..127 (C90H - D0FH) addresses an IA32_L3_MASK_n register; negatives are rejected too
+        logging.debug("Attempt to access an out-of-range IA32_L3_MASK_n register. Fall back to 0.")
+        n = 0
+
+    class IA32_L3_MASK_n(MSR):
+        addr = 0x00000c90 + n
+        bit_mask = msrfield(31, 0, doc="Capacity bit mask")  # bits 31:0 only; NOTE(review): assumes msrfield takes (msb, lsb)
+
+    return IA32_L3_MASK_n
diff --git a/misc/config_tools/board_inspector/extractors/20-cache.py b/misc/config_tools/board_inspector/extractors/20-cache.py
index 7f92327f0..281ac7329 100644
--- a/misc/config_tools/board_inspector/extractors/20-cache.py
+++ b/misc/config_tools/board_inspector/extractors/20-cache.py
@@ -8,10 +8,85 @@ import lxml.etree
 from extractors.helpers import add_child, get_node
 
 from cpuparser import parse_cpuid
+import cpuparser.msr as msr
 from acpiparser import parse_rtct
 import acpiparser.rtct
 
-def extract_topology(root_node, caches_node):
+known_cbms = {  # processor model description prefix -> initial guess of the L3 capacity mask length
+    # From 11th Gen Intel(R) Core(TM) Processors Real-Time Tuning Guide, document number 640980-1.4
+    "11th Gen Intel(R) Core(TM) i3-1115GRE": 12,
+    "11th Gen Intel(R) Core(TM) i5-1145GRE": 8,
+    "11th Gen Intel(R) Core(TM) i7-1185GRE": 12,
+}
+
+def infer_l3_cat(cpu_id, processor_model_node, cache_node):  # adds a "CAT" capability under cache_node, or nothing
+    # First of all, existence of L3 CAT is indicated by the presence of IA32_L3_MASK_0 at C90H
+    try:
+        ia32_l3_mask_0 = msr.MSR_IA32_L3_MASK_n(0).rdmsr(cpu_id)
+    except IOError:
+        return  # the register cannot be read, so no capability is added
+
+    # If L3 CAT does exist, try inferring its parameters:
+    #
+    #   - For capacity mask length, detect in a trial-and-error way starting from:
+    #     a. the capacity mask length documented in any public real-time tuning guide, if any.
+    #     b. or, the number of ways of the L3 cache.
+    #
+    #   - For the number of CLOS IDs available, detect by searching the last programmable IA32_L3_MASK_n register within
+    #     the C90H - D0FH range which is the architecturally defined MSR space for those registers.
+    #
+    #   - For CDP, try setting the enable bit in IA32_L3_QOS_CFG. CDP is available if and only if that MSR is present
+    #     and its bit 0 can be set.
+
+    # Initial guess of the capacity mask length
+    capacity_mask_length = int(cache_node.find("ways").text)  # fallback (b): the <ways> value of this cache
+    processor_model = processor_model_node.get("description")
+    for k, v in known_cbms.items():
+        if processor_model.startswith(k):  # prefix match against the documented model names
+            capacity_mask_length = v
+            break
+
+    # Verify our guess. If the verification fails, decrease by 1 and guess again.
+    while capacity_mask_length > 0:
+        ia32_l3_mask_0.bit_mask = (1 << capacity_mask_length) - 1  # the lowest capacity_mask_length bits set
+        try:
+            ia32_l3_mask_0.wrmsr()  # NOTE(review): the value read earlier is not written back afterwards
+            break
+        except IOError:
+            capacity_mask_length = capacity_mask_length - 1
+            continue
+    else:  # reached only when the loop ends without a successful write
+        logging.debug("All writes to IA32_L3_MASK_0 failed. Cannot guess the capacity mask length of L3 CAT.")
+        return
+
+    # Binary search of the number of CLOS available
+    known_good = 1  # IA32_L3_MASK_0 was readable above, so at least one CLOS exists
+    known_bad = 129  # one past 128, the number of registers in the C90H - D0FH range
+    while known_good + 1 < known_bad:
+        mid = (known_good + known_bad) // 2
+        try:
+            msr.MSR_IA32_L3_MASK_n(mid - 1).rdmsr(cpu_id)  # a CLOS count of `mid` needs register index mid - 1
+            known_good = mid
+        except IOError:
+            known_bad = mid
+    clos_number = known_good
+
+    # Detect availability of CDP by trying to write the enable bit.
+    try:
+        l3_qos_cfg = msr.MSR_IA32_L3_QOS_CFG.rdmsr(cpu_id)
+        l3_qos_cfg.cdp_enable = 1
+        l3_qos_cfg.wrmsr()  # NOTE(review): the enable bit is not cleared again after this probe
+        has_cdp = True
+    except IOError:
+        has_cdp = False
+
+    cap = add_child(cache_node, "capability", None, id="CAT")
+    add_child(cap, "capacity_mask_length", str(capacity_mask_length))
+    add_child(cap, "clos_number", str(clos_number))
+    if has_cdp:
+        add_child(cap, "capability", None, id="CDP")  # CDP is recorded as a child of the CAT capability
+
+def extract_topology(args, root_node, caches_node):
     threads = root_node.xpath("//processors//*[cpu_id]")
     for thread in threads:
         subleaf = 0
@@ -56,6 +131,27 @@ def extract_topology(root_node, caches_node):
                     if leaf_10.code_and_data_prioritization == 1:
                         add_child(n, "capability", None, id="CDP")
 
+                    # Inform the user if L3 CAT capability is specified manually. NOTE(review): relies on `cache_id` being bound earlier in this loop.
+                    if args.add_llc_cat:
+                        logging.warning(f"The last level cache (cache ID: {cache_id}) already reports CAT capability. The explicit settings from the command line options are ignored.")
+                elif cache_level == 3:
+                    if args.add_llc_cat:
+                        # Inject L3 CAT capability specified by the user into the cache node being populated (`n`).
+                        cap = add_child(n, "capability", None, id="CAT")
+                        add_child(cap, "capacity_mask_length", str(args.add_llc_cat.capacity_mask_length))
+                        add_child(cap, "clos_number", str(args.add_llc_cat.clos_number))
+                        if args.add_llc_cat.has_CDP:
+                            add_child(cap, "capability", None, id="CDP")
+                    else:
+                        # Try inferring L3 CAT according to the methods described in section 7.2.3, 11th Gen Intel(R)
+                        # Core(TM) Processors Real-Time Tuning Guide (document number: 640980-1.4).
+                        family_id = thread.find("family_id").text
+                        model_id = thread.find("model_id").text
+                        core_type = thread.find("core_type").text
+                        native_model_id = thread.find("native_model_id").text
+                        processor_model_node = get_node(root_node, f"//processors/model[family_id='{family_id}' and model_id='{model_id}' and core_type='{core_type}' and native_model_id='{native_model_id}']")
+                        infer_l3_cat(cpu_id, processor_model_node, n)
+
             add_child(get_node(n, "processors"), "processor", get_node(thread, "apic_id/text()"))
 
             subleaf += 1
@@ -70,17 +166,4 @@ def extract_topology(root_node, caches_node):
 def extract(args, board_etree):
     root_node = board_etree.getroot()
     caches_node = get_node(board_etree, "//caches")
-    extract_topology(root_node, caches_node)
-
-    # Inject the explicitly specified CAT capability if exists
-    if args.add_llc_cat:
-        llc_node = get_node(root_node, "//caches/cache[@level='3']")
-        llc_cat_node = get_node(llc_node, "capability[@id='CAT']")
-        if llc_cat_node is None:
-            llc_cat_node = add_child(llc_node, "capability", None, id="CAT")
-            add_child(llc_cat_node, "capacity_mask_length", str(args.add_llc_cat.capacity_mask_length))
-            add_child(llc_cat_node, "clos_number", str(args.add_llc_cat.clos_number))
-            if args.add_llc_cat.has_CDP:
-                add_child(llc_node, "capability", None, id="CDP")
-        else:
-            logging.warning("The last level cache already reports CAT capability. The explicit settings from the command line options are ignored.")
+    extract_topology(args, root_node, caches_node)  # args is passed down so the add_llc_cat override is handled there