config_tools: board_inspector: guess L3 CAT parameters if not reported via CPUID

On some platforms the L3 CAT capabilities are not reported via CPUID even
though they are present. The public real-time tuning guide suggests accessing
the MSRs directly to detect whether L3 CAT is available.

This patch implements such guessing logic in the board inspector in order
to enable CAT for users with those kinds of platforms.

Tracked-On: #7948
Signed-off-by: Junjie Mao <junjie.mao@intel.com>
This commit is contained in:
Junjie Mao 2022-08-02 01:12:26 +08:00 committed by acrnsi-robot
parent 67d7b8f4c8
commit 2a8b80d0c1
2 changed files with 113 additions and 15 deletions

View File

@ -255,3 +255,18 @@ class MSR_IA32_VMX_ENTRY_CTLS(VMXCapabilityReportingMSR):
"vmx_entry_ctls_load_pat",
"vmx_entry_ctls_ia32e_mode",
]
class MSR_IA32_L3_QOS_CFG(MSR):
    # Description of the IA32_L3_QOS_CFG register, located at MSR address C81H.
    addr = 0xC81

    # Bit 0 of the register: the L3 CDP enable bit.
    cdp_enable = msrfield(0, 0, doc="L3 CDP enable")
def MSR_IA32_L3_MASK_n(n):
    """Build the MSR class that describes the n-th IA32_L3_MASK register.

    The mask registers sit at consecutive MSR addresses starting from C90H, one register per index.

    :param n: zero-based index of the mask register. Indexes outside 0..127 are reported at debug level and
        treated as 0.
    :return: a newly created ``MSR`` subclass whose ``addr`` is ``0xC90 + n``.
    """
    # Reject negative indexes as well, so that the computed address never falls below C90H.
    if not 0 <= n < 128:
        logging.debug("Attempt to access an out-of-range IA32_L3_MASK_n register. Fall back to 0.")
        n = 0

    class IA32_L3_MASK_n(MSR):
        addr = 0x00000c90 + n
        # The capacity bit mask occupies bits 31:0 of the register; the fields are given as (msb, lsb), so the
        # most significant bit is 31, not 32.
        bit_mask = msrfield(31, 0, doc="Capacity bit mask")

    return IA32_L3_MASK_n

View File

@ -8,10 +8,85 @@ import lxml.etree
from extractors.helpers import add_child, get_node
from cpuparser import parse_cpuid
import cpuparser.msr as msr
from acpiparser import parse_rtct
import acpiparser.rtct
def extract_topology(root_node, caches_node):
# Documented L3 CAT capacity bit mask lengths, keyed by the leading part of the processor model description.
#
# Source: 11th Gen Intel(R) Core(TM) Processors Real-Time Tuning Guide, document number 640980-1.4
known_cbms = {
    "11th Gen Intel(R) Core(TM) i3-1115GRE": 12,
    "11th Gen Intel(R) Core(TM) i5-1145GRE": 8,
    "11th Gen Intel(R) Core(TM) i7-1185GRE": 12,
}
def infer_l3_cat(cpu_id, processor_model_node, cache_node):
    """Probe the MSRs for L3 CAT and record the inferred parameters under ``cache_node``.

    The inference works as follows:

    - L3 CAT is considered present if and only if IA32_L3_MASK_0 (at C90H) can be read.
    - The capacity mask length is detected by trial and error. The first guess is the length documented in a
      public real-time tuning guide (see ``known_cbms``) if the processor model is listed there, or the number
      of ways of the cache otherwise. Whenever writing the all-ones mask of the guessed length is rejected, the
      guess is decreased by 1.
    - The number of CLOS IDs is detected by searching for the last readable IA32_L3_MASK_n register within the
      C90H - D0FH range, which is the architecturally defined MSR space for those registers.
    - CDP is considered available if and only if IA32_L3_QOS_CFG is present and its bit 0 can be set.

    When the inference succeeds, a ``capability`` child with id "CAT" is added to ``cache_node``, holding
    ``capacity_mask_length`` and ``clos_number`` plus a nested ``capability`` with id "CDP" if CDP is available.
    Nothing is added when L3 CAT is absent or when no initial guess or accepted mask can be found.

    NOTE(review): the values written to IA32_L3_MASK_0 and IA32_L3_QOS_CFG while probing are not restored
    afterwards -- confirm whether leaving them modified (CDP enabled in particular) is acceptable.

    :param cpu_id: identifier of the CPU whose MSRs are accessed.
    :param processor_model_node: element whose ``description`` attribute holds the processor model name; may be
        None or lack that attribute, in which case no documented length is looked up.
    :param cache_node: element of the L3 cache; its ``ways`` child gives the fallback initial guess and the
        inferred capability is appended to it.
    :return: None.
    """
    # First of all, existence of L3 CAT is indicated by the presence of IA32_L3_MASK_0 at C90H.
    try:
        ia32_l3_mask_0 = msr.MSR_IA32_L3_MASK_n(0).rdmsr(cpu_id)
    except IOError:
        return

    # Initial guess of the capacity mask length. A missing model node or description simply means that no
    # documented length applies.
    processor_model = ""
    if processor_model_node is not None:
        processor_model = processor_model_node.get("description") or ""
    initial_length = next(
        (length for prefix, length in known_cbms.items() if processor_model.startswith(prefix)),
        None,
    )
    if initial_length is None:
        ways_node = cache_node.find("ways")
        ways_text = None if ways_node is None else ways_node.text
        try:
            initial_length = int(ways_text)
        except (TypeError, ValueError):
            logging.debug("The number of ways of the L3 cache is unknown. Cannot guess the capacity mask length of L3 CAT.")
            return

    # Verify our guess. If the verification fails, decrease by 1 and guess again.
    for capacity_mask_length in range(initial_length, 0, -1):
        ia32_l3_mask_0.bit_mask = (1 << capacity_mask_length) - 1
        try:
            ia32_l3_mask_0.wrmsr()
        except IOError:
            continue
        break
    else:
        # Reached only when no write succeeded (or the initial guess was not positive).
        logging.debug("All writes to IA32_L3_MASK_0 failed. Cannot guess the capacity mask length of L3 CAT.")
        return

    # Binary search of the number of CLOS available. Invariant: the first `known_good` registers are readable
    # (IA32_L3_MASK_0 was read above) while register number `known_bad` (1-based) is not.
    known_good, known_bad = 1, 129
    while known_good + 1 < known_bad:
        mid = (known_good + known_bad) // 2
        try:
            msr.MSR_IA32_L3_MASK_n(mid - 1).rdmsr(cpu_id)
        except IOError:
            known_bad = mid
        else:
            known_good = mid
    clos_number = known_good

    # Detect availability of CDP by trying to write the enable bit.
    try:
        l3_qos_cfg = msr.MSR_IA32_L3_QOS_CFG.rdmsr(cpu_id)
        l3_qos_cfg.cdp_enable = 1
        l3_qos_cfg.wrmsr()
    except IOError:
        has_cdp = False
    else:
        has_cdp = True

    cap = add_child(cache_node, "capability", None, id="CAT")
    add_child(cap, "capacity_mask_length", str(capacity_mask_length))
    add_child(cap, "clos_number", str(clos_number))
    if has_cdp:
        add_child(cap, "capability", None, id="CDP")
def extract_topology(args, root_node, caches_node):
threads = root_node.xpath("//processors//*[cpu_id]")
for thread in threads:
subleaf = 0
@ -56,6 +131,27 @@ def extract_topology(root_node, caches_node):
if leaf_10.code_and_data_prioritization == 1:
add_child(n, "capability", None, id="CDP")
# Inform the user if L3 CAT capability is specified manually.
if args.add_llc_cat:
logging.warning(r"The last level cache (cache ID: {cache_id}) already reports CAT capability. The explicit settings from the command line options are ignored.")
elif cache_level == 3:
if args.add_llc_cat:
# Inject L3 CAT capability specified by the user
cap = add_child(llc_node, "capability", None, id="CAT")
add_child(cap, "capacity_mask_length", str(args.add_llc_cat.capacity_mask_length))
add_child(cap, "clos_number", str(args.add_llc_cat.clos_number))
if args.add_llc_cat.has_CDP:
add_child(cap, "capability", None, id="CDP")
else:
# Try inferring L3 CAT according to the methods described in section 7.2.3, 11th Gen Intel(R)
# Core(TM) Processors Real-Time Tuning Guide (document number: 640980-1.4).
family_id = thread.find("family_id").text
model_id = thread.find("model_id").text
core_type = thread.find("core_type").text
native_model_id = thread.find("native_model_id").text
processor_model_node = get_node(root_node, f"//processors/model[family_id='{family_id}' and model_id='{model_id}' and core_type='{core_type}' and native_model_id='{native_model_id}']")
infer_l3_cat(cpu_id, processor_model_node, n)
add_child(get_node(n, "processors"), "processor", get_node(thread, "apic_id/text()"))
subleaf += 1
@ -70,17 +166,4 @@ def extract_topology(root_node, caches_node):
def extract(args, board_etree):
    """Fill in the ``//caches`` node of the board XML.

    All the work, including the handling of an L3 CAT capability given explicitly on the command line
    (``args.add_llc_cat``), is delegated to ``extract_topology``, so that function is called exactly once and
    with the ``args`` it requires as its first parameter.

    :param args: parsed command line options; forwarded to ``extract_topology``.
    :param board_etree: element tree of the board XML; must contain a ``caches`` node.
    :return: None.
    """
    root_node = board_etree.getroot()
    caches_node = get_node(board_etree, "//caches")
    extract_topology(args, root_node, caches_node)