998 lines
36 KiB
Python
Executable File
998 lines
36 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Copyright(c) 2022 Intel Corporation. All rights reserved.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
import os
|
|
import sys
|
|
import struct
|
|
import logging
|
|
import asyncio
|
|
import time
|
|
import subprocess
|
|
import ctypes
|
|
import mmap
|
|
import argparse
|
|
import pty
|
|
|
|
# Run flag for the polling loop in main(): the loop keeps going while this is
# True. The KeyboardInterrupt handler at the bottom of the file clears it.
start_output = True

# Loader messages go through the "cavs-fw" logger, at INFO level by default.
# The -q / -v command line options change the level after argument parsing.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")
|
|
|
|
PAGESZ = 4096                   # bytes per page frame when converting a pagemap PFN
HUGEPAGESZ = 2 * 1024 * 1024    # bytes mapped per DMA buffer (one 2M huge page)
# Path prefix of the hugetlbfs backing file; map_phys_mem() appends the stream id.
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."

# SRAM windows. Base and stride vary depending on ADSP version
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

# Layout returned by adsp_mem_window_config() when adsp_is_ace() is false
WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

# Layout returned by adsp_mem_window_config() when adsp_is_ace() is true
WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

# Debug window (window 2) layout as used by shell_base_offset(): the shell
# slot starts DEBUG_SLOT_SIZE * (1 + DEBUG_SLOT_SHELL) bytes into the window.
DEBUG_SLOT_SIZE = 4096
DEBUG_SLOT_SHELL = 0
# Offset from the start of the shell slot to the winstream the host reads
# shell output from (the host writes shell input at the slot start).
SHELL_RX_SIZE = 256
# A winstream header length above this is treated as "DSP powered off".
SHELL_MAX_VALID_SLOT_SIZE = 16777216
|
|
|
|
# pylint: disable=duplicate-code
|
|
|
|
# ADSPCS bits
# Each value is the position of the lowest bit of a per-core field; mask()
# widens it to cover the number of cores of the detected cAVS generation.
CRST = 0      # core reset field (set while stalling/resetting, cleared to unreset)
CSTALL = 8    # core stall field (cleared last to let the cores run)
SPA = 16      # power request field written by the host
CPA = 24      # power status field polled after changing SPA
|
|
|
|
class HDAStream:
    """Host-side handle for one HD Audio stream and its DMA buffers.

    Wraps the stream descriptor registers of stream ``stream_id``, the
    controller-wide registers the stream needs (SPIB, SPBFCTL, PPCTL, ...)
    and, once config() has run, a 2M huge page holding two equally sized
    DMA buffers followed by their buffer descriptor list (BDL).

    Relies on the module globals ``hdamem`` (mapped HDA BAR address),
    ``log``, ``Regs`` and ``map_phys_mem``.
    """

    # maps the stream registers; the 2 buffers of buf_len are created by config()
    def __init__(self, stream_id: int):
        """Map the registers belonging to ``stream_id`` and reset the stream."""
        self.stream_id = stream_id
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        # Controller-wide registers
        self.hda = Regs(hdamem)
        self.hda.GCAP = 0x0000
        self.hda.GCTL = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH = 0x0800
        self.hda.PPCTL = 0x0804
        self.hda.PPSTS = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        # Per-stream descriptor registers
        self.regs = Regs(self.base)
        self.regs.CTL = 0x00
        self.regs.STS = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL = 0x08
        self.regs.LVI = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL = 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        # Extra registers that are only read by debug()
        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        """Reset the stream when the object is garbage collected."""
        self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream descriptor.

        ``buf_len`` is the size in bytes of each of the two buffers.
        """
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        self.mem, self.hugef, self.buf_list_addr, self.pos_buf_addr, self.n_bufs = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy up to ``buf_len`` bytes of ``data`` into both buffers and advance SPIB."""

        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1) of the stream's CTL register."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear the START bit, wait 100ms, then set the reset bit (bit 0)."""
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only START, exactly like reset() does. The previous "&= 2"
        # kept START set and wiped every other field instead.
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers plus their BDL in it.

        Returns ``(mem, hugef, bdl_phys_addr, pos_buf_phys_addr, n_bufs)``
        where ``mem`` is the mmap of the page, ``hugef`` its backing file
        object and ``n_bufs`` is always 2.
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # bdl is 2 (64bits, 16 bytes) per entry, we have two
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        dpib_off = bdl_off+32

        # ensure buffer is initialized, sanity
        mem[0:buf_len*2] = bytes(buf_len*2)

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Log a snapshot of the stream's registers at DEBUG level."""
        # Uses this stream's own controller mapping rather than the module
        # global "hda", so the class does not depend on main() having run.
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                  self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL, self.regs.LPIB, self.regs.BDPU,
                  self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug(" FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                  self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL, self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug(" status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                  (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1, (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop and reset the stream, zero SPIB and enable PROCEN for it."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")
|
|
|
|
def adsp_is_cavs():
    """Return a true value when the detected DSP is any cAVS generation.

    Reads the module globals ``cavs15``, ``cavs18`` and ``cavs25`` that
    map_regs() sets from the PCI device id.
    """
    # The third operand used to repeat cavs15, so cAVS 2.5 was never reported.
    return cavs15 or cavs18 or cavs25
|
|
|
|
def adsp_is_ace():
    """Return a true value when the detected DSP is one of the ACE generations.

    Reads the module globals ``ace15``, ``ace20`` and ``ace30``.
    """
    # First true flag wins; otherwise the last flag's value is returned.
    if ace15:
        return ace15
    if ace20:
        return ace20
    return ace30
|
|
|
|
def adsp_mem_window_config():
    """Return ``(base, stride)`` of the SRAM windows for the detected DSP."""
    if adsp_is_ace():
        return (WINDOW_BASE_ACE, WINDOW_STRIDE_ACE)
    return (WINDOW_BASE, WINDOW_STRIDE)
|
|
|
|
def map_regs():
    """Locate the audio DSP PCI device and map its register blocks.

    Finds the device through sysfs, records the platform generation in the
    ``cavs15``/``cavs18``/``cavs25``/``ace15``/``ace20``/``ace30`` globals,
    unloads an attached kernel driver (unless --log-only was given), enables
    PCI memory access and bus mastering, and maps BAR0 (HD Audio) and BAR4
    (DSP). Also sets the ``hdamem`` and ``bar4_mmap`` globals.

    Returns ``(hda, sd, dsp, hda_ostream_id)``: the HD Audio controller
    registers, the stream descriptor of the first output stream, the DSP
    registers, and the index of that output stream.
    """
    p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    global cavs15, cavs18, cavs25, ace15, ace20, ace30
    # Read the device id with a context manager so the sysfs file is closed.
    with open(f"{pcidir}/device") as device_file:
        did = int(device_file.read().rstrip(), 16)
    cavs15 = did in [ 0x5a98, 0x1a98, 0x3198 ]
    cavs18 = did in [ 0x9dc8, 0xa348, 0x02c8, 0x06c8, 0xa3f0 ]
    cavs25 = did in [ 0xa0c8, 0x43c8, 0x4b55, 0x4b58, 0x7ad0, 0x51c8 ]
    ace15 = did in [ 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if args.log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    global hdamem
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()


    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    global bar4_mmap
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS = 0x1000
        dsp.HFPWRCTL = 0x1d18 if ace20 else 0x1d20
        dsp.HFPWRSTS = 0x1d1c if ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR = 0x73200
        dsp.HFIPCXTDA = 0x73204
        dsp.HFIPCXIDR = 0x73210
        dsp.HFIPCXIDA = 0x73214
        dsp.HFIPCXCTL = 0x73228
        dsp.HFIPCXTDDY = 0x73300
        dsp.HFIPCXIDDY = 0x73380
        dsp.ROM_STATUS = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS = 0x00004
        dsp.HIPCTDR = 0x00040 if cavs15 else 0x000c0
        dsp.HIPCTDA = 0x000c4 # 1.8+ only
        dsp.HIPCTDD = 0x00044 if cavs15 else 0x000c8
        dsp.HIPCIDR = 0x00048 if cavs15 else 0x000d0
        dsp.HIPCIDA = 0x000d4 # 1.8+ only
        dsp.HIPCIDD = 0x0004c if cavs15 else 0x000d8
        dsp.ROM_STATUS = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)
|
|
|
|
def setup_dma_mem(fw_bytes):
    """Copy the firmware image into a DMA-able huge page and build its BDL.

    Returns ``(bdl_phys_addr, n_bufs)`` where ``n_bufs`` is always 2.

    Raises ValueError when ``fw_bytes`` does not fit in the buffer area in
    front of the buffer descriptor list.
    """
    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region.  Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the 2-entry BDL list into the final 128 bytes of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    # Refuse images that would overlap the BDL (or not fit in the page at
    # all) before touching any hardware resources.
    if len(fw_bytes) > bdl_off:
        raise ValueError(f"Firmware image of {len(fw_bytes)} bytes does not fit "
                         f"in the {bdl_off} byte DMA buffer")

    (mem, phys_addr, _) = map_phys_mem(hda_ostream_id)
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware"
             % (phys_addr, len(fw_bytes)))

    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)
|
|
|
|
global_mmaps = [] # protect mmap mappings from garbage collection!

# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem(stream_id):
    """Map one huge page and return ``(mem, paddr, hugef)``.

    ``mem`` is the mmap object, ``paddr`` the physical address of the page
    and ``hugef`` the (already unlinked) hugetlbfs backing file object.
    ``stream_id`` is only used to build the backing file name.

    Raises RuntimeError when the kernel does not report a physical frame
    number for the page.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    hugef.truncate(HUGEPAGESZ)
    mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)
    os.unlink(hugef_name)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr // PAGESZ
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)
        pent = pagemap.read(8)
    pfn = struct.unpack("Q", pent)[0] & ((1 << 55) - 1)
    if pfn == 0:
        # A zero PFN would make the hardware DMA to/from physical address 0.
        # NOTE(review): presumably this happens when the process lacks the
        # privileges needed to read PFNs - confirm against kernel docs.
        raise RuntimeError("Could not get the physical address of the DMA buffer "
                           "from /proc/self/pagemap; run as root?")
    paddr = pfn * PAGESZ
    return (mem, paddr, hugef)
|
|
|
|
# Maps a PCI BAR and returns the in-process address
|
|
def bar_map(pcidir, barnum):
    """Map PCI BAR ``barnum`` of the device at sysfs directory ``pcidir``.

    Returns ``(address, mapping)``: the in-process address of the mapping
    and the mmap object itself (also kept alive in ``global_mmaps``).
    """
    resource = open(pcidir + "/resource" + str(barnum), "r+")
    fd = resource.fileno()
    mapping = mmap.mmap(fd, os.fstat(fd).st_size)
    global_mmaps.append(mapping)
    bar_len = os.fstat(fd).st_size
    log.info("Mapped PCI bar %d of length %d bytes."
             % (barnum, bar_len))
    address = ctypes.addressof(ctypes.c_int.from_buffer(mapping))
    return (address, mapping)
|
|
|
|
# Syntactic sugar to make register block definition & use look nice.
|
|
# Instantiate from a base address, assign offsets to (uint32) named registers as
|
|
# fields, call freeze(), then the field acts as a direct alias for the register!
|
|
class Regs:
    """Named 32-bit registers at fixed offsets from a base address.

    Before freeze(), assigning ``regs.NAME = offset`` defines register NAME
    at ``base_addr + offset``. After freeze() (or for a name that is already
    defined), assignment writes the register and attribute access reads it.
    Accessing an undefined register raises AttributeError.
    """

    def __init__(self, base_addr):
        # Go through vars() so these do not trigger __setattr__ below.
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """End the definition phase; later assignments write registers."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
        else:
            try:
                reg = self.ptrs[name]
            except KeyError:
                raise AttributeError(f"no register named {name!r}") from None
            reg.value = val

    def __getattr__(self, name):
        # Only called when normal lookup fails. Read "ptrs" through vars()
        # so a missing table cannot recurse back into __getattr__.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None
|
|
|
|
def runx(cmd):
    """Run ``cmd`` through the shell and return its stdout as stripped text.

    Trailing whitespace is removed from the output. Raises
    subprocess.CalledProcessError when the command exits with a non-zero status.
    """
    completed = subprocess.run(cmd, shell=True, check=True,
                               stdout=subprocess.PIPE)
    text = completed.stdout.decode()
    return text.rstrip()
|
|
|
|
def mask(bit):
    """Return the per-core bit mask for an ADSPCS field starting at ``bit``.

    The width depends on the detected generation: 1 bit on cAVS 2.5, 4 bits
    on 1.8 and 2 bits on 1.5. Returns None when none of those flags is set.
    """
    if cavs25:
        cores = 0b1
    elif cavs18:
        cores = 0b1111
    elif cavs15:
        cores = 0b11
    else:
        return None
    return cores << bit
|
|
|
|
def load_firmware(fw_file):
    """Load the firmware image ``fw_file`` onto a cAVS DSP over HDA DMA.

    Reads the file (exiting the process with status 1 if that fails), strips
    an 'XMan' extended manifest if present, resets the HDA controller and
    DSP cores, sets up the DMA stream, powers up and starts the cores, sends
    the boot IPC message to the ROM, starts the DMA and waits for the
    firmware to report that it has been entered.

    Uses the module globals ``hda``, ``sd``, ``dsp`` and ``hda_ostream_id``.
    """
    try:
        # Context manager so the file is closed even if the read fails.
        with open(fw_file, "rb") as fw:
            fw_bytes = fw.read()
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Host needs to provide power to all cores on 1.5
    # (which also starts them) and 1.8 (merely gates power, DSP also
    # has to set PWRCTL). On 2.5 where the DSP has full control,
    # and only core 0 is set.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & 1) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).  But with 1.5 it's the HDA index,
    # and depends on the number of input streams on the device.
    stream_idx = hda_ostream_id if cavs15 else 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered()

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("cAVS firmware load complete")
|
|
|
|
def load_firmware_ace(fw_file):
    """Load the firmware image ``fw_file`` onto an ACE DSP over HDA DMA.

    Reads the file (exiting the process with status 1 if that fails), pads
    it to at least 512K, strips a '$AE1' extended manifest if present,
    resets the HDA controller, power-cycles the DSP subsystem, sets up the
    DMA stream, sends the boot IPC message, powers on the primary core,
    starts the DMA and waits for the firmware to report it has been entered.

    Uses the module globals ``hda``, ``sd``, ``dsp`` and ``hda_ostream_id``.
    """
    try:
        # Context manager so the file is closed even if the read fails.
        with open(fw_file, "rb") as fw:
            fw_bytes = fw.read()
        # Resize fw_bytes for MTL
        if len(fw_bytes) < 512 * 1024:
            fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)


    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).  But with 1.5 it's the HDA index,
    # and depends on the number of input streams on the device.
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered()

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("ACE firmware load complete")
|
|
|
|
def fw_is_alive():
    """Return True when the low 28 bits of ``dsp.ROM_STATUS`` equal 5."""
    status = dsp.ROM_STATUS & 0x0fffffff
    return status == 5  # "FW_ENTERED"
|
|
|
|
def wait_fw_entered(timeout_s=2):
    """Poll fw_is_alive() at 100 Hz until it succeeds or the timeout expires.

    ``timeout_s`` is the timeout in seconds; ``None`` waits forever. The
    outcome is only logged (INFO on success, WARNING on timeout).
    """
    wait_desc = "forever" if timeout_s is None else f"{timeout_s} seconds"
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             wait_desc, dsp.ROM_STATUS)
    hertz = 100
    remaining = None if timeout_s is None else timeout_s * hertz

    alive = fw_is_alive()
    while not alive:
        if remaining is not None:
            remaining -= 1
            if remaining < 0:
                break
        time.sleep(1 / hertz)
        alive = fw_is_alive()

    if alive:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
|
|
|
|
def winstream_offset():
    """Return the BAR4 offset of SRAM window 3 (winstream log output)."""
    base, stride = adsp_mem_window_config()
    return base + 3 * stride
|
|
|
|
# This SHOULD be just "mem[start:start+length]", but slicing an mmap
|
|
# array seems to be unreliable on one of my machines (python 3.6.9 on
|
|
# Ubuntu 18.04). Read out bytes individually.
|
|
def win_read(base, start, length):
    """Return ``length`` bytes of ``bar4_mmap`` starting at ``base + start``.

    Bytes are fetched one at a time on purpose. An IndexError is logged
    and re-raised.
    """
    try:
        return bytes(bar4_mmap[base + off]
                     for off in range(start, start + length))
    except IndexError:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise
|
|
|
|
def win_hdr(base):
    """Read the 16-byte header at ``base`` as four little-endian uint32.

    Callers unpack the result as ``(wlen, start, end, seq)``.
    """
    header = win_read(base, 0, 16)
    return struct.unpack("<IIII", header)
|
|
|
|
# Python implementation of the same algorithm in sys_winstream_read(),
|
|
# see there for details.
|
|
def winstream_read(base, last_seq):
    """Read new text from the winstream whose header is at offset ``base``.

    ``last_seq`` is the sequence number returned by the previous call, or 0
    on the first call. Returns ``(seq, text)`` where ``seq`` is the stream's
    current sequence number and ``text`` is the newly available data decoded
    as UTF-8 (``""`` when there is nothing to return).
    """
    # Retry until the header is unchanged across the data read.
    while True:
        (wlen, start, end, seq) = win_hdr(base)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, "")
        if wlen == 0:
            return (seq, "")
        if last_seq == 0:
            # First call: start at the current position with --no-history,
            # otherwise back up by the amount currently buffered.
            last_seq = seq if args.no_history else (seq - ((end - start) % wlen))
        if seq == last_seq or start == end:
            return (seq, "")
        behind = seq - last_seq
        if behind > ((end - start) % wlen):
            # More data was produced than is still buffered; skip it.
            return (seq, "")
        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        result = win_read(base, 16 + copy, suffix)
        if suffix < behind:
            # The data wraps around; the rest is at the start of the buffer.
            result += win_read(base, 16, behind - suffix)
        (wlen, start1, end, seq1) = win_hdr(base)
        if start1 == start and seq1 == seq:
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, result.decode("utf-8", "replace"))
|
|
|
|
def idx_mod(wlen, idx):
    """Wrap ``idx`` once into a buffer of length ``wlen``.

    Subtracts ``wlen`` only if ``idx >= wlen``; it is not a full modulo.
    """
    return idx - wlen if idx >= wlen else idx
|
|
|
|
def idx_sub(wlen, a, b):
    """Return ``a - b`` wrapped into a buffer of length ``wlen``."""
    distance = a + (wlen - b)
    return idx_mod(wlen, distance)
|
|
|
|
# Python implementation of the same algorithm in sys_winstream_write(),
|
|
# see there for details.
|
|
def winstream_write(base, msg):
    """Append the bytes ``msg`` to the winstream whose header is at ``base``.

    Does nothing when the header length is 0 or implausibly large (DSP
    powered off). If ``msg`` is longer than the stream can hold, only its
    tail is stored, but the sequence number still advances by the full
    length. The header fields start, end and seq are written back last.
    """
    (wlen, start, end, seq) = win_hdr(base)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg = len(msg)
    lenmsg0 = lenmsg
    # Overflow: at most wlen - 1 bytes can be buffered because start == end
    # means "empty". If we have to truncate, just reset the buffer.
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    lenmsg = min(lenmsg, wlen)
    # Make room in the buffer by advancing start first
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            start = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate? Keep only the last lenmsg bytes of the message.
    if lenmsg < lenmsg0:
        start = end
        msg = msg[lenmsg0 - lenmsg:]
    suffix = min(lenmsg, wlen - end)
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    end = idx_mod(wlen, end + lenmsg)
    # seq is a 32-bit counter in the header; wrap it so packing cannot fail
    seq = (seq + lenmsg0) & 0xffffffff
    # write back updated fields as 32bit writes
    update_hdr = struct.pack("<III", start, end, seq)
    dst = base + 4 # skip wlen
    for c in range(0, 3):
        src = c * 4
        bar4_mmap[dst : dst + 4] = update_hdr[src : src + 4]
        dst += 4
|
|
|
|
def debug_offset():
    """Return the BAR4 offset of SRAM window 2 (debug slots)."""
    base, stride = adsp_mem_window_config()
    return base + 2 * stride
|
|
|
|
def shell_base_offset():
    """Return the BAR4 offset of the shell's slot inside the debug window."""
    slot_index = 1 + DEBUG_SLOT_SHELL
    return debug_offset() + DEBUG_SLOT_SIZE * slot_index
|
|
|
|
def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output from the DSP to the shell PTY.

    ``last_seq`` is the sequence number from the previous call; the updated
    sequence number is returned.
    """
    rx_stream = shell_base_offset() + SHELL_RX_SIZE
    new_seq, text = winstream_read(rx_stream, last_seq)
    if text:
        os.write(shell_client_port, text.encode("utf-8"))
    return new_seq
|
|
|
|
def write_to_shell_memwindow_winstream():
    """Forward one byte typed on the shell PTY to the DSP's shell winstream."""
    data = os.read(shell_client_port, 1)
    if not data:
        return
    winstream_write(shell_base_offset(), data)
|
|
|
|
def create_shell_pty():
    """Open a PTY for the Zephyr shell and hook it into the event loop.

    Sets the global ``shell_client_port``, logs the name of the user-facing
    terminal, and registers write_to_shell_memwindow_winstream() as the
    reader callback for ``shell_client_port``.
    """
    global shell_client_port
    shell_client_port, user_port = pty.openpty()
    name = os.ttyname(user_port)
    log.info(f"shell PTY at: {name}")
    loop = asyncio.get_event_loop()
    loop.add_reader(shell_client_port, write_to_shell_memwindow_winstream)
|
|
|
|
async def ipc_delay_done():
    """Signal IPC DONE to the DSP after a 100ms delay."""
    await asyncio.sleep(0.1)
    if not adsp_is_ace():
        dsp.HIPCTDA = 1<<31
        return
    dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
|
|
|
|
def inbox_offset():
    """Return the BAR4 offset of SRAM window 1 (the IPC "inbox")."""
    base, stride = adsp_mem_window_config()
    return base + stride
|
|
|
|
def outbox_offset():
    """Return the BAR4 offset of the IPC "outbox", 4k into SRAM window 0."""
    base = adsp_mem_window_config()[0]
    return base + 4096
|
|
|
|
# Time, in microseconds, at which the last timestamp command (4) was handled.
ipc_timestamp = 0

# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Execute one IPC command received from the DSP and acknowledge it.

    ``data`` selects the command (0-12, see the branches below) and
    ``ext_data`` is its argument. Commands that set ``send_msg`` reply with
    a message carrying the possibly updated ``ext_data``. Unless an
    unrecognized command arrives while the firmware is not alive, the
    command is acknowledged through the IPC registers of the detected
    platform.
    """
    send_msg = False   # reply with a message carrying ext_data
    done = True        # signal DONE synchronously at the end
    log.debug ("ipc data %d, ext_data %x", data, ext_data)
    if data == 0: # noop, with synchronous DONE
        pass
    elif data == 1: # async command: signal DONE after a delay (on 1.8+)
        if not cavs15:
            done = False
            asyncio.ensure_future(ipc_delay_done())
    elif data == 2: # echo back ext_data as a message command
        send_msg = True
    elif data == 3: # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4: # echo back microseconds since last timestamp command
        global ipc_timestamp
        t = round(time.time() * 1e6)
        ext_data = t - ipc_timestamp
        ipc_timestamp = t
        send_msg = True
    elif data == 5: # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        src = outbox_offset() + 4 * (ext_data >> 16)
        dst = inbox_offset() + 4 * (ext_data & 0xffff)
        # Copy the 32-bit word one byte at a time
        for i in range(4):
            bar4_mmap[dst + i] = bar4_mmap[src + i]
    elif data == 6: # HDA RESET (init if not exists)
        stream_id = ext_data & 0xff
        if stream_id in hda_streams:
            hda_streams[stream_id].reset()
        else:
            # Constructing the stream also resets it
            hda_str = HDAStream(stream_id)
            hda_streams[stream_id] = hda_str
    elif data == 7: # HDA CONFIG
        # ext_data: bits 0-7 stream id, bits 8-23 buffer length
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        hda_str.config(buf_len)
    elif data == 8: # HDA START
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].start()
        hda_streams[stream_id].mem.seek(0)

    elif data == 9: # HDA STOP
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].stop()
    elif data == 10: # HDA VALIDATE
        # Check that the first 256 bytes of the buffer are the ramp 0..255
        stream_id = ext_data & 0xFF
        hda_str = hda_streams[stream_id]
        hda_str.debug()
        is_ramp_data = True
        hda_str.mem.seek(0)
        for (i, val) in enumerate(hda_str.mem.read(256)):
            if i != val:
                is_ramp_data = False
            # log.info("stream[%d][%d]: %d", stream_id, i, val) # debug helper
        log.info("Is ramp data? " + str(is_ramp_data))
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        send_msg = True
    elif data == 11: # HDA HOST OUT SEND
        # Send the ramp 0..255 to the stream
        stream_id = ext_data & 0xff
        buf = bytearray(256)
        for i in range(0, 256):
            buf[i] = i
        hda_streams[stream_id].write(buf)
    elif data == 12: # HDA PRINT
        # ext_data: bits 0-7 stream id, bits 8-23 number of bytes to print
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        # check for wrap here
        pos = hda_str.mem.tell()
        read_lens = [buf_len, 0]
        if pos + buf_len >= hda_str.buf_len*2:
            # Split into the part up to the end of the two buffers and the
            # remainder, which is read from the start again
            read_lens[0] = hda_str.buf_len*2 - pos
            read_lens[1] = buf_len - read_lens[0]
        # validate the read lens
        assert (read_lens[0] + pos) <= (hda_str.buf_len*2)
        assert read_lens[0] % 128 == 0
        assert read_lens[1] % 128 == 0
        buf_data0 = hda_str.mem.read(read_lens[0])
        hda_msg0 = buf_data0.decode("utf-8", "replace")
        sys.stdout.write(hda_msg0)
        if read_lens[1] != 0:
            hda_str.mem.seek(0)
            buf_data1 = hda_str.mem.read(read_lens[1])
            hda_msg1 = buf_data1.decode("utf-8", "replace")
            sys.stdout.write(hda_msg1)
        pos = hda_str.mem.tell()
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        if not fw_is_alive():
            if args.log_only:
                log.info("DSP power seems off")
                wait_fw_entered(timeout_s=None)
            else:
                log.warning("DSP power seems off?!")
                time.sleep(2) # potential spam reduction

            # Do not touch the IPC registers while the firmware is not alive
            return

    if adsp_is_ace():
        dsp.HFIPCXTDR = 1<<31 # Ack local interrupt, also signals DONE on v1.5
        if done:
            dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
        if send_msg:
            log.debug("ipc: sending msg 0x%08x" % ext_data)
            dsp.HFIPCXIDDY = ext_data
            dsp.HFIPCXIDR = (1<<31) | ext_data
    else:
        dsp.HIPCTDR = 1<<31 # Ack local interrupt, also signals DONE on v1.5
        if cavs18:
            time.sleep(0.01) # Needed on 1.8, or the command below won't send!
        if done and not cavs15:
            dsp.HIPCTDA = 1<<31 # Signal done
        if send_msg:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = (1<<31) | ext_data
|
|
|
|
def handle_ipc():
    """Service pending IPC events from the DSP.

    Acknowledges a pending DONE and, if the DSP posted a command (bit 31 of
    the TDR register set), passes it to ipc_command() together with its
    extension data.
    """
    bit31 = 0x80000000
    if adsp_is_ace():
        if dsp.HFIPCXIDA & bit31:
            log.debug("ipc: Ack DSP reply with IDA_DONE")
            dsp.HFIPCXIDA = bit31 # must ACK any DONE interrupts that arrive!
        if dsp.HFIPCXTDR & bit31:
            ipc_command(dsp.HFIPCXTDR & ~bit31, dsp.HFIPCXTDDY)
    else:
        if dsp.HIPCIDA & bit31:
            dsp.HIPCIDA = bit31 # must ACK any DONE interrupts that arrive!
        if dsp.HIPCTDR & bit31:
            ipc_command(dsp.HIPCTDR & ~bit31, dsp.HIPCTDD)
|
|
|
|
async def main():
    """Map the device, optionally load firmware, then poll logs and IPC.

    With --log-only the firmware is not loaded; the tool waits for a
    running firmware instead and does not service IPC. Otherwise the
    firmware file is loaded with the loader matching the detected platform.
    The polling loop runs while the global ``start_output`` is True.
    """
    #TODO this bit me, remove the globals, write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        (hda, sd, dsp, hda_ostream_id) = map_regs()
    except Exception as e:
        log.error("Could not map device in sysfs; run as root?")
        log.error(e)
        sys.exit(1)

    log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware")

    if args.log_only:
        wait_fw_entered(timeout_s=None)
    elif not args.fw_file:
        log.error("Firmware file argument missing")
        sys.exit(1)
    else:
        loader = load_firmware_ace if adsp_is_ace() else load_firmware
        loader(args.fw_file)
        time.sleep(0.1)

        if not args.quiet:
            sys.stdout.write("--\n")

    if args.shell_pty:
        create_shell_pty()

    # Streams created on request of the DSP, keyed by stream id
    hda_streams = {}

    last_seq = 0
    last_seq_shell = 0
    while start_output is True:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            last_seq_shell = read_from_shell_memwindow_winstream(last_seq_shell)
        last_seq, output = winstream_read(winstream_offset(), last_seq)
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
        if args.log_only:
            continue
        handle_ipc()
|
|
|
|
|
|
# Command line interface. This runs at import time; the parsed ``args`` are
# read as a module global by main(), map_regs(), winstream_read() and
# ipc_command().
ap = argparse.ArgumentParser(description="DSP loader/logger tool", allow_abbrev=False)
ap.add_argument("-q", "--quiet", action="store_true",
                help="No loader output, just DSP logging")
ap.add_argument("-v", "--verbose", action="store_true",
                help="More loader output, DEBUG logging level")
ap.add_argument("-l", "--log-only", action="store_true",
                help="Don't load firmware, just show log output")
ap.add_argument("-p", "--shell-pty", action="store_true",
                help="Create a Zephyr shell pty if enabled in firmware")
ap.add_argument("-n", "--no-history", action="store_true",
                help="No current log buffer at start, just new output")
# Optional positional; main() reports an error if it is missing without -l
ap.add_argument("fw_file", nargs="?", help="Firmware file")

args = ap.parse_args()

# --quiet takes precedence over --verbose when both are given
if args.quiet:
    log.setLevel(logging.WARN)
elif args.verbose:
    log.setLevel(logging.DEBUG)
|
|
|
|
# Script entry point: run main() until it returns or the user hits Ctrl-C.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Clear the flag that main()'s polling loop checks
        start_output = False
|