更新音频调试日志和修复脚本,添加 HDMI 监控工具

This commit is contained in:
Jack
2026-05-21 11:22:17 +08:00
parent e8073bc448
commit bd5a3e81d9
77 changed files with 15716 additions and 31339 deletions

View File

@@ -8,6 +8,7 @@ out=""
fix=0
fix_only=0
verify=0
verify_strict=0
only_pcm=""
retries=1
only_connected=0
@@ -16,13 +17,14 @@ log_dir="${KAISA_LOG_DIR:-./_logs}"
usage() {
cat <<'EOF'
Usage:
./scripts/kaisa-audio-doctor.sh [--fix] [--fix-only] [--verify] [--only-pcm N] [--only-connected] [--retries N] [-o|--out /path/to/log]
./scripts/kaisa-audio-doctor.sh [--fix] [--fix-only] [--verify] [--verify-strict] [--only-pcm N] [--only-connected] [--retries N] [-o|--out /path/to/log]
Modes:
default Diagnostics only (no changes)
--fix Best-effort recovery, then full diagnostics report
--fix-only Same recovery as --fix, then exit (for login/boot automation; use with -o for a short log)
--verify More detailed verification (route/ports/playback + kernel error window), best used right after boot
--verify-strict Implies --verify; after the full report, exit non-zero if VERIFY found no clean sink (for watchdogs/CI)
Options:
--only-pcm N Restrict fix/verify to a single PCM (0/2/3/4). Useful for single-monitor A/B tests.
@@ -31,7 +33,7 @@ Options:
Recovery steps:
- restart user PipeWire/WirePlumber
- set card profile to HiFi
- set card profile to HiFi (required; no pro-audio fallback)
- choose an "available" HDMI port (prefer pcm=3/4, then pcm=2), else fallback to Analog (pcm=0)
- enable matching IEC958 switch (pcm=2->IEC958,0; pcm=3->IEC958,1; pcm=4->IEC958,2)
- quick playback test (timeout)
@@ -47,6 +49,7 @@ while [[ $# -gt 0 ]]; do
--fix-only) fix=1; fix_only=1; shift ;;
--fix) fix=1; shift ;;
--verify) verify=1; shift ;;
--verify-strict) verify=1; verify_strict=1; shift ;;
--only-pcm) only_pcm="${2:-}"; shift 2 ;;
--only-connected) only_connected=1; shift ;;
--retries) retries="${2:-}"; shift 2 ;;
@@ -86,10 +89,41 @@ warn() { printf '\n[WARN] %s\n' "$*"; }
# Succeed only when the first argument is a non-empty run of digits 0-9.
# A missing argument is treated like the empty string and therefore fails.
is_int() {
  local candidate="${1:-}"
  [[ "$candidate" =~ ^[0-9]+$ ]]
}
PLAY_TIMEOUT_S="${KAISA_PLAY_TIMEOUT_S:-8}"
PLAY_KILL_AFTER_S="${KAISA_PLAY_KILL_AFTER_S:-1}"
get_kaisa_card_name() {
  # Print the second column (card name) of the first `pactl list cards short`
  # row that mentions cml_rt5682_def. Prints nothing when pactl produces no
  # matching row; pactl's stderr is discarded.
  pactl list cards short 2>/dev/null \
    | awk '$0 ~ /cml_rt5682_def/ { print $2; exit }'
}
wait_for_card() {
  # Poll get_kaisa_card_name once per second until it prints a name.
  # Arguments: $1 - number of polls (default 6)
  # Outputs:   the card name on stdout as soon as one is seen
  # Returns:   0 when a name was printed, 1 when every poll came back empty
  local timeout_s="${1:-6}"
  local attempt name
  for attempt in $(seq 1 "$timeout_s"); do
    name="$(get_kaisa_card_name || true)"
    [[ -z "$name" ]] || { echo "$name"; return 0; }
    sleep 1
  done
  return 1
}
card_has_profile() {
  # Usage: card_has_profile "<card_name>" "HiFi"
  #
  # Succeeds when `pactl list cards` lists a line "<profile>:" inside the
  # section of the card whose name line equals <card_name>.
  #
  # Arguments: $1 - exact card name, $2 - exact profile name
  # Returns:   0 if the profile is listed for that card, non-zero otherwise
  #
  # Notes:
  #  - Both the English ("Name:") and the zh_CN ("名称:") label are accepted,
  #    so the check does not depend on the session locale.
  #  - Every name line (indented or not) re-evaluates the "inside our card"
  #    flag, so a profile listed under a different card is never counted.
  #  - Card and profile are compared as literal strings, not as regexes.
  #  - awk reads the whole listing instead of exiting at the first hit, so the
  #    producer is never cut off mid-write (relevant when pipefail is set).
  local card="${1:?card}"
  local prof="${2:?profile}"
  pactl list cards 2>/dev/null | awk -v c="$card" -v p="$prof" '
    /^[ \t]*(Name|名称):[ \t]*/ {
      name = $0
      sub(/^[ \t]*(Name|名称):[ \t]*/, "", name)
      sub(/[ \t]+$/, "", name)
      in_card = (name == c)
      next
    }
    in_card && !found {
      line = $0
      sub(/^[ \t]+/, "", line)
      if (index(line, p ":") == 1) found = 1
    }
    END { exit !found }
  '
}
amixer_find_numid() {
# Find numid by matching a single control line from `amixer -c0 controls`.
# Usage:
@@ -222,6 +256,10 @@ connected_hdmi_pcms() {
local out=()
local pcm
for pcm in 3 4 2; do
if kernel_pcm_is_bad "$pcm"; then
warn "Kernel shows pcm=${pcm} hw_params failures in this boot; skipping from connected set"
continue
fi
if read_jack_on_for_pcm "$pcm"; then
local eld
eld="$(eld_bytes_len_for_pcm "$pcm" || echo 0)"
@@ -286,6 +324,7 @@ iec958_index_for_pcm() {
sink_name_for_pcm() {
  # Print the PipeWire sink name for the given PCM index.
  # Delivery requirement: must be managed by UCM/HiFi, hence the fixed
  # ".HiFi__hw_sofrt5682_<pcm>__sink" suffix.
  # Arguments: $1 - PCM index (required; the shell aborts when it is missing)
  local pcm="${1:?pcm}"
  local -r card_part="pci-0000_00_1f.3-platform-cml_rt5682_def"
  printf 'alsa_output.%s.HiFi__hw_sofrt5682_%s__sink\n' "$card_part" "$pcm"
}
@@ -303,11 +342,70 @@ kernel_tail_since_ts() {
tail -n 200 || true
}
user_pipewire_tail_since_ts() {
  # Best-effort: print at most the last 120 lines the user-session pipewire
  # unit logged in the current boot since the given timestamp.
  # Arguments: $1 - timestamp handed to `journalctl --since`
  # Returns:   always 0; journalctl errors are discarded.
  local since_ts="${1:?since_ts}"
  local -a query=(--user -u pipewire -b --since "$since_ts" --no-pager)
  { journalctl "${query[@]}" 2>/dev/null | tail -n 120; } || true
}
kernel_pcm_is_bad() {
  # Return 0 if the kernel log of the current boot shows SOF/ASoC hw_params
  # failures for this PCM, 1 otherwise.
  # This is used as a guardrail to avoid re-probing a PCM that is known to
  # brick HDMI audio in this session.
  #
  # Arguments: $1 - PCM index (digits only; anything else is reported as
  #                 "not bad" because it cannot be matched safely)
  #
  # The journal is fed to grep through process substitution rather than a
  # pipeline: `grep -q` stops at the first match, which can kill journalctl
  # with SIGPIPE, and under `set -o pipefail` a pipeline would then report
  # failure exactly when a failure signature WAS found.
  local pcm="${1:?pcm}"
  # Only digits may be spliced into the regular expression below.
  [[ "$pcm" =~ ^[0-9]+$ ]] || return 1
  local sig="sof_ipc3_pcm_hw_params: pcm${pcm} \\(HDMI|STREAM_PCM_PARAMS.*pcm${pcm}.*failed|HDMI[0-9]+: ASoC error .*pcm${pcm}"
  if grep -qE -- "$sig" < <(journalctl -k -b --no-pager 2>/dev/null); then
    return 0
  fi
  return 1
}
restart_user_audio_services() {
# Restart the pipewire, pipewire-pulse and wireplumber user units with a
# single `systemctl --user restart`, then wait two seconds.
# Both commands run through the `maybe` wrapper, which is defined elsewhere in
# this script. NOTE(review): presumably it reports the command and tolerates a
# failure -- confirm against its definition.
maybe systemctl --user restart pipewire pipewire-pulse wireplumber
maybe sleep 2
}
clamp_wireplumber_default_routes_min_volume() {
  # WirePlumber persists per-port volumes in ~/.local/state/wireplumber/default-routes.
  # On this machine, HDMI ports sometimes get persisted to very low values (e.g. 0.06~0.12),
  # which feels like "no sound" after reboot even when routing is correct.
  #
  # This function is intentionally conservative:
  # - only edits lines containing channelVolumes=
  # - only bumps entries whose first channel value is >0 and < threshold
  # - bumped entries get their first two channel values rewritten to 1.0
  #
  # Arguments: $1 - threshold (default 0.25)
  # Returns:   always 0; this is best-effort. If the scratch file cannot be
  #            created, awk fails, or the rename fails, the routes file is
  #            left as it was.
  local threshold="${1:-0.25}"
  local routes="${HOME}/.local/state/wireplumber/default-routes"
  [[ -f "$routes" ]] || return 0

  # The scratch file lives next to the target so the final mv is a rename on
  # the same filesystem rather than a copy across filesystems.
  local tmp
  tmp="$(mktemp "${routes}.XXXXXX" 2>/dev/null)" || return 0

  if awk -v th="$threshold" '
    function should_bump(line,    parts, n, rest, v) {
      n = split(line, parts, "channelVolumes=")
      if (n < 2) return 0
      rest = parts[2]
      sub(/;.*/, "", rest)      # keep only the first channel value
      v = rest + 0.0
      return (v > 0 && v < th) ? 1 : 0
    }
    {
      if ($0 ~ /channelVolumes=/ && should_bump($0)) {
        sub(/channelVolumes=[0-9.]+;[0-9.]+;/, "channelVolumes=1.0;1.0;")
      }
      print
    }
  ' "$routes" >"$tmp" 2>/dev/null; then
    if ! cmp -s "$routes" "$tmp" 2>/dev/null; then
      if mv -f -- "$tmp" "$routes"; then
        note "Clamped low WirePlumber route volumes in default-routes (threshold ${threshold})"
        return 0
      fi
    fi
  fi
  rm -f -- "$tmp"
  return 0
}
verify_one_pcm() {
local pcm="${1:?pcm}"
local since_ts="${2:?since_ts}"
@@ -356,12 +454,23 @@ verify_one_pcm() {
local ok=0
while [[ "$attempt" -le "$max_attempts" ]]; do
note "pw-play attempt ${attempt}/${max_attempts}"
if timeout -k 1s 5s pw-play /usr/share/sounds/alsa/Front_Center.wav; then
timeout -k "${PLAY_KILL_AFTER_S}s" "${PLAY_TIMEOUT_S}s" pw-play /usr/share/sounds/alsa/Front_Center.wav
local rc=$?
if [[ "$rc" -eq 0 ]]; then
note "pw-play: OK (exit 0)"
ok=1
break
fi
warn "pw-play: FAILED (non-zero exit)"
warn "pw-play: FAILED (exit ${rc})"
if [[ "$rc" -eq 124 ]]; then
warn "pw-play timed out (${PLAY_TIMEOUT_S}s). This can indicate a hung open/stream; capture PipeWire errors below."
fi
note "PipeWire (user) log tail since $since_ts"
local u
u="$(user_pipewire_tail_since_ts "$since_ts")"
if [[ -n "$u" ]]; then
printf '%s\n' "$u"
fi
if [[ "$attempt" -lt "$max_attempts" ]]; then
note "Restarting user audio services before retry"
restart_user_audio_services
@@ -478,8 +587,46 @@ verify_audio() {
note "At least one sink attempt completed with no kernel error lines in its window (pcm=${ok_pcm})."
note "Default sink is kept at this successful pcm to preserve working audio."
note "If you still have silence but no kernel errors: focus on routing/monitor input/volume persistence."
return 0
fi
warn "No sink attempt was clean. If windows show ipc tx error -5 / hw_params failures, this points to kernel/SOF."
return 1
}
persist_wireplumber_default_profile_hifi() {
  # WirePlumber restores card profile from this file on user-session start.
  # If it contains pro-audio, it will override our manual pactl set-card-profile HiFi.
  #
  # Rewrites every "<card>=<profile>" line for the cml_rt5682_def card to
  # "<card>=HiFi"; when no such line exists, one is appended after a blank
  # line. The file is replaced only if its content actually changes.
  #
  # Returns: always 0; this is best-effort. If the state file is missing, the
  #          scratch file cannot be created, awk fails, or the rename fails,
  #          the state file is left as it was.
  local f="${HOME}/.local/state/wireplumber/default-profile"
  [[ -f "$f" ]] || return 0

  local key="alsa_card.pci-0000_00_1f.3-platform-cml_rt5682_def"

  # The scratch file lives next to the target so the final mv is a rename on
  # the same filesystem rather than a copy across filesystems.
  local tmp
  tmp="$(mktemp "${f}.XXXXXX" 2>/dev/null)" || return 0

  if awk -v key="$key" '
    index($0, key "=") == 1 { print key "=HiFi"; seen = 1; next }
    { print }
    END {
      # If the mapping line does not exist, append it.
      if (!seen) printf "\n%s=HiFi\n", key
    }
  ' "$f" >"$tmp" 2>/dev/null; then
    if ! cmp -s "$f" "$tmp" 2>/dev/null; then
      if mv -f -- "$tmp" "$f"; then
        note "Persisted WirePlumber default-profile to HiFi (${f})"
        return 0
      fi
    fi
  fi
  rm -f -- "$tmp"
  return 0
}
@@ -497,6 +644,9 @@ apply_fix() {
return 1
fi
# Preemptively fix the most common "looks routed but silent after reboot" trap.
clamp_wireplumber_default_routes_min_volume 0.25
note "Restarting user audio services"
restart_user_audio_services
@@ -506,14 +656,24 @@ apply_fix() {
maybe aplay -l
local card
card="$(get_kaisa_card_name)"
card="$(wait_for_card 8 || true)"
if [[ -z "$card" ]]; then
warn "Could not find card name (expected match: cml_rt5682_def). Abort fix."
return 1
fi
note "Forcing profile to HiFi on card: $card"
maybe pactl set-card-profile "$card" HiFi
local target_profile="HiFi"
if ! card_has_profile "$card" "HiFi"; then
warn "Card does not expose HiFi profile in pactl. This violates the 'HiFi-only' requirement."
warn "Fix: ensure UCM overlay is installed and PipeWire enumerates HiFi, then reboot/restart user audio services."
return 1
fi
# Ensure WirePlumber won't revert profile back to pro-audio on restart.
persist_wireplumber_default_profile_hifi
note "Setting card profile to ${target_profile} on card: $card"
maybe pactl set-card-profile "$card" "$target_profile"
maybe sleep 1
local preferred_pcm
@@ -548,6 +708,10 @@ apply_fix() {
local pcm sink iec avail total
for pcm in "${candidates[@]}"; do
if [[ "$pcm" -ne 0 ]] && kernel_pcm_is_bad "$pcm"; then
warn "Skipping pcm=$pcm due to kernel SOF/ASoC failure signature in this boot"
continue
fi
# For HDMI outputs, only try PCMs whose Jack is currently ON.
if [[ "$pcm" -ne 0 ]]; then
if ! wait_for_hdmi_ready_pcm "$pcm" 6; then
@@ -568,9 +732,9 @@ apply_fix() {
sink="alsa_output.pci-0000_00_1f.3-platform-cml_rt5682_def.HiFi__hw_sofrt5682_${pcm}__sink"
if [[ "$pcm" -eq 0 ]]; then
note "Trying fallback Analog (Port1) sink: $sink"
note "Trying fallback Analog (Port1)"
else
note "Trying HDMI pcm=$pcm sink: $sink"
note "Trying HDMI pcm=$pcm"
fi
maybe pactl set-default-sink "$sink"
@@ -659,8 +823,13 @@ if [[ "$fix_only" -eq 1 ]]; then
exit 0
fi
verify_rc=0
if [[ "$verify" -eq 1 ]]; then
verify_audio || true
if [[ "$verify_strict" -eq 1 ]]; then
verify_audio && verify_rc=0 || verify_rc=$?
else
verify_audio || true
fi
fi
sec "Versions (PipeWire / WirePlumber / ALSA utils)"
@@ -776,10 +945,10 @@ echo "Try these instead (they use PipeWire):"
echo
echo "Tip: these are wrapped with a short timeout to avoid hanging."
echo " (uses: timeout -k 1s 5s ... -> TERM then KILL)"
maybe timeout -k 1s 5s speaker-test -D pipewire -c2 -t sine -f 440 -l 1
maybe timeout -k 1s 5s speaker-test -D default -c2 -t sine -f 440 -l 1
maybe timeout -k 1s 5s pw-play /usr/share/sounds/alsa/Front_Center.wav
maybe timeout -k 1s 5s paplay /usr/share/sounds/alsa/Front_Center.wav
maybe timeout -k "${PLAY_KILL_AFTER_S}s" "${PLAY_TIMEOUT_S}s" speaker-test -D pipewire -c2 -t sine -f 440 -l 1
maybe timeout -k "${PLAY_KILL_AFTER_S}s" "${PLAY_TIMEOUT_S}s" speaker-test -D default -c2 -t sine -f 440 -l 1
maybe timeout -k "${PLAY_KILL_AFTER_S}s" "${PLAY_TIMEOUT_S}s" pw-play /usr/share/sounds/alsa/Front_Center.wav
maybe timeout -k "${PLAY_KILL_AFTER_S}s" "${PLAY_TIMEOUT_S}s" paplay /usr/share/sounds/alsa/Front_Center.wav
sec "Recent logs (journalctl --user, current boot)"
maybe journalctl --user -u wireplumber -b --no-pager -n 200
@@ -798,3 +967,6 @@ EOF
hr
echo "Done. Report saved to: $out"
if [[ "$verify" -eq 1 && "$verify_strict" -eq 1 ]]; then
exit "${verify_rc:-1}"
fi

View File

@@ -0,0 +1,134 @@
#!/usr/bin/env bash
# When VERIFY fails: PCI reset (root) + doctor --fix (desktop user), then re-verify.
# Run as your desktop user (same as kaisa-audio-doctor.sh). Reset step needs:
# sudo -n /path/to/reset-sof-hdmi-pci.sh
# (configure NOPASSWD for that absolute path).
#
# Typical:
# ./scripts/kaisa-audio-hdmi-watchdog.sh
# ./scripts/kaisa-audio-hdmi-watchdog.sh --loop --interval 300
#
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
RESET_SCRIPT="${KAISA_RESET_SCRIPT:-${REPO_ROOT}/scripts/reset-sof-hdmi-pci.sh}"
DOCTOR="${REPO_ROOT}/scripts/kaisa-audio-doctor.sh"
LOG_DIR="${KAISA_LOG_DIR:-${REPO_ROOT}/_logs}"
CONNECTED_ARGS=(--only-connected)
LOOPS=1
INTERVAL_SEC="${KAISA_WATCHDOG_INTERVAL:-120}"
LOOP_FOREVER=0
# Print one line on stdout in the form "[YYYY-MM-DD HH:MM:SS] <message>",
# where <message> is all arguments joined by spaces.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
usage() {
  # Help intro: every line among the first 40 of this script that starts with
  # "# ", printed with that prefix removed.
  awk 'NR > 40 { exit } /^# / { sub(/^# /, ""); print }' "$0"
  # Option summary; the default interval shown is the current INTERVAL_SEC.
  cat <<EOF
Options:
--loop Repeat until interrupted (sleep --interval between cycles)
--loops N Run N cycles (default: 1)
--interval SEC Sleep between cycles in --loop mode (default: ${INTERVAL_SEC})
--all-hdmi Do not pass --only-connected (verify/fix may probe pcm2/3/4; riskier)
-h, --help This help
EOF
}
# Consume command-line options left to right. An unrecognised option prints
# the help text to stderr and exits with status 2; --loops and --interval
# abort the script when their value is missing.
until [[ $# -eq 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --loop)
      LOOP_FOREVER=1
      shift
      ;;
    --all-hdmi)
      CONNECTED_ARGS=()
      shift
      ;;
    --loops)
      LOOPS="${2:?}"
      shift 2
      ;;
    --interval)
      INTERVAL_SEC="${2:?}"
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
# --loops must be a positive decimal integer. It is normalised afterwards so
# that a value with leading zeros (e.g. "08") is not parsed as octal by the
# numeric comparisons in the main loop.
if ! [[ "${LOOPS}" =~ ^[0-9]+$ ]] || (( 10#${LOOPS} < 1 )); then
  echo "ERROR: --loops must be a positive integer" >&2
  exit 2
fi
LOOPS=$(( 10#${LOOPS} ))

# Validate the interval up front: it is only handed to `sleep` after a full
# cycle (possibly including a PCI reset) has already run, which is a bad
# moment to discover a typo.
if ! [[ "${INTERVAL_SEC}" =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]]; then
  echo "ERROR: --interval must be a duration accepted by sleep (e.g. 120, 0.5, 5m)" >&2
  exit 2
fi

# The doctor needs the desktop user's session, so refuse to run as root.
if [[ "${EUID}" -eq 0 ]]; then
  echo "ERROR: run as desktop user, not root (doctor --fix needs user session)." >&2
  exit 1
fi

# The doctor is executed directly and must therefore be executable.
if [[ ! -x "${DOCTOR}" ]]; then
  echo "ERROR: missing doctor: ${DOCTOR}" >&2
  exit 1
fi

# The reset script is run through `sudo -n`; only its presence is checked here.
if [[ ! -f "${RESET_SCRIPT}" ]]; then
  echo "ERROR: missing reset script: ${RESET_SCRIPT}" >&2
  exit 1
fi

mkdir -p "${LOG_DIR}"
run_one_cycle() {
  # One watchdog pass: strict verify; on failure reset via sudo, run the
  # doctor's --fix (its exit status is ignored), then strict verify again.
  # Arguments: $1 - cycle number, used in log lines and log file names
  # Returns:   0 when the first or the second verify succeeds,
  #            1 when the sudo reset fails or the second verify fails
  local cycle="${1:?}"
  local stamp
  stamp="$(date +%Y%m%d_%H%M%S)"
  # All three reports of a cycle share one timestamp prefix.
  local prefix="${LOG_DIR}/kaisa-watchdog_${stamp}_c${cycle}"
  local verify_log="${prefix}_verify.log"
  local fix_log="${prefix}_fix.log"
  local reverify_log="${prefix}_reverify.log"
  local -a strict_verify=("${DOCTOR}" --verify "${CONNECTED_ARGS[@]}" --verify-strict)

  log "cycle ${cycle}: verify -> ${verify_log}"
  if "${strict_verify[@]}" -o "${verify_log}"; then
    log "cycle ${cycle}: verify OK"
    return 0
  fi

  log "cycle ${cycle}: verify FAILED -> sudo -n reset (${RESET_SCRIPT})"
  sudo -n "${RESET_SCRIPT}" || {
    log "ERROR: sudo -n reset failed (password / NOPASSWD path?)"
    return 1
  }

  log "cycle ${cycle}: doctor --fix -> ${fix_log}"
  "${DOCTOR}" --fix "${CONNECTED_ARGS[@]}" -o "${fix_log}" || true

  log "cycle ${cycle}: re-verify -> ${reverify_log}"
  if ! "${strict_verify[@]}" -o "${reverify_log}"; then
    log "cycle ${cycle}: re-verify still FAILED (see ${reverify_log})"
    return 1
  fi
  log "cycle ${cycle}: re-verify OK after reset+fix"
  return 0
}
# Run cycles until LOOPS is reached, or without end in --loop mode.
# A failed cycle is only logged; it never stops the loop.
cycle=0
while :; do
  cycle=$((cycle + 1))
  run_one_cycle "${cycle}" || log "cycle ${cycle}: recovery did not achieve clean verify"

  # Finite mode ends after the requested number of cycles, without sleeping.
  if [[ "${LOOP_FOREVER}" -eq 0 && "${cycle}" -ge "${LOOPS}" ]]; then
    break
  fi

  log "sleep ${INTERVAL_SEC}s before next cycle"
  sleep "${INTERVAL_SEC}"
done
log "watchdog finished (${cycle} cycle(s))"

115
scripts/reset-sof-hdmi-pci.sh Executable file
View File

@@ -0,0 +1,115 @@
#!/usr/bin/env bash
# Reset Intel SOF audio PCI driver (sof-audio-pci-intel-cnl) for 0000:00:1f.3
# Goal: recover from SOF/HDA DMA channel exhaustion ("hda-dmac: ... no free channel")
# that manifests as HDMI hw_params failures / ipc tx error -5 in kernel logs.
#
# IMPORTANT:
# - This script is intended to be run as root (sudo).
# - For unattended automation from tools without a TTY, add a single sudoers NOPASSWD entry, e.g.:
# jack ALL=(root) NOPASSWD: /home/jack/文档/chromebox_10th_audio_driver/scripts/reset-sof-hdmi-pci.sh
#
# Usage:
# sudo bash scripts/reset-sof-hdmi-pci.sh
# sudo bash scripts/reset-sof-hdmi-pci.sh --verify-pcm2
#
set -euo pipefail
# Repository root: the parent of the directory holding this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
DOCTOR="${REPO_ROOT}/scripts/kaisa-audio-doctor.sh"
# PCI address and driver name can be overridden from the environment.
DEV_PCI="${DEV_PCI:-0000:00:1f.3}"
DRIVER_NAME="${DRIVER_NAME:-sof-audio-pci-intel-cnl}"
DRIVER_SYSFS="/sys/bus/pci/drivers/${DRIVER_NAME}"

# Pick the account whose user-session audio services are stopped/started.
# sudo normally resets the environment, so a DESKTOP_USER set by the caller is
# usually not visible here; SUDO_USER, which sudo itself sets, is. Prefer it
# (unless it is root) and only then fall back to the historical default.
default_desktop_user() {
  if [[ -n "${SUDO_USER:-}" && "${SUDO_USER}" != "root" ]]; then
    printf '%s\n' "${SUDO_USER}"
  else
    printf '%s\n' "jack"
  fi
}
DESKTOP_USER="${DESKTOP_USER:-$(default_desktop_user)}"

# Only the first argument is inspected; anything other than --verify-pcm2 is ignored.
VERIFY_PCM2=0
if [[ "${1:-}" == "--verify-pcm2" ]]; then
  VERIFY_PCM2=1
fi
# Print "[<date +%F %T>] <message>" on stdout; the message is all arguments
# joined by spaces.
log() {
  local stamp
  stamp="$(date '+%F %T')" || true
  printf '[%s] %s\n' "${stamp}" "$*"
}
require_root() {
  # Return quietly when running as root; otherwise print a hint on stderr and
  # terminate the script with status 1.
  [[ "${EUID}" -ne 0 ]] || return 0
  echo "ERROR: run as root: sudo bash $0" >&2
  exit 1
}
mount_debugfs_if_needed() {
  # Mount debugfs on /sys/kernel/debug unless mountpoint(1) reports that
  # something is already mounted there.
  # Returns: 0 when already mounted, otherwise the status of mount(8).
  local -r target=/sys/kernel/debug
  if ! mountpoint -q "${target}"; then
    log "mounting debugfs on /sys/kernel/debug"
    mount -t debugfs none "${target}"
    return
  fi
  log "debugfs already mounted"
  return 0
}
stop_user_audio() {
  log "stopping user audio services for ${DESKTOP_USER}"
  # Address the desktop user's `systemctl --user` instance: run as that user
  # with XDG_RUNTIME_DIR pointing at /run/user/<uid>.
  # This requires root privileges (this script runs as root).
  local -a as_user=(
    sudo -u "${DESKTOP_USER}"
    XDG_RUNTIME_DIR="/run/user/$(id -u "${DESKTOP_USER}")"
  )
  # A failing stop is tolerated.
  "${as_user[@]}" systemctl --user stop wireplumber pipewire-pulse pipewire || true
  sleep 1
}
start_user_audio() {
  log "starting user audio services for ${DESKTOP_USER}"
  # Same addressing as the stop step: run as the desktop user with
  # XDG_RUNTIME_DIR pointing at /run/user/<uid>.
  local -a as_user=(
    sudo -u "${DESKTOP_USER}"
    XDG_RUNTIME_DIR="/run/user/$(id -u "${DESKTOP_USER}")"
  )
  # A failing start is tolerated.
  "${as_user[@]}" systemctl --user start pipewire pipewire-pulse wireplumber || true
  sleep 2
}
pci_reset() {
  # Unbind DEV_PCI from the driver and bind it again through the driver's
  # sysfs "unbind" / "bind" nodes, pausing after each write.
  # Exits the script with status 1 when the driver directory is missing.
  [[ -d "${DRIVER_SYSFS}" ]] || {
    echo "ERROR: driver sysfs dir missing: ${DRIVER_SYSFS}" >&2
    # Diagnostic only: list any driver directories whose name mentions sof.
    ls -la /sys/bus/pci/drivers | grep -i sof || true
    exit 1
  }

  local unbind_node="${DRIVER_SYSFS}/unbind"
  local bind_node="${DRIVER_SYSFS}/bind"

  log "unbinding ${DEV_PCI} from ${DRIVER_NAME}"
  echo "${DEV_PCI}" >"${unbind_node}"
  sleep 1

  log "binding ${DEV_PCI} to ${DRIVER_NAME}"
  echo "${DEV_PCI}" >"${bind_node}"
  sleep 2
}
tail_kernel_hints() {
  # Print up to the last 120 kernel-log lines of the current boot that match
  # the SOF/HDMI related patterns, each prefixed with its line number in the
  # journal output. Best-effort: always returns 0.
  log "kernel hints (last ~120 matching lines)"
  local -r hint_re='sof-audio-pci-intel-cnl|Firmware file:|Topology file:|Firmware info: version|ipc tx error|sof_ipc3_pcm_hw_params|hda-dmac|dma_channel_get|ASoC error|HDMI[0-9]|pcm[0-9]+'
  {
    journalctl -k -b --no-pager 2>/dev/null \
      | grep -nE "${hint_re}" \
      | tail -n 120
  } || true
}
maybe_verify() {
  # When --verify-pcm2 was given, run the doctor's verify for PCM 2 as the
  # desktop user from the repository root. Best-effort: always returns 0, and
  # a missing/non-executable doctor only produces a warning.
  if [[ "${VERIFY_PCM2}" -ne 1 ]]; then
    return 0
  fi
  if [[ ! -x "${DOCTOR}" ]]; then
    echo "WARN: doctor script missing: ${DOCTOR}" >&2
    return 0
  fi
  log "running doctor verify (pcm2) as ${DESKTOP_USER}"
  # The repo path and doctor path are passed as positional parameters instead
  # of being pasted into the command string, so a path containing quotes or
  # other shell metacharacters cannot break (or inject into) the command.
  # The doctor that is executed is the same file that was checked above.
  sudo -u "${DESKTOP_USER}" XDG_RUNTIME_DIR="/run/user/$(id -u "${DESKTOP_USER}")" \
    bash -lc 'cd -- "$1" && "$2" --verify --only-pcm 2' \
    kaisa-verify "${REPO_ROOT}" "${DOCTOR}" || true
}
main() {
  # Full reset sequence: stop the desktop user's audio services, rebind the
  # PCI device, start the services again, then print kernel hints and
  # optionally run the doctor's verify.
  require_root
  mount_debugfs_if_needed
  stop_user_audio
  # From here until the services are started again the desktop has no audio
  # stack. If pci_reset aborts the script (missing driver directory, failed
  # sysfs write), the EXIT trap still brings the services back; the script's
  # failing exit status is preserved.
  trap start_user_audio EXIT
  pci_reset
  trap - EXIT
  start_user_audio
  tail_kernel_hints
  maybe_verify
  log "done"
}
main "$@"

View File

@@ -0,0 +1,172 @@
#!/usr/bin/env bash
set -euo pipefail
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
LOG_DIR="${REPO_ROOT}/_logs"
SOF_BIN_ROOT="${SOF_BIN_ROOT:-/var/tmp/sof-bin-upgrade/sof-bin-2025.12.2}"
SOF_LOGGER="${SOF_LOGGER:-${SOF_BIN_ROOT}/tools/sof-logger}"
SOF_LDC_DEFAULT="${SOF_LDC_DEFAULT:-${SOF_BIN_ROOT}/sof/sof-cml.ldc}"
mkdir -p "${LOG_DIR}"
ts="$(date +%Y%m%d_%H%M%S)"
start_ts_human="$(date '+%F %T')"
sof_log="${LOG_DIR}/sof-logger_${ts}.log"
kernel_log="${LOG_DIR}/kernel_sof_ipc_${ts}.log"
pipewire_log="${LOG_DIR}/user_pipewire_${ts}.log"
sof_debug_snapshot="${LOG_DIR}/sof-debugfs_${ts}.txt"
echo "[INFO] repo=${REPO_ROOT}"
echo "[INFO] log_dir=${LOG_DIR}"
echo "[INFO] start_ts=${start_ts_human}"
echo
if [[ "${EUID}" -ne 0 ]]; then
echo "[ERROR] 请用 sudo 运行sudo bash scripts/sof-trace-capture.sh" >&2
exit 1
fi
echo "[STEP] 1) 确保 debugfs 已挂载 (/sys/kernel/debug)"
if ! mountpoint -q /sys/kernel/debug; then
mount -t debugfs none /sys/kernel/debug
fi
if [[ ! -d /sys/kernel/debug ]]; then
echo "[ERROR] /sys/kernel/debug 不存在?" >&2
exit 1
fi
echo "[INFO] debugfs: OK"
echo
echo "[STEP] 2) 检查 sof-logger 工具"
if [[ ! -x "${SOF_LOGGER}" ]]; then
echo "[ERROR] 找不到可执行的 sof-logger" >&2
echo " ${SOF_LOGGER}" >&2
echo " 可通过环境变量覆盖SOF_BIN_ROOT 或 SOF_LOGGER" >&2
exit 1
fi
echo "[INFO] sof-logger=${SOF_LOGGER}"
echo
echo "[STEP] 2.1) 确认 SOF dictionary (.ldc)"
SOF_LDC="${SOF_LDC:-${SOF_LDC_DEFAULT}}"
if [[ ! -e "${SOF_LDC}" ]]; then
echo "[ERROR] 未找到 SOF .ldc 文件:" >&2
echo " ${SOF_LDC}" >&2
echo " 可通过环境变量覆盖SOF_LDC" >&2
exit 1
fi
echo "[INFO] SOF_LDC=${SOF_LDC}"
echo
echo "[STEP] 2.2) 确认 debugfs 下的 SOF trace 节点"
# We expect one of these to exist when SOF is running.
SOF_TRACE_NODE=""
SOF_TRACE_MODE=""
for p in /sys/kernel/debug/sof/trace /sys/kernel/debug/sof/etrace; do
if [[ -e "$p" ]]; then
SOF_TRACE_NODE="$p"
if [[ "$p" == */trace ]]; then
SOF_TRACE_MODE="trace"
else
SOF_TRACE_MODE="etrace"
fi
break
fi
done
if [[ -z "${SOF_TRACE_NODE}" ]]; then
echo "[ERROR] 未找到 /sys/kernel/debug/sof/{trace,etrace}(可能权限或内核配置问题)" >&2
echo " 你可以先用 root 查看ls -la /sys/kernel/debug/sof" >&2
exit 1
fi
echo "[INFO] SOF_TRACE_NODE=${SOF_TRACE_NODE}"
echo "[INFO] SOF_TRACE_MODE=${SOF_TRACE_MODE}"
echo
echo "[STEP] 2.3) 记录 SOF debugfs 快照(用于后续定位)"
{
echo "=== snapshot ts=${start_ts_human} ==="
echo "SOF_TRACE_NODE=${SOF_TRACE_NODE}"
echo "SOF_TRACE_MODE=${SOF_TRACE_MODE}"
echo
echo "## ls -la /sys/kernel/debug/sof"
ls -la /sys/kernel/debug/sof 2>&1 || true
echo
for f in fw_version dsp_state ipc4_fw_status ipc4_bldr_status; do
if [[ -e "/sys/kernel/debug/sof/${f}" ]]; then
echo "## cat /sys/kernel/debug/sof/${f}"
cat "/sys/kernel/debug/sof/${f}" 2>&1 || true
echo
fi
done
} > "${sof_debug_snapshot}" 2>&1 || true
echo "[DONE] sof debugfs snapshot: ${sof_debug_snapshot}"
# Set once cleanup has run so that it never collects the log windows twice.
cleanup_done=0

cleanup() {
  # Collect the kernel and PipeWire journal windows since the capture started
  # and tell the user where the three log files are. Runs from the EXIT trap.
  [[ "${cleanup_done}" -eq 0 ]] || return 0
  cleanup_done=1

  echo
  echo "[STEP] 4) 收集日志窗口 (since ${start_ts_human})"
  # Kernel: capture SOF/ASoC/HDMI related lines since start
  journalctl -k -b --since "${start_ts_human}" --no-pager 2>/dev/null | \
    grep -nE 'sof-audio|snd_sof|sof_ipc3_pcm_hw_params|ipc tx error|STREAM_PCM_PARAMS|ASoC error|set_hw_params|HDMI[0-9]|pcm[0-9]+' \
    > "${kernel_log}" || true
  # User PipeWire: capture since start
  journalctl --user -u pipewire -b --since "${start_ts_human}" --no-pager 2>/dev/null \
    > "${pipewire_log}" || true
  echo "[DONE] sof-logger: ${sof_log}"
  echo "[DONE] kernel window: ${kernel_log}"
  echo "[DONE] pipewire window: ${pipewire_log}"
  echo
  echo "[NEXT] 请把这三份日志发我分析:"
  echo " - ${sof_log}"
  echo " - ${kernel_log}"
  echo " - ${pipewire_log}"
}

# cleanup runs exactly once, on exit. INT/TERM must terminate the script
# themselves: a handler that merely returns would let the endless capture loop
# resume after Ctrl+C (the logger call is guarded by `|| true`).
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
echo
echo "[STEP] 3) 开始抓取 SOF trace前台运行按 Ctrl+C 结束并收集窗口日志)"
echo "[ACTION] 现在请在另一个终端复现一次问题(例如触发 HDMI pcm2 的播放/切换)。"
echo
echo "[INFO] capturing to: ${sof_log}"
echo
# sof-logger 参数说明(关键):
# -l: 传入 .ldc dictionary不是输出文件
# -t: 选择 DMA trace stream会强制读取 /sys/kernel/debug/sof/trace
# -i/-o: 将输入流解码输出到文件
echo "[INFO] NOTE: etrace 通常是“读完当前缓冲就退出”,所以这里使用循环追加,直到 Ctrl+C。"
echo
touch "${sof_log}"
echo "=== sof-trace-capture start ${start_ts_human} ===" >> "${sof_log}"
capture_once() {
  # Run sof-logger once into a scratch file and, when it produced any output,
  # append that output to ${sof_log} under a timestamped chunk header.
  # A failing sof-logger run is tolerated.
  local scratch="${LOG_DIR}/.sof-logger_${ts}.tmp"
  local -a logger_cmd=("${SOF_LOGGER}" -l "${SOF_LDC}")
  # etrace mailbox: do NOT pass -t, otherwise sof-logger will try /sys/kernel/debug/sof/trace
  [[ "${SOF_TRACE_MODE}" != "trace" ]] || logger_cmd+=(-t)
  logger_cmd+=(-i "${SOF_TRACE_NODE}" -o "${scratch}")

  rm -f "${scratch}"
  "${logger_cmd[@]}" || true

  if [[ -s "${scratch}" ]]; then
    printf '\n=== sof-logger chunk @ %s ===\n' "$(date '+%F %T')" >>"${sof_log}"
    cat "${scratch}" >>"${sof_log}"
  fi
  rm -f "${scratch}"
}
while true; do
capture_once
sleep 0.2
done

159
scripts/upgrade-sof-cml.sh Normal file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/env bash
set -euo pipefail
echo "==== 1) 当前内核中的 SOF 固件 / 拓扑信息 ===="
journalctl -k -b | grep -nE 'Firmware file:|Topology file:|Firmware info: version' || true
echo
# 确保用 root 运行
if [[ $EUID -ne 0 ]]; then
echo "请用 sudo 运行本脚本sudo bash $0"
exit 1
fi
echo "==== 2) 备份当前 SOF 固件和拓扑 ===="
BACKUP_FW_DIR="/lib/firmware/intel/sof-backup-kaisa-20260416"
BACKUP_TPLG_DIR="/lib/firmware/intel/sof-tplg-backup-kaisa-20260416"

mkdir -p "$BACKUP_FW_DIR"
mkdir -p "$BACKUP_TPLG_DIR"

# Copy one file into a backup directory, but never overwrite an existing
# backup: the backup directories have fixed names, so on a re-run after a
# successful upgrade the "current" file is already the new one and copying it
# again would destroy the only rollback copy.
# Arguments: $1 - file to back up, $2 - backup directory
backup_once() {
  local src="$1"
  local dest_dir="$2"
  local dest="${dest_dir}/${src##*/}"
  if [[ -f "$dest" ]]; then
    echo "已存在备份 ${dest},保留原备份,不覆盖"
    return 0
  fi
  if [[ -f "$src" ]]; then
    cp -a -- "$src" "$dest_dir"/
    echo "已备份 ${src##*/} 到 ${dest_dir}/"
  else
    echo "警告:未找到 ${src}"
  fi
}

backup_once /lib/firmware/intel/sof/community/sof-cml.ri "$BACKUP_FW_DIR"
backup_once /lib/firmware/intel/sof-tplg/sof-cml-rt5682.tplg "$BACKUP_TPLG_DIR"
echo
echo "==== 2.1) 校验备份是否落盘sha256 ===="
if [[ -f "$BACKUP_FW_DIR/sof-cml.ri" ]]; then
sha256sum /lib/firmware/intel/sof/community/sof-cml.ri "$BACKUP_FW_DIR/sof-cml.ri" || true
else
echo "错误:未生成备份文件 $BACKUP_FW_DIR/sof-cml.ri中止避免覆盖后无法回滚"
exit 1
fi
if [[ -f "$BACKUP_TPLG_DIR/sof-cml-rt5682.tplg" ]]; then
sha256sum /lib/firmware/intel/sof-tplg/sof-cml-rt5682.tplg "$BACKUP_TPLG_DIR/sof-cml-rt5682.tplg" || true
else
echo "错误:未生成备份文件 $BACKUP_TPLG_DIR/sof-cml-rt5682.tplg中止避免覆盖后无法回滚"
exit 1
fi
echo
echo "==== 3) 下载并解压 sof-bin 2025.12.2 ===="
WORKDIR="/var/tmp/sof-bin-upgrade"
mkdir -p "$WORKDIR"
cd "$WORKDIR"
if [[ ! -f sof-bin-2025.12.2.tar.gz ]]; then
  echo "下载 sof-bin-2025.12.2.tar.gz ..."
  # Download under a scratch name and rename only after wget succeeded.
  # Writing straight to the final name would leave a truncated tarball behind
  # after an interrupted download, and the existence check above would then
  # skip the download on every later run.
  rm -f sof-bin-2025.12.2.tar.gz.part
  wget -O sof-bin-2025.12.2.tar.gz.part \
    https://github.com/thesofproject/sof-bin/releases/download/v2025.12.2/sof-bin-2025.12.2.tar.gz
  mv -f sof-bin-2025.12.2.tar.gz.part sof-bin-2025.12.2.tar.gz
else
  echo "已存在 sof-bin-2025.12.2.tar.gz跳过下载。"
fi
echo "解析 tarball 顶层目录..."
# 注意:脚本启用了 pipefail直接用 `tar ... | head -n1` 会触发 tar 的 SIGPIPE (exit 141),导致脚本中止。
# 所以先将列表落盘,再读取第一行。
TAR_LIST_FILE="$WORKDIR/sof-bin-2025.12.2.tar.list"
tar tf sof-bin-2025.12.2.tar.gz > "$TAR_LIST_FILE"
TOPDIR="$(head -n 1 "$TAR_LIST_FILE" | cut -d/ -f1)"
if [[ -z "${TOPDIR}" ]]; then
echo "错误:无法解析 tarball 顶层目录TOPDIR 为空)"
exit 1
fi
echo "tarball 顶层目录:$TOPDIR"
if [[ ! -d "$TOPDIR" ]]; then
echo "解压 sof-bin-2025.12.2.tar.gz 到 $WORKDIR/$TOPDIR ..."
tar xf sof-bin-2025.12.2.tar.gz
else
echo "已存在目录 $TOPDIR,跳过解压。"
fi
cd "$TOPDIR"
echo
echo "==== 4) 查找 sof-cml.ri 和 sof-cml-rt5682*.tplg ===="
# 重要:同一个 tarball 里可能同时存在:
# - ./sof/sof-cml.ri通常指向 intel-signed 版本或与旧版本相同)
# - ./sof/community/sof-cml.ri我们系统实际加载的路径也是 intel/sof/community/sof-cml.ri
# 为避免误选,优先选择 community 目录的固件。
FW_CML_PATH=$(
find ./sof/community -maxdepth 1 \( -type f -o -type l \) -name 'sof-cml.ri' 2>/dev/null | head -n 1 || true
)
if [[ -z "${FW_CML_PATH}" ]]; then
FW_CML_PATH=$(find . \( -type f -o -type l \) -name 'sof-cml.ri' | head -n 1 || true)
fi
TPLG_RT5682_PATH=$(
find ./sof-tplg -maxdepth 1 -type f -name 'sof-cml-rt5682.tplg' 2>/dev/null | head -n 1 || true
)
if [[ -z "${TPLG_RT5682_PATH}" ]]; then
TPLG_RT5682_PATH=$(find . -type f -name 'sof-cml-rt5682.tplg' | head -n 1 || true)
fi
echo "找到的 sof-cml.ri 路径: $FW_CML_PATH"
echo "找到的 sof-cml-rt5682*.tplg 路径: $TPLG_RT5682_PATH"
echo
if [[ -z "$FW_CML_PATH" || -z "$TPLG_RT5682_PATH" ]]; then
echo "错误:未能在 sof-bin 包内找到 sof-cml.ri 或 sof-cml-rt5682*.tplg"
exit 1
fi
echo "==== 5) 用新固件 / 拓扑 覆盖系统文件 ===="
cp -L "$FW_CML_PATH" /lib/firmware/intel/sof/community/sof-cml.ri
echo "已覆盖 /lib/firmware/intel/sof/community/sof-cml.ri"
cp -L "$TPLG_RT5682_PATH" /lib/firmware/intel/sof-tplg/sof-cml-rt5682.tplg
echo "已覆盖 /lib/firmware/intel/sof-tplg/sof-cml-rt5682.tplg"
echo
echo "==== 5.1) 覆盖后校验sha256 / 文件大小) ===="
sha256sum \
/lib/firmware/intel/sof/community/sof-cml.ri \
/lib/firmware/intel/sof-tplg/sof-cml-rt5682.tplg || true
ls -la \
/lib/firmware/intel/sof/community/sof-cml.ri \
/lib/firmware/intel/sof-tplg/sof-cml-rt5682.tplg || true
echo
echo "提示:如果你想确认 tarball 里选中的文件 hash可在工作目录下执行"
echo " sha256sum \"$FW_CML_PATH\" \"$TPLG_RT5682_PATH\""
echo
echo "==== 6) 更新 initramfs可能需要几秒 ===="
update-initramfs -u
echo
echo "==== 7) 升级完成,当前系统中的固件信息 (下次重启后再确认一次) ===="
journalctl -k -b | grep -nE 'Firmware file:|Topology file:|Firmware info: version' || true
echo
echo "步骤完成:"
echo "1) 已备份旧版本到:"
echo " - $BACKUP_FW_DIR/sof-cml.ri"
echo " - $BACKUP_TPLG_DIR/sof-cml-rt5682.tplg"
echo "2) 已将 sof-bin 2025.12.2 中的 sof-cml.ri / sof-cml-rt5682.tplg 覆盖到系统目录。"
echo
echo "下一步:请手动执行 reboot然后重启回来后用以下命令确认新固件版本"
echo " journalctl -k -b | grep -nE 'Firmware file:|Topology file:|Firmware info: version'"
echo
echo "如需回滚,可将备份文件拷回原路径后再次执行 update-initramfs -u。"