/* * Project Acrn * Acrn-dm-monitor * * Copyright (C) 2018 Intel Corporation. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause * * * Author: TaoYuhong */ #include #include #include #include #include #include #include #include "dm.h" #include "dm_string.h" #include "monitor.h" #include "acrn_mngr.h" #include "pm.h" #include "vmmapi.h" #include "log.h" #define INTR_STORM_MONITOR_PERIOD 10 /* 10 seconds */ #define INTR_STORM_THRESHOLD 100000 /* 10K times per second */ #define DELAY_INTR_TIME 1 /* 1ms */ #define DELAY_DURATION 100000 /* 100ms of total duration for delay intr */ #define TIME_TO_CHECK_AGAIN 2 /* 2seconds */ struct intr_monitor_setting_t { bool enable; uint32_t threshold; /* intr count in probe_period when intr storm happens */ uint32_t probe_period; /* seconds: the period to probe intr data */ uint32_t delay_time; /* ms: the time to delay each intr injection */ uint32_t delay_duration; /* us: the delay duration, after it, intr injection restore to normal */ }; union intr_monitor_t { struct acrn_intr_monitor monitor; char reserved[4096]; } __aligned(4096); static union intr_monitor_t intr_data; static uint64_t intr_cnt_buf[MAX_PTDEV_NUM * 2]; static pthread_t intr_storm_monitor_pid; static struct intr_monitor_setting_t intr_monitor_setting = { .enable = false, }; /* switch macro, just open in debug */ /* #define INTR_MONITOR_DBG */ #ifdef INTR_MONITOR_DBG static FILE * dbg_file; #define DPRINTF(format, args...) \ do { fprintf(dbg_file, format, args); fflush(dbg_file); } while (0) /* this is a debug function */ static void write_intr_data_to_file(const struct acrn_intr_monitor *hdr) { static int wr_cnt; int j; wr_cnt++; fprintf(dbg_file, "\n==%d time devs=%d==\n", wr_cnt, hdr->buf_cnt / 2); fprintf(dbg_file, "IRQ\t\tCount\n"); for (j = 0; j < hdr->buf_cnt; j += 2) { if (hdr->buffer[j + 1] != 0) { fprintf(dbg_file, "%ld\t\t%ld\n", hdr->buffer[j], hdr->buffer[j + 1]); } } fflush(dbg_file); } #else #define DPRINTF(format, arg...) #endif static void *intr_storm_monitor_thread(void *arg) { struct vmctx *ctx = (struct vmctx *)arg; struct acrn_intr_monitor *hdr = &intr_data.monitor; uint64_t delta = 0UL; int ret, i; #ifdef INTR_MONITOR_DBG dbg_file = fopen("/tmp/intr_log", "w+"); #endif sleep(intr_monitor_setting.probe_period); /* first to get interrupt data */ hdr->cmd = INTR_CMD_GET_DATA; hdr->buf_cnt = MAX_PTDEV_NUM * 2; memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt); ret = vm_intr_monitor(ctx, hdr); if (ret) { DPRINTF("first get intr data failed, ret: %d\n", ret); intr_storm_monitor_pid = 0; return NULL; } while (1) { #ifdef INTR_MONITOR_DBG write_intr_data_to_file(hdr); #endif memcpy(intr_cnt_buf, hdr->buffer, sizeof(uint64_t) * hdr->buf_cnt); sleep(intr_monitor_setting.probe_period); /* next time to get interrupt data */ memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt); ret = vm_intr_monitor(ctx, hdr); if (ret) { pr_err("next get intr data failed, ret: %d\n", ret); intr_storm_monitor_pid = 0; break; } /* * calc the delta of the two times count of interrupt; * compare the IRQ num first, if not same just drop it, * for it just happens rarelly when devices dynamically * allocation in SOS or UOS, it can be calc next time */ for (i = 0; i < hdr->buf_cnt; i += 2) { if (hdr->buffer[i] != intr_cnt_buf[i]) continue; /* avoid delta overflow */ if (hdr->buffer[i + 1] < intr_cnt_buf[i + 1]) continue; delta = hdr->buffer[i + 1] - intr_cnt_buf[i + 1]; if (delta > intr_monitor_setting.threshold) { #ifdef INTR_MONITOR_DBG write_intr_data_to_file(hdr); #endif break; } } /* storm detected, handle the intr abnormal status */ if (i < hdr->buf_cnt) { pr_notice("irq=%ld, delta=%ld\n", intr_cnt_buf[i], delta); hdr->cmd = INTR_CMD_DELAY_INT; hdr->buffer[0] = intr_monitor_setting.delay_time; vm_intr_monitor(ctx, hdr); usleep(intr_monitor_setting.delay_duration); /* sleep-delay intr */ hdr->buffer[0] = 0; /* cancel to delay intr */ vm_intr_monitor(ctx, hdr); sleep(TIME_TO_CHECK_AGAIN); /* time to get data again */ hdr->cmd = INTR_CMD_GET_DATA; hdr->buf_cnt = MAX_PTDEV_NUM * 2; memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt); vm_intr_monitor(ctx, hdr); } } return NULL; } static void start_intr_storm_monitor(struct vmctx *ctx) { if (intr_monitor_setting.enable) { int ret = pthread_create(&intr_storm_monitor_pid, NULL, intr_storm_monitor_thread, ctx); if (ret) { pr_err("failed %s %d\n", __func__, __LINE__); intr_storm_monitor_pid = 0; } pthread_setname_np(intr_storm_monitor_pid, "storm_monitor"); pr_info("start monitor interrupt data...\n"); } } static void stop_intr_storm_monitor(void) { if (intr_storm_monitor_pid) { void *ret; pthread_cancel(intr_storm_monitor_pid); pthread_join(intr_storm_monitor_pid, &ret); intr_storm_monitor_pid = 0; } } /* .* interrupt monitor setting params, current interrupt mitigation will delay UOS's .* pass-through devices' interrupt injection, the settings input from acrn-dm: .* params: .* threshold: each intr count/second when intr storm happens; .* probe_period: seconds -- the period to probe intr data; .* delay_time: ms -- the time to delay each intr injection; * delay_duration; us -- the delay duration, after it, intr injection restore to normal .*/ int acrn_parse_intr_monitor(const char *opt) { uint32_t threshold, period, delay, duration; char *cp; if((!dm_strtoui(opt, &cp, 10, &threshold) && *cp == ',') && (!dm_strtoui(cp + 1, &cp, 10, &period) && *cp == ',') && (!dm_strtoui(cp + 1, &cp, 10, &delay) && *cp == ',') && (!dm_strtoui(cp + 1, &cp, 10, &duration))) { pr_dbg("interrupt storm monitor params: %d, %d, %d, %d\n", threshold, period, delay, duration); } else { pr_err("%s: not correct, it should be like: --intr_monitor 10000,10,1,100, please check!\n", opt); return -1; } intr_monitor_setting.enable = true; intr_monitor_setting.threshold = threshold * period; intr_monitor_setting.probe_period = period; intr_monitor_setting.delay_time = delay; intr_monitor_setting.delay_duration = duration * 1000; return 0; } struct vm_ops { char name[16]; void *arg; struct monitor_vm_ops *ops; LIST_ENTRY(vm_ops) list; }; static unsigned wakeup_reason = 0; unsigned get_wakeup_reason(void) { return wakeup_reason; } int set_wakeup_timer(time_t t) { int acrnd_fd; struct mngr_msg req; struct mngr_msg ack; int ret; acrnd_fd = mngr_open_un("acrnd", MNGR_CLIENT); if (acrnd_fd < 0) { return -1; } req.magic = MNGR_MSG_MAGIC; req.msgid = ACRND_TIMER; req.timestamp = time(NULL); req.data.rtc_timer.t = t; strncpy(req.data.rtc_timer.vmname, vmname, sizeof(req.data.rtc_timer.vmname)); memset(&ack, 0, sizeof(struct mngr_msg)); ret = mngr_send_msg(acrnd_fd, &req, &ack, 2); mngr_close(acrnd_fd); if (ret != sizeof(ack)) { pr_err("%s %d\r\n", __func__, __LINE__); return -1; } return ack.data.err; } static LIST_HEAD(vm_ops_list, vm_ops) vm_ops_head; static pthread_mutex_t vm_ops_mtx = PTHREAD_MUTEX_INITIALIZER; int monitor_register_vm_ops(struct monitor_vm_ops *mops, void *arg, const char *name) { struct vm_ops *ops; if (!mops) { pr_err("%s %d\r\n", __func__, __LINE__); return -1; } ops = calloc(1, sizeof(*ops)); if (!ops) { pr_err("Alloc ops"); return -1; } if (name) strncpy(ops->name, name, sizeof(ops->name) - 1); ops->ops = mops; ops->arg = arg; pthread_mutex_lock(&vm_ops_mtx); LIST_INSERT_HEAD(&vm_ops_head, ops, list); pthread_mutex_unlock(&vm_ops_mtx); return 0; } static int monitor_fd = -1; /* handlers */ #define ACK_TIMEOUT 1 #define DEFINE_HANDLER(name, func) \ static void name(struct mngr_msg *msg, int client_fd, void *param) \ { \ struct mngr_msg ack; \ struct vm_ops *ops; \ \ int ret = 0; \ int count = 0; \ \ ack.magic = MNGR_MSG_MAGIC; \ ack.msgid = msg->msgid; \ ack.timestamp = msg->timestamp; \ \ LIST_FOREACH(ops, &vm_ops_head, list) { \ if (ops->ops->func) { \ ret += ops->ops->func(ops->arg); \ count++; \ } \ } \ \ if (!count) { \ ack.data.err = -1; \ pr_err("No handler for id:%u\r\n", msg->msgid); \ } else \ ack.data.err = ret; \ \ mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); \ } DEFINE_HANDLER(handle_suspend, suspend); static void handle_stop(struct mngr_msg *msg, int client_fd, void *param) { struct mngr_msg ack; struct vm_ops *ops; int ret = 0; int count = 0; ack.magic = MNGR_MSG_MAGIC; ack.msgid = msg->msgid; ack.timestamp = msg->timestamp; if (msg->data.acrnd_stop.force && !is_rtvm) { pr_info("%s: setting VM state to %s\n", __func__, vm_state_to_str(VM_SUSPEND_POWEROFF)); vm_set_suspend_mode(VM_SUSPEND_POWEROFF); ack.data.err = 0; } else { LIST_FOREACH(ops, &vm_ops_head, list) { if (ops->ops->stop) { ret += ops->ops->stop(ops->arg); count++; } } if (!count) { ack.data.err = -1; pr_err("No handler for id:%u\r\n", msg->msgid); } else ack.data.err = ret; } mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); } static void handle_resume(struct mngr_msg *msg, int client_fd, void *param) { struct mngr_msg ack; struct vm_ops *ops; int ret = 0; int count = 0; ack.magic = MNGR_MSG_MAGIC; ack.msgid = msg->msgid; ack.timestamp = msg->timestamp; wakeup_reason = msg->data.reason; LIST_FOREACH(ops, &vm_ops_head, list) { if (ops->ops->resume) { ret += ops->ops->resume(ops->arg); count++; } } if (!count) { ack.data.err = -1; pr_err("No handler for id:%u\r\n", msg->msgid); } else ack.data.err = ret; mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); } static void handle_query(struct mngr_msg *msg, int client_fd, void *param) { struct mngr_msg ack; struct vm_ops *ops; ack.magic = MNGR_MSG_MAGIC; ack.msgid = msg->msgid; ack.timestamp = msg->timestamp; ack.data.state = -1; LIST_FOREACH(ops, &vm_ops_head, list) { if (ops->ops->query) { ack.data.state = ops->ops->query(ops->arg); break; } } mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); } static void handle_blkrescan(struct mngr_msg *msg, int client_fd, void *param) { struct mngr_msg ack; struct vm_ops *ops; int ret = 0; int count = 0; ack.magic = MNGR_MSG_MAGIC; ack.msgid = msg->msgid; ack.timestamp = msg->timestamp; wakeup_reason = msg->data.reason; LIST_FOREACH(ops, &vm_ops_head, list) { if (ops->ops->rescan) { ret += ops->ops->rescan(ops->arg, msg->data.devargs); count++; } } if (!count) { ack.data.err = -1; pr_err("No handler for id:%u\r\n", msg->msgid); } else ack.data.err = ret; mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); } static struct monitor_vm_ops pmc_ops = { .stop = NULL, .resume = vm_monitor_resume, .suspend = NULL, .pause = NULL, .unpause = NULL, .query = vm_monitor_query, }; int monitor_init(struct vmctx *ctx) { int ret; char path[128] = {}; ret = check_dir(ACRN_DM_BASE_PATH, CHK_CREAT); if (ret) { pr_err("%s %d\r\n", __func__, __LINE__); goto dir_err; } ret = check_dir(ACRN_DM_SOCK_PATH, CHK_CREAT); if (ret) { pr_err("%s %d\r\n", __func__, __LINE__); goto dir_err; } snprintf(path, sizeof(path) - 1, "%s.monitor", vmname); monitor_fd = mngr_open_un(path, MNGR_SERVER); if (monitor_fd < 0) { pr_err("%s %d\r\n", __func__, __LINE__); goto server_err; } ret = 0; ret += mngr_add_handler(monitor_fd, DM_STOP, handle_stop, NULL); ret += mngr_add_handler(monitor_fd, DM_SUSPEND, handle_suspend, NULL); ret += mngr_add_handler(monitor_fd, DM_RESUME, handle_resume, NULL); ret += mngr_add_handler(monitor_fd, DM_QUERY, handle_query, NULL); ret += mngr_add_handler(monitor_fd, DM_BLKRESCAN, handle_blkrescan, NULL); if (ret) { pr_err("%s %d\r\n", __func__, __LINE__); goto handlers_err; } monitor_register_vm_ops(&pmc_ops, ctx, "PMC_VM_OPs"); start_intr_storm_monitor(ctx); return 0; handlers_err: mngr_close(monitor_fd); monitor_fd = -1; server_err: dir_err: return -1; } void monitor_close(void) { if (monitor_fd >= 0) mngr_close(monitor_fd); stop_intr_storm_monitor(); }