Discussion:
[libvirt] [PATCHv8 02/17] util: Introduce resctrl monitor for CMT
Wang Huaqiang
2018-11-12 13:31:33 UTC
Permalink
Cache Monitoring Technology (aka CMT) provides the capability
to report cache utilization information of system task.

This patch introduces the concept of resctrl monitor through
data structure virResctrlMonitor.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/libvirt_private.syms | 1 +
src/util/virresctrl.c | 80 ++++++++++++++++++++++++++++++++++++++++++++----
src/util/virresctrl.h | 9 ++++++
3 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 335210c..d2573c5 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2680,6 +2680,7 @@ virResctrlInfoGetCache;
virResctrlInfoGetMonitorPrefix;
virResctrlInfoMonFree;
virResctrlInfoNew;
+virResctrlMonitorNew;


# util/virrotatingfile.h
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 7339e9b..e3c84a3 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -36,9 +36,9 @@ VIR_LOG_INIT("util.virresctrl")


/* Resctrl is short for Resource Control. It might be implemented for various
- * resources. Currently this supports cache allocation technology (aka CAT) and
- * memory bandwidth allocation (aka MBA). More resources technologies may be
- * added in the future.
+ * resources. Currently this supports cache allocation technology (aka CAT),
+ * memory bandwidth allocation (aka MBA) and cache monitoring technology (aka
+ * CMT). More resources technologies may be added in the future.
*/


@@ -105,6 +105,7 @@ typedef virResctrlAllocMemBW *virResctrlAllocMemBWPtr;
/* Class definitions and initializations */
static virClassPtr virResctrlInfoClass;
static virClassPtr virResctrlAllocClass;
+static virClassPtr virResctrlMonitorClass;


/* virResctrlInfo */
@@ -224,11 +225,16 @@ virResctrlInfoMonFree(virResctrlInfoMonPtr mon)
}


-/* virResctrlAlloc */
+/* virResctrlAlloc and virResctrlMonitor*/

/*
- * virResctrlAlloc represents one allocation (in XML under cputune/cachetune and
- * consequently a directory under /sys/fs/resctrl). Since it can have multiple
+ * virResctrlAlloc and virResctrlMonitor are representing a resource control
+ * group (in XML under cputune/cachetune and consequently a directory under
+ * /sys/fs/resctrl). virResctrlAlloc is the data structure for resource
+ * allocation, while the virResctrlMonitor represents the resource monitoring
+ * part.
+ *
+ * virResctrlAlloc represents one allocation. Since it can have multiple
* parts of multiple caches allocated it is represented as bunch of nested
* sparse arrays (by sparse I mean array of pointers so that each might be NULL
* in case there is no allocation for that particular cache allocation (level,
@@ -275,6 +281,18 @@ virResctrlInfoMonFree(virResctrlInfoMonPtr mon)
* a sparse array to represent whether a memory bandwidth allocation happens
* on corresponding node. The available memory controller number is collected
* in 'virResctrlInfo'.
+ *
+ * =====Cache monitoring technology (CMT)=====
+ *
+ * Cache monitoring technology is used to perceive how many cache the process
+ * is using actually. virResctrlMonitor represents the resource control
+ * monitoring group, it is supported to monitor resource utilization
+ * information on granularity of vcpu.
+ *
+ * From hardware perspective, cache monitoring technology (CMT), memory
+ * bandwidth technology (MBM), as well as the CAT and MBA, are all orthogonal
+ * features. The monitor will be created under the scope of default resctrl
+ * group if no specific CAT or MBA entries are provided for the guest."
*/
struct _virResctrlAllocPerType {
/* There could be bool saying whether this is set or not, but since everything
@@ -320,6 +338,30 @@ struct _virResctrlAlloc {
char *path;
};

+/*
+ * virResctrlMonitor is the data structure for resctrl monitor. Resctrl
+ * monitor represents a resctrl monitoring group, which can be used to
+ * monitor the resource utilization information for either cache or
+ * memory bandwidth.
+ */
+struct _virResctrlMonitor {
+ virObject parent;
+
+ /* Each virResctrlMonitor is associated with one specific allocation,
+ * either the root directory allocation under /sys/fs/resctrl or a
+ * specific allocation defined under the root directory.
+ * This pointer points to the allocation this monitor is associated with.
+ */
+ virResctrlAllocPtr alloc;
+ /* The monitor identifier. For a monitor has the same @path name as its
+ * @alloc, the @id will be set to the same value as it is in @alloc->id.
+ */
+ char *id;
+ /* libvirt-generated path in /sys/fs/resctrl for this particular
+ * monitor */
+ char *path;
+};
+

static void
virResctrlAllocDispose(void *obj)
@@ -369,6 +411,17 @@ virResctrlAllocDispose(void *obj)
}


+static void
+virResctrlMonitorDispose(void *obj)
+{
+ virResctrlMonitorPtr monitor = obj;
+
+ virObjectUnref(monitor->alloc);
+ VIR_FREE(monitor->id);
+ VIR_FREE(monitor->path);
+}
+
+
/* Global initialization for classes */
static int
virResctrlOnceInit(void)
@@ -379,6 +432,9 @@ virResctrlOnceInit(void)
if (!VIR_CLASS_NEW(virResctrlAlloc, virClassForObject()))
return -1;

+ if (!VIR_CLASS_NEW(virResctrlMonitor, virClassForObject()))
+ return -1;
+
return 0;
}

@@ -2372,3 +2428,15 @@ virResctrlAllocRemove(virResctrlAllocPtr alloc)

return ret;
}
+
+
+/* virResctrlMonitor-related definitions */
+
+virResctrlMonitorPtr
+virResctrlMonitorNew(void)
+{
+ if (virResctrlInitialize() < 0)
+ return NULL;
+
+ return virObjectNew(virResctrlMonitorClass);
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 10505e9..eaa077d 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -185,4 +185,13 @@ int
virResctrlInfoGetMonitorPrefix(virResctrlInfoPtr resctrl,
const char *prefix,
virResctrlInfoMonPtr *monitor);
+
+/* Monitor-related things */
+
+typedef struct _virResctrlMonitor virResctrlMonitor;
+typedef virResctrlMonitor *virResctrlMonitorPtr;
+
+
+virResctrlMonitorPtr
+virResctrlMonitorNew(void);
#endif /* __VIR_RESCTRL_H__ */
--
2.7.4
Wang Huaqiang
2018-11-12 13:31:32 UTC
Permalink
Refactor schemas and virresctrl to support optional <cache> element
in <cachetune>.

Later, the monitor entry will be introduced and to be placed
under <cachetune>. Either cache entry or monitor entry is
an optional element of <cachetune>.

An cachetune has no <cache> element is taking the default resource
allocating policy defined in '/sys/fs/resctrl/schemata'.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
docs/formatdomain.html.in | 4 ++--
docs/schemas/domaincommon.rng | 4 ++--
src/util/virresctrl.c | 27 +++++++++++++++++++++++++++
3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 269741a..8850a71 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -943,8 +943,8 @@
<dl>
<dt><code>cache</code></dt>
<dd>
- This element controls the allocation of CPU cache and has the
- following attributes:
+ This optional element controls the allocation of CPU cache and has
+ the following attributes:
<dl>
<dt><code>level</code></dt>
<dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index b9ac5df..2b465be 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -956,7 +956,7 @@
<attribute name="vcpus">
<ref name='cpuset'/>
</attribute>
- <oneOrMore>
+ <zeroOrMore>
<element name="cache">
<attribute name="id">
<ref name='unsignedInt'/>
@@ -980,7 +980,7 @@
</attribute>
</optional>
</element>
- </oneOrMore>
+ </zeroOrMore>
</element>
</zeroOrMore>
<zeroOrMore>
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 5d811a2..7339e9b 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -234,6 +234,11 @@ virResctrlInfoMonFree(virResctrlInfoMonPtr mon)
* in case there is no allocation for that particular cache allocation (level,
* cache, ...) or memory allocation for particular node).
*
+ * Allocation corresponding to root directory, /sys/fs/sysctrl/, defines the
+ * default resource allocating policy, which is created immediately after
+ * mounting, and owns all the tasks and cpus in the system. Cache or memory
+ * bandwidth resource will be shared for tasks in this allocation.
+ *
* =====Cache allocation technology (CAT)=====
*
* Since one allocation can be made for caches on different levels, the first
@@ -2215,6 +2220,15 @@ virResctrlAllocDeterminePath(virResctrlAllocPtr alloc,
return -1;
}

+ /* If the allocation is empty, then the path will be SYSFS_RESCTRL_PATH */
+ if (virResctrlAllocIsEmpty(alloc)) {
+ if (!alloc->path &&
+ VIR_STRDUP(alloc->path, SYSFS_RESCTRL_PATH) < 0)
+ return -1;
+
+ return 0;
+ }
+
if (!alloc->path &&
virAsprintf(&alloc->path, "%s/%s-%s",
SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0)
@@ -2248,6 +2262,10 @@ virResctrlAllocCreate(virResctrlInfoPtr resctrl,
if (virResctrlAllocDeterminePath(alloc, machinename) < 0)
return -1;

+ /* If using the system/default path for the allocation, then we're done */
+ if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
+ return 0;
+
if (virFileExists(alloc->path)) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("Path '%s' for resctrl allocation exists"),
@@ -2302,6 +2320,11 @@ virResctrlAllocAddPID(virResctrlAllocPtr alloc,
char *pidstr = NULL;
int ret = 0;

+ /* If allocation is empty, then no resctrl directory and the 'tasks' file
+ * exists, just return */
+ if (virResctrlAllocIsEmpty(alloc))
+ return 0;
+
if (!alloc->path) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot add pid to non-existing resctrl allocation"));
@@ -2334,6 +2357,10 @@ virResctrlAllocRemove(virResctrlAllocPtr alloc)
{
int ret = 0;

+ /* Do not destroy if path is the system/default path for the allocation */
+ if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
+ return 0;
+
if (!alloc->path)
return 0;
--
2.7.4
John Ferlan
2018-11-13 23:09:13 UTC
Permalink
Post by Wang Huaqiang
Refactor schemas and virresctrl to support optional <cache> element
in <cachetune>.
Later, the monitor entry will be introduced and to be placed
under <cachetune>. Either cache entry or monitor entry is
an optional element of <cachetune>.
An cachetune has no <cache> element is taking the default resource
allocating policy defined in '/sys/fs/resctrl/schemata'.
---
docs/formatdomain.html.in | 4 ++--
docs/schemas/domaincommon.rng | 4 ++--
src/util/virresctrl.c | 27 +++++++++++++++++++++++++++
3 files changed, 31 insertions(+), 4 deletions(-)
[...]
Post by Wang Huaqiang
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 5d811a2..7339e9b 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
[...]
Post by Wang Huaqiang
@@ -2302,6 +2320,11 @@ virResctrlAllocAddPID(virResctrlAllocPtr alloc,
char *pidstr = NULL;
int ret = 0;
+ /* If allocation is empty, then no resctrl directory and the 'tasks' file
+ * exists, just return */
/* If the allocation is empty, then it is impossible to add a PID to
* allocation due to lacking of its 'tasks' file so just return */
Post by Wang Huaqiang
+ if (virResctrlAllocIsEmpty(alloc))
+ return 0;
+
if (!alloc->path) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Cannot add pid to non-existing resctrl allocation"));
@@ -2334,6 +2357,10 @@ virResctrlAllocRemove(virResctrlAllocPtr alloc)
{
int ret = 0;
+ /* Do not destroy if path is the system/default path for the allocation */
+ if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
+ return 0;
+
if (!alloc->path)
return 0;
With those changes,

Reviewed-by: John Ferlan <***@redhat.com>
(As with the rest, I can make the changes for you).


John
Wang Huaqiang
2018-11-12 13:31:40 UTC
Permalink
Refactor virResctrlAllocSetID.
In new code the error of id overwriting is reported promptly.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/util/virresctrl.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 3216abe..4e4831c 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -1361,17 +1361,32 @@ virResctrlAllocForeachMemory(virResctrlAllocPtr alloc,
}


-int
-virResctrlAllocSetID(virResctrlAllocPtr alloc,
- const char *id)
+static int
+virResctrlSetID(char **resctrlid,
+ const char *id)
{
if (!id) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Resctrl allocation 'id' cannot be NULL"));
+ _("New resctrl 'id' cannot be NULL"));
+ return -1;
+ }
+
+ if (*resctrlid) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Attempt to overwrite resctrlid='%s' with id='%s'"),
+ *resctrlid, id);
return -1;
}

- return VIR_STRDUP(alloc->id, id);
+ return VIR_STRDUP(*resctrlid, id);
+}
+
+
+int
+virResctrlAllocSetID(virResctrlAllocPtr alloc,
+ const char *id)
+{
+ return virResctrlSetID(&alloc->id, id);
}
--
2.7.4
John Ferlan
2018-11-13 23:26:44 UTC
Permalink
Post by Wang Huaqiang
Refactor virResctrlAllocSetID.
In new code the error of id overwriting is reported promptly.
---
src/util/virresctrl.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Wang Huaqiang
2018-11-12 13:31:35 UTC
Permalink
Add interface for resctrl monitor to determine the path.

Signed-off-by: Wang Huaqiang <***@intel.com>

Reviewed-by: John Ferlan <***@redhat.com>
---
src/libvirt_private.syms | 1 +
src/util/virresctrl.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 5 ++++-
3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d2573c5..5235046 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2680,6 +2680,7 @@ virResctrlInfoGetCache;
virResctrlInfoGetMonitorPrefix;
virResctrlInfoMonFree;
virResctrlInfoNew;
+virResctrlMonitorDeterminePath;
virResctrlMonitorNew;


diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 463555c..aa062c3 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2462,3 +2462,58 @@ virResctrlMonitorNew(void)

return virObjectNew(virResctrlMonitorClass);
}
+
+
+/*
+ * virResctrlMonitorDeterminePath
+ *
+ * @monitor: Pointer to a resctrl monitor
+ * @machinename: Name string of the VM
+ *
+ * Determines the directory path that the underlying resctrl group will be
+ * created with.
+ *
+ * A monitor represents a directory under resource control file system,
+ * its directory path could be the same path as @monitor->alloc, could be a
+ * path of directory under 'mon_groups' of @monitor->alloc, or a path of
+ * directory under '/sys/fs/resctrl/mon_groups' if @monitor->alloc is NULL.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+virResctrlMonitorDeterminePath(virResctrlMonitorPtr monitor,
+ const char *machinename)
+{
+ VIR_AUTOFREE(char *) parentpath = NULL;
+
+ if (!monitor) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid resctrl monitor"));
+ return -1;
+ }
+
+ if (monitor->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Resctrl monitor path is already set to '%s'"),
+ monitor->path);
+ return -1;
+ }
+
+ if (monitor->id && monitor->alloc && monitor->alloc->id) {
+ if (STREQ(monitor->id, monitor->alloc->id)) {
+ if (VIR_STRDUP(monitor->path, monitor->alloc->path) < 0)
+ return -1;
+ return 0;
+ }
+ }
+
+ if (virAsprintf(&parentpath, "%s/mon_groups", monitor->alloc->path) < 0)
+ return -1;
+
+ monitor->path = virResctrlDeterminePath(parentpath, machinename,
+ monitor->id);
+ if (!monitor->path)
+ return -1;
+
+ return 0;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index eaa077d..baae554 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -191,7 +191,10 @@ virResctrlInfoGetMonitorPrefix(virResctrlInfoPtr resctrl,
typedef struct _virResctrlMonitor virResctrlMonitor;
typedef virResctrlMonitor *virResctrlMonitorPtr;

-
virResctrlMonitorPtr
virResctrlMonitorNew(void);
+
+int
+virResctrlMonitorDeterminePath(virResctrlMonitorPtr monitor,
+ const char *machinename);
#endif /* __VIR_RESCTRL_H__ */
--
2.7.4
John Ferlan
2018-11-13 23:14:21 UTC
Permalink
Post by Wang Huaqiang
Add interface for resctrl monitor to determine the path.
---
src/libvirt_private.syms | 1 +
src/util/virresctrl.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 5 ++++-
3 files changed, 60 insertions(+), 1 deletion(-)
[...]
Post by Wang Huaqiang
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 463555c..aa062c3 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
[...]
Post by Wang Huaqiang
+int
+virResctrlMonitorDeterminePath(virResctrlMonitorPtr monitor,
+ const char *machinename)
+{
+ VIR_AUTOFREE(char *) parentpath = NULL;
+
+ if (!monitor) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid resctrl monitor"));
+ return -1;
+ }
In the v7 in a follow-up to patch 4 we agreed the following check was fine:

if (!monitor->alloc) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Missing resctrl monitor alloc"));
return -1;
}
Post by Wang Huaqiang
+
+ if (monitor->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Resctrl monitor path is already set to '%s'"),
+ monitor->path);
+ return -1;
+ }
+
+ if (monitor->id && monitor->alloc && monitor->alloc->id) {
+ if (STREQ(monitor->id, monitor->alloc->id)) {
+ if (VIR_STRDUP(monitor->path, monitor->alloc->path) < 0)
+ return -1;
+ return 0;
+ }
+ }
Changing the above to

if (STREQ_NULLABLE(monitor->id, monitor->alloc->id)) {
if (VIR_STRDUP(monitor->path, monitor->alloc->path) < 0)
return -1;
return 0;

With that,

Reviewed-by: John Ferlan <***@redhat.com>

John

[...]
Wang Huaqiang
2018-11-12 13:31:42 UTC
Permalink
Add interfaces monitor group to support operations such
as add PID, get ID, remove group ... etc.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/libvirt_private.syms | 5 ++
src/util/virresctrl.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 26 ++++++++
3 files changed, 198 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index a878083..2938833 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2683,7 +2683,12 @@ virResctrlInfoNew;
virResctrlMonitorAddPID;
virResctrlMonitorCreate;
virResctrlMonitorDeterminePath;
+virResctrlMonitorGetCacheOccupancy;
+virResctrlMonitorGetID;
virResctrlMonitorNew;
+virResctrlMonitorRemove;
+virResctrlMonitorSetAlloc;
+virResctrlMonitorSetID;


# util/virrotatingfile.h
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index ed682c9..9e7de62 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2590,3 +2590,170 @@ virResctrlMonitorSetID(virResctrlMonitorPtr monitor,
{
return virResctrlSetID(&monitor->id, id);
}
+
+
+const char *
+virResctrlMonitorGetID(virResctrlMonitorPtr monitor)
+{
+ return monitor->id;
+}
+
+
+void
+virResctrlMonitorSetAlloc(virResctrlMonitorPtr monitor,
+ virResctrlAllocPtr alloc)
+{
+ monitor->alloc = virObjectRef(alloc);
+}
+
+
+int
+virResctrlMonitorRemove(virResctrlMonitorPtr monitor)
+{
+ int ret = 0;
+
+ if (!monitor->path)
+ return 0;
+
+ if (STREQ(monitor->path, monitor->alloc->path))
+ return 0;
+
+ VIR_DEBUG("Removing resctrl monitor path=%s", monitor->path);
+ if (rmdir(monitor->path) != 0 && errno != ENOENT) {
+ ret = -errno;
+ VIR_ERROR(_("Unable to remove %s (%d)"), monitor->path, errno);
+ }
+
+ return ret;
+}
+
+
+static int
+virResctrlMonitorStatsSorter(const void *a,
+ const void *b)
+{
+ return ((virResctrlMonitorStatsPtr)a)->id
+ - ((virResctrlMonitorStatsPtr)b)->id;
+}
+
+
+/*
+ * virResctrlMonitorGetStats
+ *
+ * @monitor: The monitor that the statistic data will be retrieved from.
+ * @resource: The name for resource name. 'llc_occupancy' for cache resource.
+ * "mbm_total_bytes" and "mbm_local_bytes" for memory bandwidth resource.
+ * @stats: Array of virResctrlMonitorStatsPtr for holding cache or memory
+ * bandwidth usage data.
+ * @nstats: A size_t pointer to hold the returned array length of @stats
+ *
+ * Get cache or memory bandwidth utilization information.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int
+virResctrlMonitorGetStats(virResctrlMonitorPtr monitor,
+ const char *resource,
+ virResctrlMonitorStatsPtr *stats,
+ size_t *nstats)
+{
+ int rv = -1;
+ int ret = -1;
+ DIR *dirp = NULL;
+ char *datapath = NULL;
+ struct dirent *ent = NULL;
+ virResctrlMonitorStatsPtr stat = NULL;
+
+ if (!monitor) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid resctrl monitor"));
+ return -1;
+ }
+
+ if (virAsprintf(&datapath, "%s/mon_data", monitor->path) < 0)
+ return -1;
+
+ if (virDirOpen(&dirp, datapath) < 0)
+ goto cleanup;
+
+ *nstats = 0;
+ while (virDirRead(dirp, &ent, datapath) > 0) {
+ char *node_id = NULL;
+
+ if (VIR_ALLOC(stat) < 0)
+ goto cleanup;
+
+ /* Looking for directory that contains resource utilization
+ * information file. The directory name is arranged in format
+ * "mon_<node_name>_<node_id>". For example, "mon_L3_00" and
+ * "mon_L3_01" are two target directories for a two nodes system
+ * with resource utilization data file for each node respectively.
+ */
+ if (ent->d_type != DT_DIR)
+ continue;
+
+ /* Looking for directory has a prefix 'mon_L' */
+ if (!(node_id = STRSKIP(ent->d_name, "mon_L")))
+ continue;
+
+ /* Looking for directory has another '_' */
+ node_id = strchr(node_id, '_');
+ if (!node_id)
+ continue;
+
+ /* Skip the character '_' */
+ if (!(node_id = STRSKIP(node_id, "_")))
+ continue;
+
+ /* The node ID number should be here, parsing it. */
+ if (virStrToLong_uip(node_id, NULL, 0, &stat->id) < 0)
+ goto cleanup;
+
+ rv = virFileReadValueUint(&stat->val, "%s/%s/%s", datapath,
+ ent->d_name, resource);
+ if (rv == -2) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("File '%s/%s/%s' does not exist."),
+ datapath, ent->d_name, resource);
+ }
+ if (rv < 0)
+ goto cleanup;
+
+ if (VIR_APPEND_ELEMENT(*stats, *nstats, *stat) < 0)
+ goto cleanup;
+ }
+
+ /* Sort in id's ascending order */
+ if (*nstats)
+ qsort(*stats, *nstats, sizeof(*stat), virResctrlMonitorStatsSorter);
+
+ ret = 0;
+ cleanup:
+ VIR_FREE(datapath);
+ VIR_FREE(stat);
+ VIR_DIR_CLOSE(dirp);
+ return ret;
+}
+
+
+/*
+ * virResctrlMonitorGetCacheOccupancy
+ *
+ * @monitor: The monitor that the statistic data will be retrieved from.
+ * @stats: Array of virResctrlMonitorStatsPtr for receiving cache occupancy
+ * data. Caller is responsible to free this array.
+ * @nstats: A size_t pointer to hold the returned array length of @caches
+ *
+ * Get cache or memory bandwidth utilization information.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+
+int
+virResctrlMonitorGetCacheOccupancy(virResctrlMonitorPtr monitor,
+ virResctrlMonitorStatsPtr *stats,
+ size_t *nstats)
+{
+ return virResctrlMonitorGetStats(monitor, "llc_occupancy",
+ stats, nstats);
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 76e40a2..45ec967 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -191,6 +191,13 @@ virResctrlInfoGetMonitorPrefix(virResctrlInfoPtr resctrl,
typedef struct _virResctrlMonitor virResctrlMonitor;
typedef virResctrlMonitor *virResctrlMonitorPtr;

+typedef struct _virResctrlMonitorStats virResctrlMonitorStats;
+typedef virResctrlMonitorStats *virResctrlMonitorStatsPtr;
+struct _virResctrlMonitorStats {
+ unsigned int id;
+ unsigned int val;
+};
+
virResctrlMonitorPtr
virResctrlMonitorNew(void);

@@ -205,4 +212,23 @@ virResctrlMonitorAddPID(virResctrlMonitorPtr monitor,
int
virResctrlMonitorCreate(virResctrlMonitorPtr monitor,
const char *machinename);
+
+int
+virResctrlMonitorSetID(virResctrlMonitorPtr monitor,
+ const char *id);
+
+const char *
+virResctrlMonitorGetID(virResctrlMonitorPtr monitor);
+
+void
+virResctrlMonitorSetAlloc(virResctrlMonitorPtr monitor,
+ virResctrlAllocPtr alloc);
+
+int
+virResctrlMonitorRemove(virResctrlMonitorPtr monitor);
+
+int
+virResctrlMonitorGetCacheOccupancy(virResctrlMonitorPtr monitor,
+ virResctrlMonitorStatsPtr *caches,
+ size_t *ncaches);
#endif /* __VIR_RESCTRL_H__ */
--
2.7.4
John Ferlan
2018-11-13 23:31:59 UTC
Permalink
Post by Wang Huaqiang
Add interfaces monitor group to support operations such
as add PID, get ID, remove group ... etc.
virResctrlMonitorGetStats is perhaps a bit more than just basic stuff!
So I can add:

Implement the virResctrlMonitorGetStats to fetch all the
statistical data and the virResctrlMonitorGetCacheOccupancy
in order to fetch the cache specific "llc_oocupancy" value.
Post by Wang Huaqiang
---
src/libvirt_private.syms | 5 ++
src/util/virresctrl.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++
src/util/virresctrl.h | 26 ++++++++
3 files changed, 198 insertions(+)
Reviewed-by: John Ferlan <***@redhat.com>

John
Wang Huaqiang
2018-11-12 13:31:37 UTC
Permalink
Add interface for adding task PID to the monitor.

Signed-off-by: Wang Huaqiang <***@intel.com>

Reviewed-by: John Ferlan <***@redhat.com>
---
src/libvirt_private.syms | 1 +
src/util/virresctrl.c | 8 ++++++++
src/util/virresctrl.h | 4 ++++
3 files changed, 13 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 5235046..e175c8b 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2680,6 +2680,7 @@ virResctrlInfoGetCache;
virResctrlInfoGetMonitorPrefix;
virResctrlInfoMonFree;
virResctrlInfoNew;
+virResctrlMonitorAddPID;
virResctrlMonitorDeterminePath;
virResctrlMonitorNew;

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 0bac032..7bd52cd 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2525,3 +2525,11 @@ virResctrlMonitorDeterminePath(virResctrlMonitorPtr monitor,

return 0;
}
+
+
+int
+virResctrlMonitorAddPID(virResctrlMonitorPtr monitor,
+ pid_t pid)
+{
+ return virResctrlAddPID(monitor->path, pid);
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index baae554..52d283a 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -197,4 +197,8 @@ virResctrlMonitorNew(void);
int
virResctrlMonitorDeterminePath(virResctrlMonitorPtr monitor,
const char *machinename);
+
+int
+virResctrlMonitorAddPID(virResctrlMonitorPtr monitor,
+ pid_t pid);
#endif /* __VIR_RESCTRL_H__ */
--
2.7.4
Wang Huaqiang
2018-11-12 13:31:44 UTC
Permalink
Introducing <monitor> element under <cachetune> to represent
a cache monitor.

Signed-off-by: Wang Huaqiang <***@intel.com>

Reviewed-by: John Ferlan <***@redhat.com>
---
docs/formatdomain.html.in | 26 +++
docs/schemas/domaincommon.rng | 10 +
src/conf/domain_conf.c | 234 ++++++++++++++++++++-
src/conf/domain_conf.h | 11 +
tests/genericxml2xmlindata/cachetune-cdp.xml | 3 +
.../cachetune-colliding-monitor.xml | 30 +++
tests/genericxml2xmlindata/cachetune-small.xml | 7 +
tests/genericxml2xmltest.c | 2 +
8 files changed, 322 insertions(+), 1 deletion(-)
create mode 100644 tests/genericxml2xmlindata/cachetune-colliding-monitor.xml

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 8850a71..92ad4b2 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -759,6 +759,12 @@
&lt;cachetune vcpus='0-3'&gt;
&lt;cache id='0' level='3' type='both' size='3' unit='MiB'/&gt;
&lt;cache id='1' level='3' type='both' size='3' unit='MiB'/&gt;
+ &lt;monitor level='3' vcpus='1'/&gt;
+ &lt;monitor level='3' vcpus='0-3'/&gt;
+ &lt;/cachetune&gt;
+ &lt;cachetune vcpus='4-5'&gt;
+ &lt;monitor level='3' vcpus='4'/&gt;
+ &lt;monitor level='3' vcpus='5'/&gt;
&lt;/cachetune&gt;
&lt;memorytune vcpus='0-3'&gt;
&lt;node id='0' bandwidth='60'/&gt;
@@ -978,6 +984,26 @@
</dd>
</dl>
</dd>
+ <dt><code>monitor</code><span class="since">Since 4.10.0</span></dt>
+ <dd>
+ The optional element <code>monitor</code> creates the cache
+ monitor(s) for current cache allocation and has the following
+ required attributes:
+ <dl>
+ <dt><code>level</code></dt>
+ <dd>
+ Host cache level the monitor belongs to.
+ </dd>
+ <dt><code>vcpus</code></dt>
+ <dd>
+ vCPU list the monitor applies to. A monitor's vCPU list
+ can only be the member(s) of the vCPU list of the associated
+ allocation. The default monitor has the same vCPU list as the
+ associated allocation. For non-default monitors, overlapping
+ vCPUs are not permitted.
+ </dd>
+ </dl>
+ </dd>
</dl>
</dd>

diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 2b465be..1296b7f 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -981,6 +981,16 @@
</optional>
</element>
</zeroOrMore>
+ <zeroOrMore>
+ <element name="monitor">
+ <attribute name="level">
+ <ref name='unsignedInt'/>
+ </attribute>
+ <attribute name="vcpus">
+ <ref name='cpuset'/>
+ </attribute>
+ </element>
+ </zeroOrMore>
</element>
</zeroOrMore>
<zeroOrMore>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 8433214..7696098 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2955,13 +2955,31 @@ virDomainLoaderDefFree(virDomainLoaderDefPtr loader)


static void
+virDomainResctrlMonDefFree(virDomainResctrlMonDefPtr domresmon)
+{
+ if (!domresmon)
+ return;
+
+ virBitmapFree(domresmon->vcpus);
+ virObjectUnref(domresmon->instance);
+ VIR_FREE(domresmon);
+}
+
+
+static void
virDomainResctrlDefFree(virDomainResctrlDefPtr resctrl)
{
+ size_t i = 0;
+
if (!resctrl)
return;

+ for (i = 0; i < resctrl->nmonitors; i++)
+ virDomainResctrlMonDefFree(resctrl->monitors[i]);
+
virObjectUnref(resctrl->alloc);
virBitmapFree(resctrl->vcpus);
+ VIR_FREE(resctrl->monitors);
VIR_FREE(resctrl);
}

@@ -18948,6 +18966,177 @@ virDomainCachetuneDefParseCache(xmlXPathContextPtr ctxt,
}


+/* Checking if the monitor's vcpus is conflicted with existing allocation
+ * and monitors.
+ *
+ * Returns 1 if @vcpus equals to @resctrl->vcpus, then the monitor will
+ * share the underlying resctrl group with @resctrl->alloc. Returns - 1
+ * if any conflict found. Returns 0 if no conflict and @vcpus is not equal
+ * to @resctrl->vcpus.
+ */
+static int
+virDomainResctrlMonValidateVcpus(virDomainResctrlDefPtr resctrl,
+ virBitmapPtr vcpus)
+{
+ size_t i = 0;
+ int vcpu = -1;
+ size_t mons_same_alloc_vcpus = 0;
+
+ if (virBitmapIsAllClear(vcpus)) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("vcpus is empty"));
+ return -1;
+ }
+
+ while ((vcpu = virBitmapNextSetBit(vcpus, vcpu)) >= 0) {
+ if (!virBitmapIsBitSet(resctrl->vcpus, vcpu)) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("Monitor vcpus conflicts with allocation"));
+ return -1;
+ }
+ }
+
+ if (virBitmapEqual(vcpus, resctrl->vcpus))
+ return 1;
+
+ for (i = 0; i < resctrl->nmonitors; i++) {
+ if (virBitmapEqual(resctrl->vcpus, resctrl->monitors[i]->vcpus)) {
+ mons_same_alloc_vcpus++;
+ continue;
+ }
+
+ if (virBitmapOverlaps(vcpus, resctrl->monitors[i]->vcpus)) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("Monitor vcpus conflicts with monitors"));
+
+ return -1;
+ }
+ }
+
+ if (mons_same_alloc_vcpus > 1) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("Too many monitors have the same vcpu as allocation"));
+ return -1;
+ }
+
+ return 0;
+}
+
+
+#define VIR_DOMAIN_RESCTRL_MONITOR_CACHELEVEL 3
+
+static int
+virDomainResctrlMonDefParse(virDomainDefPtr def,
+ xmlXPathContextPtr ctxt,
+ xmlNodePtr node,
+ virResctrlMonitorType tag,
+ virDomainResctrlDefPtr resctrl)
+{
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ xmlNodePtr oldnode = ctxt->node;
+ xmlNodePtr *nodes = NULL;
+ unsigned int level = 0;
+ char *tmp = NULL;
+ char *id = NULL;
+ size_t i = 0;
+ int n = 0;
+ int rv = -1;
+ int ret = -1;
+
+ ctxt->node = node;
+
+ if ((n = virXPathNodeSet("./monitor", ctxt, &nodes)) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot extract monitor nodes"));
+ goto cleanup;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (VIR_ALLOC(domresmon) < 0)
+ goto cleanup;
+
+ domresmon->tag = tag;
+
+ domresmon->instance = virResctrlMonitorNew();
+ if (!domresmon->instance) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not create monitor"));
+ goto cleanup;
+ }
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ tmp = virXMLPropString(nodes[i], "level");
+ if (!tmp) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("Missing monitor attribute 'level'"));
+ goto cleanup;
+ }
+
+ if (virStrToLong_uip(tmp, NULL, 10, &level) < 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid monitor attribute 'level' value '%s'"),
+ tmp);
+ goto cleanup;
+ }
+
+ if (level != VIR_DOMAIN_RESCTRL_MONITOR_CACHELEVEL) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("Invalid monitor cache level '%d'"),
+ level);
+ goto cleanup;
+ }
+
+ VIR_FREE(tmp);
+ }
+
+ if (virDomainResctrlParseVcpus(def, nodes[i], &domresmon->vcpus) < 0)
+ goto cleanup;
+
+ rv = virDomainResctrlMonValidateVcpus(resctrl, domresmon->vcpus);
+ if (rv < 0)
+ goto cleanup;
+
+ /* If monitor's vcpu list is identical to the vcpu list of the
+ * associated allocation, set monitor's id to the same value
+ * as the allocation. */
+ if (rv == 1) {
+ const char *alloc_id = virResctrlAllocGetID(resctrl->alloc);
+
+ if (VIR_STRDUP(id, alloc_id) < 0)
+ goto cleanup;
+ } else {
+ if (!(tmp = virBitmapFormat(domresmon->vcpus)))
+ goto cleanup;
+
+ if (virAsprintf(&id, "vcpus_%s", tmp) < 0)
+ goto cleanup;
+ }
+
+ virResctrlMonitorSetAlloc(domresmon->instance, resctrl->alloc);
+
+ if (virResctrlMonitorSetID(domresmon->instance, id) < 0)
+ goto cleanup;
+
+ if (VIR_APPEND_ELEMENT(resctrl->monitors,
+ resctrl->nmonitors,
+ domresmon) < 0)
+ goto cleanup;
+
+ VIR_FREE(id);
+ VIR_FREE(tmp);
+ }
+
+ ret = 0;
+ cleanup:
+ ctxt->node = oldnode;
+ VIR_FREE(id);
+ VIR_FREE(tmp);
+ VIR_FREE(nodes);
+ virDomainResctrlMonDefFree(domresmon);
+ return ret;
+}
+
+
static virDomainResctrlDefPtr
virDomainResctrlNew(xmlNodePtr node,
virResctrlAllocPtr alloc,
@@ -19054,7 +19243,14 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
if (!resctrl)
goto cleanup;

- if (virResctrlAllocIsEmpty(alloc)) {
+ if (virDomainResctrlMonDefParse(def, ctxt, node,
+ VIR_RESCTRL_MONITOR_TYPE_CACHE,
+ resctrl) < 0)
+ goto cleanup;
+
+ /* If no <cache> element or <monitor> element in <cachetune>, do not
+ * append any resctrl element */
+ if (!resctrl->nmonitors && virResctrlAllocIsEmpty(alloc)) {
ret = 0;
goto cleanup;
}
@@ -27091,12 +27287,41 @@ virDomainCachetuneDefFormatHelper(unsigned int level,


static int
+virDomainResctrlMonDefFormatHelper(virDomainResctrlMonDefPtr domresmon,
+ virResctrlMonitorType tag,
+ virBufferPtr buf)
+{
+ char *vcpus = NULL;
+
+ if (domresmon->tag != tag)
+ return 0;
+
+ virBufferAddLit(buf, "<monitor ");
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ virBufferAsprintf(buf, "level='%u' ",
+ VIR_DOMAIN_RESCTRL_MONITOR_CACHELEVEL);
+ }
+
+ vcpus = virBitmapFormat(domresmon->vcpus);
+ if (!vcpus)
+ return -1;
+
+ virBufferAsprintf(buf, "vcpus='%s'/>\n", vcpus);
+
+ VIR_FREE(vcpus);
+ return 0;
+}
+
+
+static int
virDomainCachetuneDefFormat(virBufferPtr buf,
virDomainResctrlDefPtr resctrl,
unsigned int flags)
{
virBuffer childrenBuf = VIR_BUFFER_INITIALIZER;
char *vcpus = NULL;
+ size_t i = 0;
int ret = -1;

virBufferSetChildIndent(&childrenBuf, buf);
@@ -27105,6 +27330,13 @@ virDomainCachetuneDefFormat(virBufferPtr buf,
&childrenBuf) < 0)
goto cleanup;

+ for (i = 0; i < resctrl->nmonitors; i ++) {
+ if (virDomainResctrlMonDefFormatHelper(resctrl->monitors[i],
+ VIR_RESCTRL_MONITOR_TYPE_CACHE,
+ &childrenBuf) < 0)
+ goto cleanup;
+ }
+
if (virBufferCheckError(&childrenBuf) < 0)
goto cleanup;

diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index e30a4b2..60f6464 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2236,12 +2236,23 @@ struct _virDomainCputune {
};


+typedef struct _virDomainResctrlMonDef virDomainResctrlMonDef;
+typedef virDomainResctrlMonDef *virDomainResctrlMonDefPtr;
+struct _virDomainResctrlMonDef {
+ virBitmapPtr vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorPtr instance;
+};
+
typedef struct _virDomainResctrlDef virDomainResctrlDef;
typedef virDomainResctrlDef *virDomainResctrlDefPtr;

struct _virDomainResctrlDef {
virBitmapPtr vcpus;
virResctrlAllocPtr alloc;
+
+ virDomainResctrlMonDefPtr *monitors;
+ size_t nmonitors;
};


diff --git a/tests/genericxml2xmlindata/cachetune-cdp.xml b/tests/genericxml2xmlindata/cachetune-cdp.xml
index 9718f06..9f4c139 100644
--- a/tests/genericxml2xmlindata/cachetune-cdp.xml
+++ b/tests/genericxml2xmlindata/cachetune-cdp.xml
@@ -8,9 +8,12 @@
<cachetune vcpus='0-1'>
<cache id='0' level='3' type='code' size='7680' unit='KiB'/>
<cache id='1' level='3' type='data' size='3840' unit='KiB'/>
+ <monitor level='3' vcpus='0'/>
+ <monitor level='3' vcpus='1'/>
</cachetune>
<cachetune vcpus='2'>
<cache id='1' level='3' type='code' size='6' unit='MiB'/>
+ <monitor level='3' vcpus='2'/>
</cachetune>
<cachetune vcpus='3'>
<cache id='1' level='3' type='data' size='6912' unit='KiB'/>
diff --git a/tests/genericxml2xmlindata/cachetune-colliding-monitor.xml b/tests/genericxml2xmlindata/cachetune-colliding-monitor.xml
new file mode 100644
index 0000000..d481fb5
--- /dev/null
+++ b/tests/genericxml2xmlindata/cachetune-colliding-monitor.xml
@@ -0,0 +1,30 @@
+<domain type='qemu'>
+ <name>QEMUGuest1</name>
+ <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid>
+ <memory unit='KiB'>219136</memory>
+ <currentMemory unit='KiB'>219136</currentMemory>
+ <vcpu placement='static'>4</vcpu>
+ <cputune>
+ <cachetune vcpus='0-1'>
+ <cache id='0' level='3' type='both' size='768' unit='KiB'/>
+ <monitor level='3' vcpus='2'/>
+ </cachetune>
+ </cputune>
+ <os>
+ <type arch='i686' machine='pc'>hvm</type>
+ <boot dev='hd'/>
+ </os>
+ <clock offset='utc'/>
+ <on_poweroff>destroy</on_poweroff>
+ <on_reboot>restart</on_reboot>
+ <on_crash>destroy</on_crash>
+ <devices>
+ <emulator>/usr/bin/qemu-system-i686</emulator>
+ <controller type='usb' index='0'/>
+ <controller type='ide' index='0'/>
+ <controller type='pci' index='0' model='pci-root'/>
+ <input type='mouse' bus='ps2'/>
+ <input type='keyboard' bus='ps2'/>
+ <memballoon model='virtio'/>
+ </devices>
+</domain>
diff --git a/tests/genericxml2xmlindata/cachetune-small.xml b/tests/genericxml2xmlindata/cachetune-small.xml
index ab2d9cf..748be08 100644
--- a/tests/genericxml2xmlindata/cachetune-small.xml
+++ b/tests/genericxml2xmlindata/cachetune-small.xml
@@ -7,6 +7,13 @@
<cputune>
<cachetune vcpus='0-1'>
<cache id='0' level='3' type='both' size='768' unit='KiB'/>
+ <monitor level='3' vcpus='0'/>
+ <monitor level='3' vcpus='1'/>
+ <monitor level='3' vcpus='0-1'/>
+ </cachetune>
+ <cachetune vcpus='2-3'>
+ <monitor level='3' vcpus='2'/>
+ <monitor level='3' vcpus='3'/>
</cachetune>
</cputune>
<os>
diff --git a/tests/genericxml2xmltest.c b/tests/genericxml2xmltest.c
index fa941f0..4393d44 100644
--- a/tests/genericxml2xmltest.c
+++ b/tests/genericxml2xmltest.c
@@ -137,6 +137,8 @@ mymain(void)
TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
DO_TEST_FULL("cachetune-colliding-types", false, true,
TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
+ DO_TEST_FULL("cachetune-colliding-monitor", false, true,
+ TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
DO_TEST("memorytune");
DO_TEST_FULL("memorytune-colliding-allocs", false, true,
TEST_COMPARE_DOM_XML2XML_RESULT_FAIL_PARSE);
--
2.7.4
Wang Huaqiang
2018-11-12 13:31:34 UTC
Permalink
The code for determining resctrl allocation path could be reused
for monitor. Refactor it for reuse.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/util/virresctrl.c | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index e3c84a3..463555c 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2266,28 +2266,50 @@ virResctrlAllocAssign(virResctrlInfoPtr resctrl,
}


+static char *
+virResctrlDeterminePath(const char *parentpath,
+ const char *prefix,
+ const char *id)
+{
+ char *path = NULL;
+
+ if (!id) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Resctrl ID must be set before determining resctrl "
+ "parentpath='%s' prefix='%s'"), parentpath, prefix);
+ return NULL;
+ }
+
+ if (virAsprintf(&path, "%s/%s-%s", parentpath, prefix, id) < 0)
+ return NULL;
+
+ return path;
+}
+
+
int
virResctrlAllocDeterminePath(virResctrlAllocPtr alloc,
const char *machinename)
{
- if (!alloc->id) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Resctrl Allocation ID must be set before creation"));
+ if (alloc->path) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Resctrl allocation path is already set to '%s'"),
+ alloc->path);
return -1;
}

/* If the allocation is empty, then the path will be SYSFS_RESCTRL_PATH */
if (virResctrlAllocIsEmpty(alloc)) {
- if (!alloc->path &&
- VIR_STRDUP(alloc->path, SYSFS_RESCTRL_PATH) < 0)
+ if (VIR_STRDUP(alloc->path, SYSFS_RESCTRL_PATH) < 0)
return -1;

return 0;
}

- if (!alloc->path &&
- virAsprintf(&alloc->path, "%s/%s-%s",
- SYSFS_RESCTRL_PATH, machinename, alloc->id) < 0)
+ alloc->path = virResctrlDeterminePath(SYSFS_RESCTRL_PATH,
+ machinename, alloc->id);
+
+ if (!alloc->path)
return -1;

return 0;
--
2.7.4
John Ferlan
2018-11-13 23:12:15 UTC
Permalink
Post by Wang Huaqiang
The code for determining resctrl allocation path could be reused
for monitor. Refactor it for reuse.
---
src/util/virresctrl.c | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Wang Huaqiang
2018-11-12 13:31:36 UTC
Permalink
The code of adding PID to the allocation could be reused, refactor it
for later reuse.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/util/virresctrl.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index aa062c3..0bac032 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2390,26 +2390,21 @@ virResctrlAllocCreate(virResctrlInfoPtr resctrl,
}


-int
-virResctrlAllocAddPID(virResctrlAllocPtr alloc,
- pid_t pid)
+static int
+virResctrlAddPID(const char *path,
+ pid_t pid)
{
char *tasks = NULL;
char *pidstr = NULL;
int ret = 0;

- /* If allocation is empty, then no resctrl directory and the 'tasks' file
- * exists, just return */
- if (virResctrlAllocIsEmpty(alloc))
- return 0;
-
- if (!alloc->path) {
+ if (!path) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("Cannot add pid to non-existing resctrl allocation"));
+ _("Cannot add pid to non-existing resctrl group"));
return -1;
}

- if (virAsprintf(&tasks, "%s/tasks", alloc->path) < 0)
+ if (virAsprintf(&tasks, "%s/tasks", path) < 0)
return -1;

if (virAsprintf(&pidstr, "%lld", (long long int) pid) < 0)
@@ -2431,6 +2426,19 @@ virResctrlAllocAddPID(virResctrlAllocPtr alloc,


int
+virResctrlAllocAddPID(virResctrlAllocPtr alloc,
+ pid_t pid)
+{
+ /* If allocation is empty, then no resctrl directory and the 'tasks' file
+ * exists, just return */
+ if (virResctrlAllocIsEmpty(alloc))
+ return 0;
+
+ return virResctrlAddPID(alloc->path, pid);
+}
+
+
+int
virResctrlAllocRemove(virResctrlAllocPtr alloc)
{
int ret = 0;
--
2.7.4
John Ferlan
2018-11-13 23:15:45 UTC
Permalink
Post by Wang Huaqiang
The code of adding PID to the allocation could be reused, refactor it
for later reuse.
---
src/util/virresctrl.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Wang Huaqiang
2018-11-12 13:31:48 UTC
Permalink
Signed-off-by: Wang Huaqiang <***@intel.com>
---
docs/news.xml | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/docs/news.xml b/docs/news.xml
index 88774c5..3c84126 100644
--- a/docs/news.xml
+++ b/docs/news.xml
@@ -50,6 +50,17 @@
pvops-based HVM domains.
</description>
</change>
+ <change>
+ <summary>
+ qemu: Added support for CMT (Cache Monitoring Technology)
+ </summary>
+ <description>
+ Introduced cache monitor by <code>monitor</code> element
+ in <code>cachetune</code> for vCPU threads, and added interface
+ to get the cache utilization information through command
+ 'virsh domstats'.
+ </description>
+ </change>
</section>
</release>
<release version="v4.9.0" date="2018-11-04">
--
2.7.4
John Ferlan
2018-11-14 16:18:32 UTC
Permalink
Post by Wang Huaqiang
---
docs/news.xml | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/docs/news.xml b/docs/news.xml
index 88774c5..3c84126 100644
--- a/docs/news.xml
+++ b/docs/news.xml
@@ -50,6 +50,17 @@
pvops-based HVM domains.
</description>
</change>
+ <change>
+ <summary>
+ qemu: Added support for CMT (Cache Monitoring Technology)
+ </summary>
+ <description>
+ Introduced cache monitor by <code>monitor</code> element
s/monitor by/monitoring using the/
Post by Wang Huaqiang
+ in <code>cachetune</code> for vCPU threads, and added interface
s/threads, and added interface/threads. Added interfaces/
Post by Wang Huaqiang
+ to get the cache utilization information through command
s/to get/to get and display/
s/information/statistics/
s/through command/through the command/
Post by Wang Huaqiang
+ 'virsh domstats'.
s/./ via the virConnectGetAllDomainStats API.
Post by Wang Huaqiang
+ </description>
+ </change>
This should be in the 'New features' section - this is definitely not a
bug fix!

John
Post by Wang Huaqiang
</section>
</release>
<release version="v4.9.0" date="2018-11-04">
Wang, Huaqiang
2018-11-15 12:43:54 UTC
Permalink
-----Original Message-----
Sent: Thursday, November 15, 2018 12:19 AM
Subject: Re: [PATCHv8 17/17] docs: Updated news.xml about the CMT
support
Post by Wang Huaqiang
---
docs/news.xml | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/docs/news.xml b/docs/news.xml index 88774c5..3c84126
100644
--- a/docs/news.xml
+++ b/docs/news.xml
@@ -50,6 +50,17 @@
pvops-based HVM domains.
</description>
</change>
+ <change>
+ <summary>
+ qemu: Added support for CMT (Cache Monitoring Technology)
+ </summary>
+ <description>
+ Introduced cache monitor by <code>monitor</code> element
s/monitor by/monitoring using the/
Post by Wang Huaqiang
+ in <code>cachetune</code> for vCPU threads, and added
+ interface
s/threads, and added interface/threads. Added interfaces/
Post by Wang Huaqiang
+ to get the cache utilization information through command
s/to get/to get and display/
s/information/statistics/
s/through command/through the command/
Post by Wang Huaqiang
+ 'virsh domstats'.
s/./ via the virConnectGetAllDomainStats API.
Post by Wang Huaqiang
+ </description>
+ </change>
This should be in the 'New features' section - this is definitely not a bug fix!
Accept all comments and changes thanks for review.
John
Post by Wang Huaqiang
</section>
</release>
<release version="v4.9.0" date="2018-11-04">
Wang Huaqiang
2018-11-12 13:31:39 UTC
Permalink
Add interface for creating the resource monitoring group according
to '@virResctrlMonitor->path'.

Signed-off-by: Wang Huaqiang <***@intel.com>

Reviewed-by: John Ferlan <***@redhat.com>
---
src/libvirt_private.syms | 1 +
src/util/virresctrl.c | 24 ++++++++++++++++++++++++
src/util/virresctrl.h | 4 ++++
3 files changed, 29 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index e175c8b..a878083 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2681,6 +2681,7 @@ virResctrlInfoGetMonitorPrefix;
virResctrlInfoMonFree;
virResctrlInfoNew;
virResctrlMonitorAddPID;
+virResctrlMonitorCreate;
virResctrlMonitorDeterminePath;
virResctrlMonitorNew;

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 94689dd..3216abe 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2542,3 +2542,27 @@ virResctrlMonitorAddPID(virResctrlMonitorPtr monitor,
{
return virResctrlAddPID(monitor->path, pid);
}
+
+
+int
+virResctrlMonitorCreate(virResctrlMonitorPtr monitor,
+ const char *machinename)
+{
+ int lockfd = -1;
+ int ret = -1;
+
+ if (!monitor)
+ return 0;
+
+ if (virResctrlMonitorDeterminePath(monitor, machinename) < 0)
+ return -1;
+
+ lockfd = virResctrlLockWrite();
+ if (lockfd < 0)
+ return -1;
+
+ ret = virResctrlCreateGroupPath(monitor->path);
+
+ virResctrlUnlock(lockfd);
+ return ret;
+}
diff --git a/src/util/virresctrl.h b/src/util/virresctrl.h
index 52d283a..76e40a2 100644
--- a/src/util/virresctrl.h
+++ b/src/util/virresctrl.h
@@ -201,4 +201,8 @@ virResctrlMonitorDeterminePath(virResctrlMonitorPtr monitor,
int
virResctrlMonitorAddPID(virResctrlMonitorPtr monitor,
pid_t pid);
+
+int
+virResctrlMonitorCreate(virResctrlMonitorPtr monitor,
+ const char *machinename);
#endif /* __VIR_RESCTRL_H__ */
--
2.7.4
Wang Huaqiang
2018-11-12 13:31:38 UTC
Permalink
The code for creating resctrl allocation group could be reused
for monitoring group, refactor it for reuse in the later patch.

Signed-off-by: Wang Huaqiang <***@intel.com>

Reviewed-by: John Ferlan <***@redhat.com>
---
src/util/virresctrl.c | 37 +++++++++++++++++++++++--------------
1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 7bd52cd..94689dd 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2316,6 +2316,26 @@ virResctrlAllocDeterminePath(virResctrlAllocPtr alloc,
}


+/* This function creates a resctrl directory in resource control file system,
+ * and the directory path is specified by @path. */
+static int
+virResctrlCreateGroupPath(const char *path)
+{
+ /* Directory exists, return */
+ if (virFileExists(path))
+ return 0;
+
+ if (virFileMakePath(path) < 0) {
+ virReportSystemError(errno,
+ _("Cannot create resctrl directory '%s'"),
+ path);
+ return -1;
+ }
+
+ return 0;
+}
+
+
/* This checks if the directory for the alloc exists. If not it tries to create
* it and apply appropriate alloc settings. */
int
@@ -2344,13 +2364,6 @@ virResctrlAllocCreate(virResctrlInfoPtr resctrl,
if (STREQ(alloc->path, SYSFS_RESCTRL_PATH))
return 0;

- if (virFileExists(alloc->path)) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Path '%s' for resctrl allocation exists"),
- alloc->path);
- goto cleanup;
- }
-
lockfd = virResctrlLockWrite();
if (lockfd < 0)
goto cleanup;
@@ -2358,6 +2371,9 @@ virResctrlAllocCreate(virResctrlInfoPtr resctrl,
if (virResctrlAllocAssign(resctrl, alloc) < 0)
goto cleanup;

+ if (virResctrlCreateGroupPath(alloc->path) < 0)
+ goto cleanup;
+
alloc_str = virResctrlAllocFormat(alloc);
if (!alloc_str)
goto cleanup;
@@ -2365,13 +2381,6 @@ virResctrlAllocCreate(virResctrlInfoPtr resctrl,
if (virAsprintf(&schemata_path, "%s/schemata", alloc->path) < 0)
goto cleanup;

- if (virFileMakePath(alloc->path) < 0) {
- virReportSystemError(errno,
- _("Cannot create resctrl directory '%s'"),
- alloc->path);
- goto cleanup;
- }
-
VIR_DEBUG("Writing resctrl schemata '%s' into '%s'", alloc_str, schemata_path);
if (virFileWriteStr(schemata_path, alloc_str, 0) < 0) {
rmdir(alloc->path);
--
2.7.4
Wang Huaqiang
2018-11-12 13:31:41 UTC
Permalink
Add virResctrlMonitorSetID by leveraging previous refactored patch.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/util/virresctrl.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 4e4831c..ed682c9 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2581,3 +2581,12 @@ virResctrlMonitorCreate(virResctrlMonitorPtr monitor,
virResctrlUnlock(lockfd);
return ret;
}
+
+
+int
+virResctrlMonitorSetID(virResctrlMonitorPtr monitor,
+ const char *id)
+
+{
+ return virResctrlSetID(&monitor->id, id);
+}
--
2.7.4
John Ferlan
2018-11-13 23:31:38 UTC
Permalink
Post by Wang Huaqiang
Add virResctrlMonitorSetID by leveraging previous refactored patch.
---
src/util/virresctrl.c | 9 +++++++++
1 file changed, 9 insertions(+)
This one won't compile alone - it's missing the virresctrl.h and the
libvirt_private.syms change.

To make life easier - I'm going to merge it with the next patch.

Reviewed-by: John Ferlan <***@redhat.com>

John
Post by Wang Huaqiang
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 4e4831c..ed682c9 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -2581,3 +2581,12 @@ virResctrlMonitorCreate(virResctrlMonitorPtr monitor,
virResctrlUnlock(lockfd);
return ret;
}
+
+
+int
+virResctrlMonitorSetID(virResctrlMonitorPtr monitor,
+ const char *id)
+
+{
+ return virResctrlSetID(&monitor->id, id);
+}
Wang Huaqiang
2018-11-12 13:31:43 UTC
Permalink
Introduced virDomainResctrlNew to do the most part of virDomainResctrlAppend
and move the operation of appending resctrl to @def->resctrls out of
function.

Rather than rely on virDomainResctrlAppend to perform the allocation, move
the onus to the caller and make use of virBitmapNewCopy for @vcpus and
virObjectRef for @alloc, thus removing the need to set each to NULL after the
call.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/conf/domain_conf.c | 56 +++++++++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 237540b..8433214 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -18948,26 +18948,22 @@ virDomainCachetuneDefParseCache(xmlXPathContextPtr ctxt,
}


-static int
-virDomainResctrlAppend(virDomainDefPtr def,
- xmlNodePtr node,
- virResctrlAllocPtr alloc,
- virBitmapPtr vcpus,
- unsigned int flags)
+static virDomainResctrlDefPtr
+virDomainResctrlNew(xmlNodePtr node,
+ virResctrlAllocPtr alloc,
+ virBitmapPtr vcpus,
+ unsigned int flags)
{
char *vcpus_str = NULL;
char *alloc_id = NULL;
- virDomainResctrlDefPtr tmp_resctrl = NULL;
- int ret = -1;
-
- if (VIR_ALLOC(tmp_resctrl) < 0)
- goto cleanup;
+ virDomainResctrlDefPtr resctrl = NULL;
+ virDomainResctrlDefPtr ret = NULL;

/* We need to format it back because we need to be consistent in the naming
* even when users specify some "sub-optimal" string there. */
vcpus_str = virBitmapFormat(vcpus);
if (!vcpus_str)
- goto cleanup;
+ return NULL;

if (!(flags & VIR_DOMAIN_DEF_PARSE_INACTIVE))
alloc_id = virXMLPropString(node, "id");
@@ -18985,15 +18981,20 @@ virDomainResctrlAppend(virDomainDefPtr def,
if (virResctrlAllocSetID(alloc, alloc_id) < 0)
goto cleanup;

- tmp_resctrl->vcpus = vcpus;
- tmp_resctrl->alloc = alloc;
+ if (VIR_ALLOC(resctrl) < 0)
+ goto cleanup;

- if (VIR_APPEND_ELEMENT(def->resctrls, def->nresctrls, tmp_resctrl) < 0)
+ if (!(resctrl->vcpus = virBitmapNewCopy(vcpus))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("failed to copy 'vcpus'"));
goto cleanup;
+ }

- ret = 0;
+ resctrl->alloc = virObjectRef(alloc);
+
+ VIR_STEAL_PTR(ret, resctrl);
cleanup:
- virDomainResctrlDefFree(tmp_resctrl);
+ virDomainResctrlDefFree(resctrl);
VIR_FREE(alloc_id);
VIR_FREE(vcpus_str);
return ret;
@@ -19010,6 +19011,7 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
xmlNodePtr *nodes = NULL;
virBitmapPtr vcpus = NULL;
virResctrlAllocPtr alloc = NULL;
+ virDomainResctrlDefPtr resctrl = NULL;
ssize_t i = 0;
int n;
int ret = -1;
@@ -19048,19 +19050,22 @@ virDomainCachetuneDefParse(virDomainDefPtr def,
goto cleanup;
}

+ resctrl = virDomainResctrlNew(node, alloc, vcpus, flags);
+ if (!resctrl)
+ goto cleanup;
+
if (virResctrlAllocIsEmpty(alloc)) {
ret = 0;
goto cleanup;
}

- if (virDomainResctrlAppend(def, node, alloc, vcpus, flags) < 0)
+ if (VIR_APPEND_ELEMENT(def->resctrls, def->nresctrls, resctrl) < 0)
goto cleanup;
- vcpus = NULL;
- alloc = NULL;

ret = 0;
cleanup:
ctxt->node = oldnode;
+ virDomainResctrlDefFree(resctrl);
virObjectUnref(alloc);
virBitmapFree(vcpus);
VIR_FREE(nodes);
@@ -19218,6 +19223,8 @@ virDomainMemorytuneDefParse(virDomainDefPtr def,
xmlNodePtr *nodes = NULL;
virBitmapPtr vcpus = NULL;
virResctrlAllocPtr alloc = NULL;
+ virDomainResctrlDefPtr resctrl = NULL;
+
ssize_t i = 0;
int n;
int ret = -1;
@@ -19262,15 +19269,18 @@ virDomainMemorytuneDefParse(virDomainDefPtr def,
* just update the existing alloc information, which is done in above
* virDomainMemorytuneDefParseMemory */
if (new_alloc) {
- if (virDomainResctrlAppend(def, node, alloc, vcpus, flags) < 0)
+ resctrl = virDomainResctrlNew(node, alloc, vcpus, flags);
+ if (!resctrl)
+ goto cleanup;
+
+ if (VIR_APPEND_ELEMENT(def->resctrls, def->nresctrls, resctrl) < 0)
goto cleanup;
- vcpus = NULL;
- alloc = NULL;
}

ret = 0;
cleanup:
ctxt->node = oldnode;
+ virDomainResctrlDefFree(resctrl);
virObjectUnref(alloc);
virBitmapFree(vcpus);
VIR_FREE(nodes);
--
2.7.4
John Ferlan
2018-11-13 23:32:41 UTC
Permalink
Post by Wang Huaqiang
Introduced virDomainResctrlNew to do the most part of virDomainResctrlAppend
function.
Rather than rely on virDomainResctrlAppend to perform the allocation, move
call.
---
src/conf/domain_conf.c | 56 +++++++++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 23 deletions(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Wang Huaqiang
2018-11-12 13:31:45 UTC
Permalink
Add functions for creating, destroying, reconnecting resctrl
monitor in qemu according to the configuration in domain XML.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/qemu/qemu_process.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 1850923..096fea1 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2597,10 +2597,20 @@ qemuProcessResctrlCreate(virQEMUDriverPtr driver,
return -1;

for (i = 0; i < vm->def->nresctrls; i++) {
+ size_t j = 0;
if (virResctrlAllocCreate(caps->host.resctrl,
vm->def->resctrls[i]->alloc,
priv->machineName) < 0)
goto cleanup;
+
+ for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
+ virDomainResctrlMonDefPtr mon = NULL;
+
+ mon = vm->def->resctrls[i]->monitors[j];
+ if (virResctrlMonitorCreate(mon->instance,
+ priv->machineName) < 0)
+ goto cleanup;
+ }
}

ret = 0;
@@ -5414,6 +5424,7 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
{
pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
+ virDomainResctrlMonDefPtr mon = NULL;
size_t i = 0;

if (qemuProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
@@ -5424,11 +5435,26 @@ qemuProcessSetupVcpu(virDomainObjPtr vm,
return -1;

for (i = 0; i < vm->def->nresctrls; i++) {
+ size_t j = 0;
virDomainResctrlDefPtr ct = vm->def->resctrls[i];

if (virBitmapIsBitSet(ct->vcpus, vcpuid)) {
if (virResctrlAllocAddPID(ct->alloc, vcpupid) < 0)
return -1;
+
+ for (j = 0; j < ct->nmonitors; j++) {
+ mon = ct->monitors[j];
+
+ if (virBitmapEqual(ct->vcpus, mon->vcpus))
+ continue;
+
+ if (virBitmapIsBitSet(mon->vcpus, vcpuid)) {
+ if (virResctrlMonitorAddPID(mon->instance, vcpupid) < 0)
+ return -1;
+ break;
+ }
+ }
+
break;
}
}
@@ -7190,8 +7216,18 @@ void qemuProcessStop(virQEMUDriverPtr driver,
/* Remove resctrl allocation after cgroups are cleaned up which makes it
* kind of safer (although removing the allocation should work even with
* pids in tasks file */
- for (i = 0; i < vm->def->nresctrls; i++)
+ for (i = 0; i < vm->def->nresctrls; i++) {
+ size_t j = 0;
+
+ for (j = 0; j < vm->def->resctrls[i]->nmonitors; j++) {
+ virDomainResctrlMonDefPtr mon = NULL;
+
+ mon = vm->def->resctrls[i]->monitors[j];
+ virResctrlMonitorRemove(mon->instance);
+ }
+
virResctrlAllocRemove(vm->def->resctrls[i]->alloc);
+ }

qemuProcessRemoveDomainStatus(driver, vm);

@@ -7926,9 +7962,20 @@ qemuProcessReconnect(void *opaque)
goto error;

for (i = 0; i < obj->def->nresctrls; i++) {
+ size_t j = 0;
+
if (virResctrlAllocDeterminePath(obj->def->resctrls[i]->alloc,
priv->machineName) < 0)
goto error;
+
+ for (j = 0; j < obj->def->resctrls[i]->nmonitors; j++) {
+ virDomainResctrlMonDefPtr mon = NULL;
+
+ mon = obj->def->resctrls[i]->monitors[j];
+ if (virResctrlMonitorDeterminePath(mon->instance,
+ priv->machineName) < 0)
+ goto error;
+ }
}

/* update domain state XML with possibly updated state in virDomainObj */
--
2.7.4
John Ferlan
2018-11-13 23:46:08 UTC
Permalink
Post by Wang Huaqiang
Add functions for creating, destroying, reconnecting resctrl
monitor in qemu according to the configuration in domain XML.
---
src/qemu/qemu_process.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)
Reviewed-by: John Ferlan <***@redhat.com>

John
Wang Huaqiang
2018-11-12 13:31:47 UTC
Permalink
Adding the interface in qemu to report CMT statistic information
through command 'virsh domstats --cpu-total'.

Below is a typical output:

# virsh domstats 1 --cpu-total
Domain: 'ubuntu16.04-base'
...
cpu.cache.monitor.count=2
cpu.cache.monitor.0.name=vcpus_1
cpu.cache.monitor.0.vcpus=1
cpu.cache.monitor.0.bank.count=2
cpu.cache.monitor.0.bank.0.id=0
cpu.cache.monitor.0.bank.0.bytes=4505600
cpu.cache.monitor.0.bank.1.id=1
cpu.cache.monitor.0.bank.1.bytes=5586944
cpu.cache.monitor.1.name=vcpus_4-6
cpu.cache.monitor.1.vcpus=4,5,6
cpu.cache.monitor.1.bank.count=2
cpu.cache.monitor.1.bank.0.id=0
cpu.cache.monitor.1.bank.0.bytes=17571840
cpu.cache.monitor.1.bank.1.id=1
cpu.cache.monitor.1.bank.1.bytes=29106176

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/libvirt-domain.c | 9 +++
src/qemu/qemu_driver.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 207 insertions(+)

diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 7690339..4895f9f 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -11345,6 +11345,15 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
* "cpu.user" - user cpu time spent in nanoseconds as unsigned long long.
* "cpu.system" - system cpu time spent in nanoseconds as unsigned long
* long.
+ * "cpu.cache.monitor.count" - tocal cache monitoring groups
+ * "cpu.cache.monitor.M.name" - name of cache monitoring group 'M'
+ * "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.count" - total bank number of cache monitoring
+ * group 'M'
+ * "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for cache
+ * 'N' in cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy of cache
+ * bank 'N' in cache monitoring group 'M'
*
* VIR_DOMAIN_STATS_BALLOON:
* Return memory balloon device information.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 89d46ee..d41ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19698,6 +19698,199 @@ typedef enum {
#define HAVE_JOB(flags) ((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)


+typedef struct _virQEMUCpuResMonitorData virQEMUCpuResMonitorData;
+typedef virQEMUCpuResMonitorData *virQEMUCpuResMonitorDataPtr;
+struct _virQEMUCpuResMonitorData{
+ const char *name;
+ char *vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorStatsPtr stats;
+ size_t nstats;
+};
+
+
+static int
+qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
+ virQEMUCpuResMonitorDataPtr mondata)
+{
+ virDomainResctrlDefPtr resctrl = NULL;
+ size_t i = 0;
+ size_t j = 0;
+ size_t l = 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+
+ for (j = 0; j < resctrl->nmonitors; j++) {
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;
+
+ domresmon = resctrl->monitors[j];
+ mondata[l].tag = domresmon->tag;
+
+ /* If virBitmapFormat successfully returns an vcpu string, then
+ * mondata[l].vcpus is assigned with an memory space holding it,
+ * let this newly allocated memory buffer to be freed along with
+ * the free of 'mondata' */
+ if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
+ return -1;
+
+ if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not get monitor ID"));
+ return -1;
+ }
+
+ if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ if (virResctrlMonitorGetCacheOccupancy(monitor,
+ &mondata[l].stats,
+ &mondata[l].nstats) < 0)
+ return -1;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid CPU resource type"));
+ return -1;
+ }
+
+ l++;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorDataPtr mondata,
+ size_t nmondata,
+ virResctrlMonitorType tag,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
+{
+ char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
+ unsigned int nmonitors = 0;
+ const char *resname = NULL;
+ const char *resnodename = NULL;
+ size_t i = 0;
+
+ for (i = 0; i < nmondata; i++) {
+ if (mondata[i].tag == tag)
+ nmonitors++;
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ resname = "cache";
+ resnodename = "bank";
+ } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
+ resname = "memBW";
+ resnodename = "node";
+ } else {
+ return 0;
+ }
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.count", resname);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name, nmonitors) < 0)
+ return -1;
+
+ for (i = 0; i < nmonitors; i++) {
+ size_t l = 0;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.name", resname, i);
+ if (virTypedParamsAddString(&record->params,
+ &record->nparams,
+ maxparams,
+ param_name,
+ mondata[i].name) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.vcpus", resname, i);
+ if (virTypedParamsAddString(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].vcpus) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].nstats) < 0)
+ return -1;
+
+ for (l = 0; l < mondata[i].nstats; l++) {
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.id",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].id) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.bytes",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].val) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
+{
+ virDomainResctrlDefPtr resctrl = NULL;
+ virQEMUCpuResMonitorDataPtr mondata = NULL;
+ unsigned int nmonitors = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ if (!virDomainObjIsActive(dom))
+ return 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+ nmonitors += resctrl->nmonitors;
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (VIR_ALLOC_N(mondata, nmonitors) < 0)
+ return -1;
+
+ if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)
+ goto cleanup;
+
+ for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
+ i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
+ if (qemuDomainGetStatsCpuResMonitorPerTag(mondata, nmonitors, i,
+ record, maxparams) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ for (i = 0; i < nmonitors; i++)
+ VIR_FREE(mondata[i].vcpus);
+ VIR_FREE(mondata);
+
+ return ret;
+}
+
+
static int
qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
virDomainStatsRecordPtr record,
@@ -19747,6 +19940,11 @@ qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
{
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
+
+ if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
+ return -1;
+
+ return 0;
}
--
2.7.4
John Ferlan
2018-11-13 23:56:30 UTC
Permalink
Post by Wang Huaqiang
Adding the interface in qemu to report CMT statistic information
through command 'virsh domstats --cpu-total'.
I believe virsh.pod needs an update to list this type of data in the
domstats --cpu-total output.

In any case, I need to drop off for the evening - I'll pick this up in
the morning. You can send an update to this patch and I can merge it in.

The first 15 patches look good with some minor adjustments...

John
Post by Wang Huaqiang
# virsh domstats 1 --cpu-total
Domain: 'ubuntu16.04-base'
...
cpu.cache.monitor.count=2
cpu.cache.monitor.0.name=vcpus_1
cpu.cache.monitor.0.vcpus=1
cpu.cache.monitor.0.bank.count=2
cpu.cache.monitor.0.bank.0.id=0
cpu.cache.monitor.0.bank.0.bytes=4505600
cpu.cache.monitor.0.bank.1.id=1
cpu.cache.monitor.0.bank.1.bytes=5586944
cpu.cache.monitor.1.name=vcpus_4-6
cpu.cache.monitor.1.vcpus=4,5,6
cpu.cache.monitor.1.bank.count=2
cpu.cache.monitor.1.bank.0.id=0
cpu.cache.monitor.1.bank.0.bytes=17571840
cpu.cache.monitor.1.bank.1.id=1
cpu.cache.monitor.1.bank.1.bytes=29106176
---
src/libvirt-domain.c | 9 +++
src/qemu/qemu_driver.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 207 insertions(+)
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 7690339..4895f9f 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -11345,6 +11345,15 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
* "cpu.user" - user cpu time spent in nanoseconds as unsigned long long.
* "cpu.system" - system cpu time spent in nanoseconds as unsigned long
* long.
+ * "cpu.cache.monitor.count" - tocal cache monitoring groups
+ * "cpu.cache.monitor.M.name" - name of cache monitoring group 'M'
+ * "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.count" - total bank number of cache monitoring
+ * group 'M'
+ * "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for cache
+ * 'N' in cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy of cache
+ * bank 'N' in cache monitoring group 'M'
*
* Return memory balloon device information.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 89d46ee..d41ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19698,6 +19698,199 @@ typedef enum {
#define HAVE_JOB(flags) ((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)
+typedef struct _virQEMUCpuResMonitorData virQEMUCpuResMonitorData;
+typedef virQEMUCpuResMonitorData *virQEMUCpuResMonitorDataPtr;
+struct _virQEMUCpuResMonitorData{
Data {
Post by Wang Huaqiang
+ const char *name;
+ char *vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorStatsPtr stats;
+ size_t nstats;
+};
+
+
+static int
+qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
+ virQEMUCpuResMonitorDataPtr mondata)
+{
+ virDomainResctrlDefPtr resctrl = NULL;
+ size_t i = 0;
+ size_t j = 0;
+ size_t l = 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+
+ for (j = 0; j < resctrl->nmonitors; j++) {
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;
+
+ domresmon = resctrl->monitors[j];
+ mondata[l].tag = domresmon->tag;
+
+ /* If virBitmapFormat successfully returns an vcpu string, then
+ * mondata[l].vcpus is assigned with an memory space holding it,
+ * let this newly allocated memory buffer to be freed along with
+ * the free of 'mondata' */
+ if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
+ return -1;
+
+ if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not get monitor ID"));
+ return -1;
+ }
+
+ if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ if (virResctrlMonitorGetCacheOccupancy(monitor,
+ &mondata[l].stats,
+ &mondata[l].nstats) < 0)
+ return -1;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid CPU resource type"));
+ return -1;
+ }
+
+ l++;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorDataPtr mondata,
+ size_t nmondata,
+ virResctrlMonitorType tag,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
+{
+ char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
+ unsigned int nmonitors = 0;
+ const char *resname = NULL;
+ const char *resnodename = NULL;
+ size_t i = 0;
+
+ for (i = 0; i < nmondata; i++) {
+ if (mondata[i].tag == tag)
+ nmonitors++;
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ resname = "cache";
+ resnodename = "bank";
+ } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
+ resname = "memBW";
+ resnodename = "node";
+ } else {
+ return 0;
+ }
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.count", resname);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name, nmonitors) < 0)
+ return -1;
+
+ for (i = 0; i < nmonitors; i++) {
+ size_t l = 0;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.name", resname, i);
+ if (virTypedParamsAddString(&record->params,
+ &record->nparams,
+ maxparams,
+ param_name,
+ mondata[i].name) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.vcpus", resname, i);
+ if (virTypedParamsAddString(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].vcpus) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].nstats) < 0)
+ return -1;
+
+ for (l = 0; l < mondata[i].nstats; l++) {
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.id",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].id) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.bytes",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].val) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
+{
+ virDomainResctrlDefPtr resctrl = NULL;
+ virQEMUCpuResMonitorDataPtr mondata = NULL;
+ unsigned int nmonitors = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ if (!virDomainObjIsActive(dom))
+ return 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+ nmonitors += resctrl->nmonitors;
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (VIR_ALLOC_N(mondata, nmonitors) < 0)
+ return -1;
+
+ if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)
+ goto cleanup;
+
+ for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
+ i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
+ if (qemuDomainGetStatsCpuResMonitorPerTag(mondata, nmonitors, i,
+ record, maxparams) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ for (i = 0; i < nmonitors; i++)
+ VIR_FREE(mondata[i].vcpus);
+ VIR_FREE(mondata);
+
+ return ret;
+}
+
+
static int
qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
virDomainStatsRecordPtr record,
@@ -19747,6 +19940,11 @@ qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
{
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
+
+ if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
+ return -1;
+
+ return 0;
}
Wang, Huaqiang
2018-11-14 02:57:14 UTC
Permalink
-----Original Message-----
Sent: Wednesday, November 14, 2018 7:57 AM
Subject: Re: [PATCHv8 16/17] qemu: Report cache occupancy (CMT) with
domstats
Post by Wang Huaqiang
Adding the interface in qemu to report CMT statistic information
through command 'virsh domstats --cpu-total'.
I believe virsh.pod needs an update to list this type of data in the domstats --
cpu-total output.
Thanks for mentioning, I haven't realized virsh.pod need a fix.

I will send out a standalone patch for patch16 and adding this fix soon.
In any case, I need to drop off for the evening - I'll pick this up in the morning.
You can send an update to this patch and I can merge it in.
The first 15 patches look good with some minor adjustments...
John
Thanks for your review and all your hard work.
Huaqiang
Post by Wang Huaqiang
# virsh domstats 1 --cpu-total
Domain: 'ubuntu16.04-base'
...
cpu.cache.monitor.count=2
cpu.cache.monitor.0.name=vcpus_1
cpu.cache.monitor.0.vcpus=1
cpu.cache.monitor.0.bank.count=2
cpu.cache.monitor.0.bank.0.id=0
cpu.cache.monitor.0.bank.0.bytes=4505600
cpu.cache.monitor.0.bank.1.id=1
cpu.cache.monitor.0.bank.1.bytes=5586944
cpu.cache.monitor.1.name=vcpus_4-6
cpu.cache.monitor.1.vcpus=4,5,6
cpu.cache.monitor.1.bank.count=2
cpu.cache.monitor.1.bank.0.id=0
cpu.cache.monitor.1.bank.0.bytes=17571840
cpu.cache.monitor.1.bank.1.id=1
cpu.cache.monitor.1.bank.1.bytes=29106176
---
src/libvirt-domain.c | 9 +++
src/qemu/qemu_driver.c | 198
+++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 207 insertions(+)
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index
7690339..4895f9f 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -11345,6 +11345,15 @@ virConnectGetDomainCapabilities(virConnectPtr
conn,
Post by Wang Huaqiang
* "cpu.user" - user cpu time spent in nanoseconds as unsigned long long.
* "cpu.system" - system cpu time spent in nanoseconds as unsigned long
* long.
+ * "cpu.cache.monitor.count" - tocal cache monitoring groups
+ * "cpu.cache.monitor.M.name" - name of cache monitoring group 'M'
+ * "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.count" - total bank number of cache
monitoring
Post by Wang Huaqiang
+ * group 'M'
+ * "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for cache
+ * 'N' in cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy of
cache
Post by Wang Huaqiang
+ * bank 'N' in cache monitoring group 'M'
*
* Return memory balloon device information.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index
89d46ee..d41ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19698,6 +19698,199 @@ typedef enum { #define HAVE_JOB(flags)
((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)
+typedef struct _virQEMUCpuResMonitorData virQEMUCpuResMonitorData;
+typedef virQEMUCpuResMonitorData *virQEMUCpuResMonitorDataPtr;
struct
Post by Wang Huaqiang
+_virQEMUCpuResMonitorData{
Data {
Post by Wang Huaqiang
+ const char *name;
+ char *vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorStatsPtr stats;
+ size_t nstats;
+};
+
+
+static int
+qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
+ virQEMUCpuResMonitorDataPtr mondata) {
+ virDomainResctrlDefPtr resctrl = NULL;
+ size_t i = 0;
+ size_t j = 0;
+ size_t l = 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+
+ for (j = 0; j < resctrl->nmonitors; j++) {
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ virResctrlMonitorPtr monitor =
+ resctrl->monitors[j]->instance;
+
+ domresmon = resctrl->monitors[j];
+ mondata[l].tag = domresmon->tag;
+
+ /* If virBitmapFormat successfully returns an vcpu string, then
+ * mondata[l].vcpus is assigned with an memory space holding it,
+ * let this newly allocated memory buffer to be freed along with
+ * the free of 'mondata' */
+ if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
+ return -1;
+
+ if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not get monitor ID"));
+ return -1;
+ }
+
+ if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ if (virResctrlMonitorGetCacheOccupancy(monitor,
+ &mondata[l].stats,
+ &mondata[l].nstats) < 0)
+ return -1;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid CPU resource type"));
+ return -1;
+ }
+
+ l++;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorDataPtr
mondata,
Post by Wang Huaqiang
+ size_t nmondata,
+ virResctrlMonitorType tag,
+ virDomainStatsRecordPtr record,
+ int *maxparams) {
+ char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
+ unsigned int nmonitors = 0;
+ const char *resname = NULL;
+ const char *resnodename = NULL;
+ size_t i = 0;
+
+ for (i = 0; i < nmondata; i++) {
+ if (mondata[i].tag == tag)
+ nmonitors++;
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ resname = "cache";
+ resnodename = "bank";
+ } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
+ resname = "memBW";
+ resnodename = "node";
+ } else {
+ return 0;
+ }
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.count", resname);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name, nmonitors) < 0)
+ return -1;
+
+ for (i = 0; i < nmonitors; i++) {
+ size_t l = 0;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.name", resname, i);
+ if (virTypedParamsAddString(&record->params,
+ &record->nparams,
+ maxparams,
+ param_name,
+ mondata[i].name) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.vcpus", resname, i);
+ if (virTypedParamsAddString(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].vcpus) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].nstats) < 0)
+ return -1;
+
+ for (l = 0; l < mondata[i].nstats; l++) {
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.id",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].id) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.bytes",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].val) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams) {
+ virDomainResctrlDefPtr resctrl = NULL;
+ virQEMUCpuResMonitorDataPtr mondata = NULL;
+ unsigned int nmonitors = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ if (!virDomainObjIsActive(dom))
+ return 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+ nmonitors += resctrl->nmonitors;
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (VIR_ALLOC_N(mondata, nmonitors) < 0)
+ return -1;
+
+ if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)
+ goto cleanup;
+
+ for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
+ i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
+ if (qemuDomainGetStatsCpuResMonitorPerTag(mondata, nmonitors, i,
+ record, maxparams) < 0)
+ goto cleanup;
+ }
+
+ ret = 0;
+ for (i = 0; i < nmonitors; i++)
+ VIR_FREE(mondata[i].vcpus);
+ VIR_FREE(mondata);
+
+ return ret;
+}
+
+
static int
qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
ATTRIBUTE_UNUSED, {
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
+
+ if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
+ return -1;
+
+ return 0;
}
John Ferlan
2018-11-14 16:18:26 UTC
Permalink
Post by Wang Huaqiang
Adding the interface in qemu to report CMT statistic information
through command 'virsh domstats --cpu-total'.
# virsh domstats 1 --cpu-total
Domain: 'ubuntu16.04-base'
...
cpu.cache.monitor.count=2
cpu.cache.monitor.0.name=vcpus_1
cpu.cache.monitor.0.vcpus=1
cpu.cache.monitor.0.bank.count=2
cpu.cache.monitor.0.bank.0.id=0
cpu.cache.monitor.0.bank.0.bytes=4505600
cpu.cache.monitor.0.bank.1.id=1
cpu.cache.monitor.0.bank.1.bytes=5586944
cpu.cache.monitor.1.name=vcpus_4-6
cpu.cache.monitor.1.vcpus=4,5,6
cpu.cache.monitor.1.bank.count=2
cpu.cache.monitor.1.bank.0.id=0
cpu.cache.monitor.1.bank.0.bytes=17571840
cpu.cache.monitor.1.bank.1.id=1
cpu.cache.monitor.1.bank.1.bytes=29106176
---
src/libvirt-domain.c | 9 +++
src/qemu/qemu_driver.c | 198 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 207 insertions(+)
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 7690339..4895f9f 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -11345,6 +11345,15 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
* "cpu.user" - user cpu time spent in nanoseconds as unsigned long long.
* "cpu.system" - system cpu time spent in nanoseconds as unsigned long
* long.
+ * "cpu.cache.monitor.count" - tocal cache monitoring groups
+ * "cpu.cache.monitor.M.name" - name of cache monitoring group 'M'
+ * "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.count" - total bank number of cache monitoring
+ * group 'M'
+ * "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for cache
+ * 'N' in cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy of cache
+ * bank 'N' in cache monitoring group 'M'
I'll comment on these in your update...
Post by Wang Huaqiang
*
* Return memory balloon device information.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 89d46ee..d41ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19698,6 +19698,199 @@ typedef enum {
#define HAVE_JOB(flags) ((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)
+typedef struct _virQEMUCpuResMonitorData virQEMUCpuResMonitorData;
+typedef virQEMUCpuResMonitorData *virQEMUCpuResMonitorDataPtr;
+struct _virQEMUCpuResMonitorData{
Data {
Post by Wang Huaqiang
+ const char *name;
+ char *vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorStatsPtr stats;
+ size_t nstats;
+};
+
+
+static int
+qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
+ virQEMUCpuResMonitorDataPtr mondata)
+{
+ virDomainResctrlDefPtr resctrl = NULL;
+ size_t i = 0;
+ size_t j = 0;
+ size_t l = 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+
+ for (j = 0; j < resctrl->nmonitors; j++) {
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;
+
+ domresmon = resctrl->monitors[j];
+ mondata[l].tag = domresmon->tag;
Why "l" (BTW: 'k' would be preferred because '1' and 'l' are very
difficult to delineate).
Post by Wang Huaqiang
+
+ /* If virBitmapFormat successfully returns an vcpu string, then
+ * mondata[l].vcpus is assigned with an memory space holding it,
+ * let this newly allocated memory buffer to be freed along with
+ * the free of 'mondata' */
+ if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
+ return -1;
+
+ if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not get monitor ID"));
+ return -1;
+ }
+
+ if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
Something doesn't quite add up with this... Since we're only filling in
types with 'cache' types and erroring out otherwise ... see [1] data
points below...
Post by Wang Huaqiang
+ if (virResctrlMonitorGetCacheOccupancy(monitor,
+ &mondata[l].stats,
+ &mondata[l].nstats) < 0)
+ return -1;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid CPU resource type"));
+ return -1;
[1] Perhaps this should be done up front and "avoided" since this helper
should only care above getting cache stats...

IOW:

for (j = 0; j < resctrl->nmonitors; j++) {
virDomainResctrlMonDefPtr domresmon = NULL;
virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;

domresmon = resctrl->monitors[j];

if (domresmon->tag != VIR_RESCTRL_MONITOR_TYPE_CACHE)
continue;

This this loop only fetches cache information and then the 'l' (or
preferably 'k') makes more sense
Post by Wang Huaqiang
+ }
+
+ l++;
+ }
+ }
+
+ return 0;
+}
+
+
Might be nice to add comments...
Post by Wang Huaqiang
+static int
+qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorDataPtr mondata,
+ size_t nmondata,
+ virResctrlMonitorType tag,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
+{
+ char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
+ unsigned int nmonitors = 0;
+ const char *resname = NULL;
+ const char *resnodename = NULL;
+ size_t i = 0;
+
+ for (i = 0; i < nmondata; i++) {
+ if (mondata[i].tag == tag)
+ nmonitors++;
+ }
+
+ if (!nmonitors)
+ return 0;
I'd place the above two below the following hunk - perf wise and
logically since @tag is the important piece here. However, it may not
be important to compare the [i].tag == tag as long as you've done your
collection of *only* one type. Hope this makes sense!

Thus your code is just:

if (nmondata == 0)
return 0;
Post by Wang Huaqiang
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ resname = "cache";
+ resnodename = "bank";
+ } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
+ resname = "memBW";
+ resnodename = "node";
[1] In the current scheme of qemuDomainGetStatsCpuResMonitor, how could
we get this tag?

If your goal was to make a "generic" printing API to be used by both
cpustats and memstats (eventually), then perhaps the name of the helper
should just be qemuDomainGetStatsResMonitor (or *MonitorParams). Since
@tag is a parameter and it's fairly easy to see it's use and the
formatting of the params is based purely on the tag in the generically
output data.

The helper should also be appropriately placed in qemu_driver.c such
that when memBW stats support is added it can just be used and doesn't
need to be moved.
Post by Wang Huaqiang
+ } else {
+ return 0;
+ }
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.count", resname);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name, nmonitors) < 0)
+ return -1;
+
+ for (i = 0; i < nmonitors; i++) {
+ size_t l = 0;
Similar 'l' concerns here use 'j' instead
Post by Wang Huaqiang
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.name", resname, i);
+ if (virTypedParamsAddString(&record->params,
+ &record->nparams,
+ maxparams,
+ param_name,
+ mondata[i].name) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.vcpus", resname, i);
+ if (virTypedParamsAddString(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].vcpus) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].nstats) < 0)
+ return -1;
+
+ for (l = 0; l < mondata[i].nstats; l++) {
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.id",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].id) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.bytes",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].val) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
+{
+ virDomainResctrlDefPtr resctrl = NULL;
+ virQEMUCpuResMonitorDataPtr mondata = NULL;
+ unsigned int nmonitors = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ if (!virDomainObjIsActive(dom))
+ return 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+ nmonitors += resctrl->nmonitors;
[1] To further the above conversation, @nmonitors won't be limited by
cache stats only, but the collection of the @mondata is. So I think here
nmonitors should only include tags w/ *TYPE_CACHE.
Post by Wang Huaqiang
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (VIR_ALLOC_N(mondata, nmonitors) < 0)
+ return -1;
+
+ if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)
You could pass @nmonitors here and use as 'nmondata' in the above
function to "ensure" that the nmonitors looping and filling of mondata
doesn't go beyond bounds - although that shouldn't happen, so it's not a
Post by Wang Huaqiang
+ goto cleanup;
+
+ for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
+ i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
+ if (qemuDomainGetStatsCpuResMonitorPerTag(mondata, nmonitors, i,
+ record, maxparams) < 0)
+ goto cleanup;
+ }
Similar comment here - this is only being called from
qemuDomainGetStatsCpu thus it should only pass
VIR_RESCTRL_MONITOR_TYPE_CACHE and not be run in a loop. If eventually
memBW data was filled in - it would be printed next to cpu-stats data
and that doesn't necessarily make sense, does it?
Post by Wang Huaqiang
+
+ ret = 0;
+ for (i = 0; i < nmonitors; i++)
+ VIR_FREE(mondata[i].vcpus);
+ VIR_FREE(mondata);
+
+ return ret;
+}
+
+
static int
qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
virDomainStatsRecordPtr record,
@@ -19747,6 +19940,11 @@ qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
{
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
+
+ if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
+ return -1;
+
+ return 0;
This obviously would have some differences based on my comments from
patch15.

Rather than have you post patches 1-15 again just to fix 16, I'll push
1-15 and let you work on 16 and 17. We still have time before the next
release.

Besides once I push, we'll find out fairly quickly whether some other
arch has build/compile problems!

John
Post by Wang Huaqiang
}
Wang, Huaqiang
2018-11-15 12:41:13 UTC
Permalink
-----Original Message-----
Sent: Thursday, November 15, 2018 12:18 AM
Subject: Re: [PATCHv8 16/17] qemu: Report cache occupancy (CMT) with
domstats
Post by Wang Huaqiang
Adding the interface in qemu to report CMT statistic information
through command 'virsh domstats --cpu-total'.
# virsh domstats 1 --cpu-total
Domain: 'ubuntu16.04-base'
...
cpu.cache.monitor.count=2
cpu.cache.monitor.0.name=vcpus_1
cpu.cache.monitor.0.vcpus=1
cpu.cache.monitor.0.bank.count=2
cpu.cache.monitor.0.bank.0.id=0
cpu.cache.monitor.0.bank.0.bytes=4505600
cpu.cache.monitor.0.bank.1.id=1
cpu.cache.monitor.0.bank.1.bytes=5586944
cpu.cache.monitor.1.name=vcpus_4-6
cpu.cache.monitor.1.vcpus=4,5,6
cpu.cache.monitor.1.bank.count=2
cpu.cache.monitor.1.bank.0.id=0
cpu.cache.monitor.1.bank.0.bytes=17571840
cpu.cache.monitor.1.bank.1.id=1
cpu.cache.monitor.1.bank.1.bytes=29106176
---
src/libvirt-domain.c | 9 +++
src/qemu/qemu_driver.c | 198
+++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 207 insertions(+)
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index
7690339..4895f9f 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -11345,6 +11345,15 @@ virConnectGetDomainCapabilities(virConnectPtr
conn,
Post by Wang Huaqiang
* "cpu.user" - user cpu time spent in nanoseconds as unsigned long long.
* "cpu.system" - system cpu time spent in nanoseconds as unsigned long
* long.
+ * "cpu.cache.monitor.count" - tocal cache monitoring groups
+ * "cpu.cache.monitor.M.name" - name of cache monitoring group 'M'
+ * "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.count" - total bank number of cache
monitoring
Post by Wang Huaqiang
+ * group 'M'
+ * "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for cache
+ * 'N' in cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy of
cache
Post by Wang Huaqiang
+ * bank 'N' in cache monitoring group 'M'
I'll comment on these in your update...
Post by Wang Huaqiang
*
* Return memory balloon device information.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index
89d46ee..d41ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19698,6 +19698,199 @@ typedef enum { #define HAVE_JOB(flags)
((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)
+typedef struct _virQEMUCpuResMonitorData virQEMUCpuResMonitorData;
+typedef virQEMUCpuResMonitorData *virQEMUCpuResMonitorDataPtr;
struct
Post by Wang Huaqiang
+_virQEMUCpuResMonitorData{
Data {
Got. One space before '{'
Post by Wang Huaqiang
+ const char *name;
+ char *vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorStatsPtr stats;
+ size_t nstats;
+};
+
+
+static int
+qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
+ virQEMUCpuResMonitorDataPtr mondata) {
+ virDomainResctrlDefPtr resctrl = NULL;
+ size_t i = 0;
+ size_t j = 0;
+ size_t l = 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+
+ for (j = 0; j < resctrl->nmonitors; j++) {
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ virResctrlMonitorPtr monitor =
+ resctrl->monitors[j]->instance;
+
+ domresmon = resctrl->monitors[j];
+ mondata[l].tag = domresmon->tag;
Why "l" (BTW: 'k' would be preferred because '1' and 'l' are very difficult to
delineate).
This 'l' and the occurrences in below will be substituted with 'k'.
Post by Wang Huaqiang
+
+ /* If virBitmapFormat successfully returns an vcpu string, then
+ * mondata[l].vcpus is assigned with an memory space holding it,
+ * let this newly allocated memory buffer to be freed along with
+ * the free of 'mondata' */
+ if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
+ return -1;
+
+ if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not get monitor ID"));
+ return -1;
+ }
+
+ if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
Something doesn't quite add up with this... Since we're only filling in types with
'cache' types and erroring out otherwise ... see [1] data points below...
Post by Wang Huaqiang
+ if (virResctrlMonitorGetCacheOccupancy(monitor,
+ &mondata[l].stats,
+ &mondata[l].nstats) < 0)
+ return -1;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid CPU resource type"));
+ return -1;
[1] Perhaps this should be done up front and "avoided" since this helper should
only care above getting cache stats...
Regarding this error message, it might over-checked since we have made safety check
In building *domresmon->tag.
Will remove the error report lines, since *domresmon->tag could only be
VIR_RESCTRL_MONITOR_TYPE_CACHE currently.

[R1]:
In my plan this function is to be used for VIR_RESCTRL_MONITOR_TYPE_CACHE and
VIR_RESCTRL_MONITOR_TYPE_MEMBW.

Beside this function, qemuDomainGetStatsCpuResMonitorPerTag (name will be refined)
and qemuDomainGetStatsCpuResMonitor (name will be refined) are also planned to support
both VIR_RESCTRL_MONITOR_TYPE_CACHE type monitor and
both VIR_RESCTRL_MONITOR_TYPE_MEMBW monitor even the names are specified with
word 'CpuRes'.

For me memBW monitor is also in scope of 'CPU'. Here is my understanding:

1. CAT/CMT is technology for cache, obviously it is belong to scope of 'CPU'.

2. It may make you confused from the name of MBM, memory bandwidth monitoring, but it get
the memory bandwidth utilization information by tracking L3 cache usage perf CPU thread not by
reading counters of DRAM, so MBM technology is more close to cache than DRAM controller.
This is the reason I think MBM is also a technology in scope of CPU.
With some links for MBM and kernel resctrl for your reference:
https://software.intel.com/en-us/articles/introduction-to-memory-bandwidth-monitoring
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt

Based on above understandings, in naming the three functions that newly introduced I chose name
with word 'cpures' (cpu resource). That I think cache is cpu resource and memBW is cpu resource, the
new functions will handle both resource types. So in this patch my plan is getting cache and memory
bandwidth statistics in one function qemuDomainGetStatsCpuResMonitor, the interface connected
to 'domstats' function is written as:

*** One function solution ***
static int
qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
virDomainObjPtr dom,
virDomainStatsRecordPtr record,
int *maxparams,
unsigned int privflags ATTRIBUTE_UNUSED)
{
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;

/* Get cache and memory bandwidth statistics */ <-- One function for both cache and memBW
if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
return -1;

return 0;
}

Otherwise, if you still think it is not wise to process memBW information and cache in one function,
they have very obvious boundary, then we'd better add two functions and not using word 'cpu resource'/cpures.
Like these:

*** Two functions solution ***
static int
qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
virDomainObjPtr dom,
virDomainStatsRecordPtr record,
int *maxparams,
unsigned int privflags ATTRIBUTE_UNUSED)
{
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;

/* Get cache statistics */ <-- function only for cache.
if (qemuDomainGetStatsCacheMonitor(dom, record, maxparams) < 0)
return -1;
}

/*A new function for getting memory bandwidth statistics */
static int
qemuDomainGetStatsMemStat(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
virDomainObjPtr dom,
virDomainStatsRecordPtr record,
int *maxparams,
unsigned int privflags ATTRIBUTE_UNUSED)
{

/* Read memBW monitors and output ...

memorybandwidth.monitor.count=...
memorybandwidth.monitor.0.name=...
memorybandwidth.monitor.0.vcpus=...
...
*/
return 0;
}

How do you think? Which one do you prefer, one function interface or two functions interface?
(function names might be refined.)
for (j = 0; j < resctrl->nmonitors; j++) {
virDomainResctrlMonDefPtr domresmon = NULL;
virResctrlMonitorPtr monitor = resctrl->monitors[j]->instance;
domresmon = resctrl->monitors[j];
if (domresmon->tag != VIR_RESCTRL_MONITOR_TYPE_CACHE)
continue;
This this loop only fetches cache information and then the 'l' (or preferably 'k')
makes more sense
Maybe before memBW is introduced, it might be better to make code more clear just
as you wrote, for memBW then we make changes.

But I still need your opinion on using one function interface or interface of two separate functions
for cache and memory statistics.

Now we will add output of 'domstats' something like:
cpu.cache.monitor.count = ...
cpu.cache.monitor.0.name=...
cpu.cache.monitor.0.vcpus=...
...

Is following arrangement for memBW monitor is not acceptable?

cpu.memorybandwidth.monitor.count=...
cpu.memorybandwidth.monitor.0.name=...
cpu.memorybandwidth.monitor.0.vcpus=...
...
Post by Wang Huaqiang
+ }
+
+ l++;
+ }
+ }
+
+ return 0;
+}
+
+
Might be nice to add comments...
Post by Wang Huaqiang
+static int
+qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorDataPtr
mondata,
Post by Wang Huaqiang
+ size_t nmondata,
+ virResctrlMonitorType tag,
+ virDomainStatsRecordPtr record,
+ int *maxparams) {
+ char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
+ unsigned int nmonitors = 0;
+ const char *resname = NULL;
+ const char *resnodename = NULL;
+ size_t i = 0;
+
+ for (i = 0; i < nmondata; i++) {
+ if (mondata[i].tag == tag)
+ nmonitors++;
+ }
+
+ if (!nmonitors)
+ return 0;
I'd place the above two below the following hunk - perf wise and logically since
@tag is the important piece here. However, it may not be important to
compare the [i].tag == tag as long as you've done your collection of *only* one
type. Hope this makes sense!
if (nmondata == 0)
return 0;
Yes. At least currently no need to add up *nmoitors again.
Post by Wang Huaqiang
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ resname = "cache";
+ resnodename = "bank";
+ } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
+ resname = "memBW";
+ resnodename = "node";
[1] In the current scheme of qemuDomainGetStatsCpuResMonitor, how could
we get this tag?
Another tag, the VIR_RESCTRL_MONITOR_TYPE_MEMBW, will be added to
qemuDomainGetStatsCpuResMonitor if memBW monitor is introduced in my
plan.

But I know you are challenging this plan. I'll make change according to your
suggestion.
If your goal was to make a "generic" printing API to be used by both cpustats
and memstats (eventually), then perhaps the name of the helper should just be
parameter and it's fairly easy to see it's use and the formatting of the params is
based purely on the tag in the generically output data.
The helper should also be appropriately placed in qemu_driver.c such that when
memBW stats support is added it can just be used and doesn't need to be moved.
As I stated in [R1], I look memstats as one kind of CPU resources.

If we chose the two function scheme, things will be considered as you stated.
Post by Wang Huaqiang
+ } else {
+ return 0;
+ }
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.count", resname);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name, nmonitors) < 0)
+ return -1;
+
+ for (i = 0; i < nmonitors; i++) {
+ size_t l = 0;
Similar 'l' concerns here use 'j' instead
'l' will be modified to 'j'.
Post by Wang Huaqiang
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.name", resname, i);
+ if (virTypedParamsAddString(&record->params,
+ &record->nparams,
+ maxparams,
+ param_name,
+ mondata[i].name) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.vcpus", resname, i);
+ if (virTypedParamsAddString(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].vcpus) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].nstats) < 0)
+ return -1;
+
+ for (l = 0; l < mondata[i].nstats; l++) {
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.id",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].id) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.bytes",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].stats[l].val) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams) {
+ virDomainResctrlDefPtr resctrl = NULL;
+ virQEMUCpuResMonitorDataPtr mondata = NULL;
+ unsigned int nmonitors = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ if (!virDomainObjIsActive(dom))
+ return 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+ nmonitors += resctrl->nmonitors;
should only include tags w/ *TYPE_CACHE.
Post by Wang Huaqiang
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (VIR_ALLOC_N(mondata, nmonitors) < 0)
+ return -1;
+
+ if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)
"ensure" that the nmonitors looping and filling of mondata doesn't go beyond
bounds - although that shouldn't happen, so it's not a requirement as long as the
qemuDomainGetCpuResMonitorData gets all monitor statistics and stored in mondata, in my
design not only for cache statistics.
But since we only have cache monitor currently I'll do as you suggested.
Post by Wang Huaqiang
+ goto cleanup;
+
+ for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
+ i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
+ if (qemuDomainGetStatsCpuResMonitorPerTag(mondata, nmonitors, i,
+ record, maxparams) < 0)
+ goto cleanup;
+ }
Similar comment here - this is only being called from qemuDomainGetStatsCpu
thus it should only pass VIR_RESCTRL_MONITOR_TYPE_CACHE and not be run in
a loop. If eventually memBW data was filled in - it would be printed next to cpu-
stats data and that doesn't necessarily make sense, does it?
Only for cache, OK.
Post by Wang Huaqiang
+
+ ret = 0;
+ for (i = 0; i < nmonitors; i++)
+ VIR_FREE(mondata[i].vcpus);
+ VIR_FREE(mondata);
+
+ return ret;
+}
+
+
static int
qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
ATTRIBUTE_UNUSED, {
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
+
+ if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
+ return -1;
+
+ return 0;
This obviously would have some differences based on my comments from
patch15.
Got. Still remember what changes you made.
Rather than have you post patches 1-15 again just to fix 16, I'll push
1-15 and let you work on 16 and 17. We still have time before the next release.
I am looking forward for your attitude toward whether I could regard 'memBW monitor'(MBM)
as a kind of CPU resource in libvirt and report the memory bandwidth statistics by command
' virsh domstats --cpu-total'?

I still think MBM might have big difference in comparing with DRAM memory bandwidth, because
the cache hierarchy has been significantly changed since CPU platform Skylake-SP, in Skyalke-SP
not all data to CPU from DRAM is through L3 cache. Data might be read to L2 cache directly from
DRAM.
I am looking for answers regard the difference of MBM observed bandwidth and DRAM bandwidth
from internal team. I will make update once get answers.
Besides once I push, we'll find out fairly quickly whether some other arch has
build/compile problems!
John
Thanks for review.
Huaqiang
Post by Wang Huaqiang
}
Wang, Huaqiang
2018-11-20 13:25:51 UTC
Permalink
-----Original Message-----
From: Wang, Huaqiang
Sent: Thursday, November 15, 2018 8:41 PM
Subject: RE: [PATCHv8 16/17] qemu: Report cache occupancy (CMT) with
domstats
-----Original Message-----
Sent: Thursday, November 15, 2018 12:18 AM
Subject: Re: [PATCHv8 16/17] qemu: Report cache occupancy (CMT) with
domstats
Post by Wang Huaqiang
Adding the interface in qemu to report CMT statistic information
through command 'virsh domstats --cpu-total'.
# virsh domstats 1 --cpu-total
Domain: 'ubuntu16.04-base'
...
cpu.cache.monitor.count=2
cpu.cache.monitor.0.name=vcpus_1
cpu.cache.monitor.0.vcpus=1
cpu.cache.monitor.0.bank.count=2
cpu.cache.monitor.0.bank.0.id=0
cpu.cache.monitor.0.bank.0.bytes=4505600
cpu.cache.monitor.0.bank.1.id=1
cpu.cache.monitor.0.bank.1.bytes=5586944
cpu.cache.monitor.1.name=vcpus_4-6
cpu.cache.monitor.1.vcpus=4,5,6
cpu.cache.monitor.1.bank.count=2
cpu.cache.monitor.1.bank.0.id=0
cpu.cache.monitor.1.bank.0.bytes=17571840
cpu.cache.monitor.1.bank.1.id=1
cpu.cache.monitor.1.bank.1.bytes=29106176
---
src/libvirt-domain.c | 9 +++
src/qemu/qemu_driver.c | 198
+++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 207 insertions(+)
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c index
7690339..4895f9f 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -11345,6 +11345,15 @@
virConnectGetDomainCapabilities(virConnectPtr
conn,
Post by Wang Huaqiang
* "cpu.user" - user cpu time spent in nanoseconds as unsigned long
long.
Post by Wang Huaqiang
* "cpu.system" - system cpu time spent in nanoseconds as unsigned
long
Post by Wang Huaqiang
* long.
+ * "cpu.cache.monitor.count" - tocal cache monitoring groups
+ * "cpu.cache.monitor.M.name" - name of cache monitoring group
'M'
Post by Wang Huaqiang
+ * "cpu.cache.monitor.M.vcpus" - vcpus for cache monitoring group
'M'
Post by Wang Huaqiang
+ * "cpu.cache.monitor.M.bank.count" - total bank number of cache
monitoring
Post by Wang Huaqiang
+ * group 'M'
+ * "cpu.cache.monitor.M.bank.N.id" - OS assigned cache bank id for
cache
Post by Wang Huaqiang
+ * 'N' in cache monitoring group 'M'
+ * "cpu.cache.monitor.M.bank.N.bytes" - monitor's cache occupancy
of
cache
Post by Wang Huaqiang
+ * bank 'N' in cache monitoring group 'M'
I'll comment on these in your update...
Post by Wang Huaqiang
*
* Return memory balloon device information.
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index
89d46ee..d41ae66 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19698,6 +19698,199 @@ typedef enum { #define
HAVE_JOB(flags)
Post by Wang Huaqiang
((flags) & QEMU_DOMAIN_STATS_HAVE_JOB)
+typedef struct _virQEMUCpuResMonitorData
virQEMUCpuResMonitorData;
Post by Wang Huaqiang
+typedef virQEMUCpuResMonitorData
*virQEMUCpuResMonitorDataPtr;
struct
Post by Wang Huaqiang
+_virQEMUCpuResMonitorData{
Data {
Got. One space before '{'
Post by Wang Huaqiang
+ const char *name;
+ char *vcpus;
+ virResctrlMonitorType tag;
+ virResctrlMonitorStatsPtr stats;
+ size_t nstats;
+};
+
+
+static int
+qemuDomainGetCpuResMonitorData(virDomainObjPtr dom,
+ virQEMUCpuResMonitorDataPtr mondata) {
+ virDomainResctrlDefPtr resctrl = NULL;
+ size_t i = 0;
+ size_t j = 0;
+ size_t l = 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+
+ for (j = 0; j < resctrl->nmonitors; j++) {
+ virDomainResctrlMonDefPtr domresmon = NULL;
+ virResctrlMonitorPtr monitor =
+ resctrl->monitors[j]->instance;
+
+ domresmon = resctrl->monitors[j];
+ mondata[l].tag = domresmon->tag;
Why "l" (BTW: 'k' would be preferred because '1' and 'l' are very
difficult to delineate).
This 'l' and the occurrences in below will be substituted with 'k'.
Post by Wang Huaqiang
+
+ /* If virBitmapFormat successfully returns an vcpu string, then
+ * mondata[l].vcpus is assigned with an memory space holding
it,
Post by Wang Huaqiang
+ * let this newly allocated memory buffer to be freed along with
+ * the free of 'mondata' */
+ if (!(mondata[l].vcpus = virBitmapFormat(domresmon->vcpus)))
+ return -1;
+
+ if (!(mondata[l].name = virResctrlMonitorGetID(monitor))) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Could not get monitor ID"));
+ return -1;
+ }
+
+ if (domresmon->tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
Something doesn't quite add up with this... Since we're only filling
in types with 'cache' types and erroring out otherwise ... see [1] data
points below...
Post by Wang Huaqiang
+ if (virResctrlMonitorGetCacheOccupancy(monitor,
+ &mondata[l].stats,
+ &mondata[l].nstats) < 0)
+ return -1;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Invalid CPU resource type"));
+ return -1;
[1] Perhaps this should be done up front and "avoided" since this
helper should only care above getting cache stats...
Regarding this error message, it might over-checked since we have made
safety check In building *domresmon->tag.
Will remove the error report lines, since *domresmon->tag could only be
VIR_RESCTRL_MONITOR_TYPE_CACHE currently.
In my plan this function is to be used for
VIR_RESCTRL_MONITOR_TYPE_CACHE and
VIR_RESCTRL_MONITOR_TYPE_MEMBW.
Beside this function, qemuDomainGetStatsCpuResMonitorPerTag (name
will be refined) and qemuDomainGetStatsCpuResMonitor (name will be
refined) are also planned to support both
VIR_RESCTRL_MONITOR_TYPE_CACHE type monitor and both
VIR_RESCTRL_MONITOR_TYPE_MEMBW monitor even the names are
specified with word 'CpuRes'.
1. CAT/CMT is technology for cache, obviously it is belong to scope of 'CPU'.
2. It may make you confused from the name of MBM, memory bandwidth
monitoring, but it get the memory bandwidth utilization information by
tracking L3 cache usage perf CPU thread not by reading counters of DRAM,
so MBM technology is more close to cache than DRAM controller.
This is the reason I think MBM is also a technology in scope of CPU.
https://software.intel.com/en-us/articles/introduction-to-memory-
bandwidth-monitoring
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
Based on above understandings, in naming the three functions that newly
introduced I chose name with word 'cpures' (cpu resource). That I think
cache is cpu resource and memBW is cpu resource, the new functions will
handle both resource types. So in this patch my plan is getting cache and
memory bandwidth statistics in one function
qemuDomainGetStatsCpuResMonitor, the interface connected to
*** One function solution ***
static int
qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
virDomainObjPtr dom,
virDomainStatsRecordPtr record,
int *maxparams,
unsigned int privflags ATTRIBUTE_UNUSED) {
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
/* Get cache and memory bandwidth statistics */ <--
One function for both cache and memBW
if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) < 0)
return -1;
return 0;
}
Otherwise, if you still think it is not wise to process memBW information
and cache in one function, they have very obvious boundary, then we'd
better add two functions and not using word 'cpu resource'/cpures.
*** Two functions solution ***
static int
qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
virDomainObjPtr dom,
virDomainStatsRecordPtr record,
int *maxparams,
unsigned int privflags ATTRIBUTE_UNUSED) {
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
/* Get cache statistics */
<-- function only for cache.
if (qemuDomainGetStatsCacheMonitor(dom, record, maxparams) < 0)
return -1;
}
/*A new function for getting memory bandwidth statistics */ static int
qemuDomainGetStatsMemStat(virQEMUDriverPtr driver
ATTRIBUTE_UNUSED,
virDomainObjPtr dom,
virDomainStatsRecordPtr record,
int *maxparams,
unsigned int privflags ATTRIBUTE_UNUSED) {
/* Read memBW monitors and output ...
memorybandwidth.monitor.count=...
memorybandwidth.monitor.0.name=...
memorybandwidth.monitor.0.vcpus=...
...
*/
return 0;
}
How do you think? Which one do you prefer, one function interface or two
functions interface?
(function names might be refined.)
for (j = 0; j < resctrl->nmonitors; j++) {
virDomainResctrlMonDefPtr domresmon = NULL;
virResctrlMonitorPtr monitor =
resctrl->monitors[j]->instance;
domresmon = resctrl->monitors[j];
if (domresmon->tag != VIR_RESCTRL_MONITOR_TYPE_CACHE)
continue;
This this loop only fetches cache information and then the 'l' (or
preferably 'k') makes more sense
Maybe before memBW is introduced, it might be better to make code more
clear just as you wrote, for memBW then we make changes.
But I still need your opinion on using one function interface or interface of
two separate functions for cache and memory statistics.
cpu.cache.monitor.count = ...
cpu.cache.monitor.0.name=...
cpu.cache.monitor.0.vcpus=...
...
Is following arrangement for memBW monitor is not acceptable?
cpu.memorybandwidth.monitor.count=...
cpu.memorybandwidth.monitor.0.name=...
cpu.memorybandwidth.monitor.0.vcpus=...
...
I have had some discussion with intel engineers about the MBM
and CMT, it is known that these are very similar technologies in underlying
hardware but tracking difference set of CPU internal counters.
For CMT, the concept is very clear, it is monitoring last level
cache and report the cache usage. For MBM it is reports memory
bandwidth by tracking counters from cache size, it reflects the
DRAM bandwidth, so it will be no problem to call it as memory
bandwidth.

And I'll submit the updated code for patch16 and 17 and addressing
your review comments and suggestions.

Since cache monitor is the only monitor we support now, just
as you pointed out, I'll not involve memBW monitor tag to avoid
confusion.

For MBM patches, I'll submit RFC patches to raise discussion about its
interface.

I'll also rename the new added data structure name and function
names, please have a review.

BR
Huaqiang
Post by Wang Huaqiang
+ }
+
+ l++;
+ }
+ }
+
+ return 0;
+}
+
+
Might be nice to add comments...
Post by Wang Huaqiang
+static int
+qemuDomainGetStatsCpuResMonitorPerTag(virQEMUCpuResMonitorData
Ptr
mondata,
Post by Wang Huaqiang
+ size_t nmondata,
+ virResctrlMonitorType tag,
+ virDomainStatsRecordPtr record,
+ int *maxparams) {
+ char param_name[VIR_TYPED_PARAM_FIELD_LENGTH];
+ unsigned int nmonitors = 0;
+ const char *resname = NULL;
+ const char *resnodename = NULL;
+ size_t i = 0;
+
+ for (i = 0; i < nmondata; i++) {
+ if (mondata[i].tag == tag)
+ nmonitors++;
+ }
+
+ if (!nmonitors)
+ return 0;
I'd place the above two below the following hunk - perf wise and
be important to compare the [i].tag == tag as long as you've done your
collection of *only* one type. Hope this makes sense!
if (nmondata == 0)
return 0;
Yes. At least currently no need to add up *nmoitors again.
Post by Wang Huaqiang
+
+ if (tag == VIR_RESCTRL_MONITOR_TYPE_CACHE) {
+ resname = "cache";
+ resnodename = "bank";
+ } else if (tag == VIR_RESCTRL_MONITOR_TYPE_MEMBW) {
+ resname = "memBW";
+ resnodename = "node";
[1] In the current scheme of qemuDomainGetStatsCpuResMonitor, how
could we get this tag?
Another tag, the VIR_RESCTRL_MONITOR_TYPE_MEMBW, will be added to
qemuDomainGetStatsCpuResMonitor if memBW monitor is introduced in
my plan.
But I know you are challenging this plan. I'll make change according to your
suggestion.
If your goal was to make a "generic" printing API to be used by both
cpustats and memstats (eventually), then perhaps the name of the
helper should just be qemuDomainGetStatsResMonitor (or
it's use and the formatting of the params is based purely on the tag in the
generically output data.
The helper should also be appropriately placed in qemu_driver.c such
that when memBW stats support is added it can just be used and doesn't
need to be moved.
As I stated in [R1], I look memstats as one kind of CPU resources.
If we chose the two function scheme, things will be considered as you stated.
Post by Wang Huaqiang
+ } else {
+ return 0;
+ }
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.count", resname);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name, nmonitors) < 0)
+ return -1;
+
+ for (i = 0; i < nmonitors; i++) {
+ size_t l = 0;
Similar 'l' concerns here use 'j' instead
'l' will be modified to 'j'.
Post by Wang Huaqiang
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.name", resname, i);
+ if (virTypedParamsAddString(&record->params,
+ &record->nparams,
+ maxparams,
+ param_name,
+ mondata[i].name) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.vcpus", resname, i);
+ if (virTypedParamsAddString(&record->params, &record-
nparams,
Post by Wang Huaqiang
+ maxparams, param_name,
+ mondata[i].vcpus) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.count", resname, i, resnodename);
+ if (virTypedParamsAddUInt(&record->params, &record->nparams,
+ maxparams, param_name,
+ mondata[i].nstats) < 0)
+ return -1;
+
+ for (l = 0; l < mondata[i].nstats; l++) {
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.id",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record-
nparams,
Post by Wang Huaqiang
+ maxparams, param_name,
+ mondata[i].stats[l].id) < 0)
+ return -1;
+
+ snprintf(param_name, VIR_TYPED_PARAM_FIELD_LENGTH,
+ "cpu.%s.monitor.%zd.%s.%zd.bytes",
+ resname, i, resnodename, l);
+ if (virTypedParamsAddUInt(&record->params, &record-
nparams,
Post by Wang Huaqiang
+ maxparams, param_name,
+ mondata[i].stats[l].val) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+qemuDomainGetStatsCpuResMonitor(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams) {
+ virDomainResctrlDefPtr resctrl = NULL;
+ virQEMUCpuResMonitorDataPtr mondata = NULL;
+ unsigned int nmonitors = 0;
+ size_t i = 0;
+ int ret = -1;
+
+ if (!virDomainObjIsActive(dom))
+ return 0;
+
+ for (i = 0; i < dom->def->nresctrls; i++) {
+ resctrl = dom->def->resctrls[i];
+ nmonitors += resctrl->nmonitors;
here nmonitors should only include tags w/ *TYPE_CACHE.
Post by Wang Huaqiang
+ }
+
+ if (!nmonitors)
+ return 0;
+
+ if (VIR_ALLOC_N(mondata, nmonitors) < 0)
+ return -1;
+
+ if (qemuDomainGetCpuResMonitorData(dom, mondata) < 0)
function to "ensure" that the nmonitors looping and filling of mondata
doesn't go beyond bounds - although that shouldn't happen, so it's not
qemuDomainGetCpuResMonitorData gets all monitor statistics and stored
in mondata, in my design not only for cache statistics.
But since we only have cache monitor currently I'll do as you suggested.
Post by Wang Huaqiang
+ goto cleanup;
+
+ for (i = VIR_RESCTRL_MONITOR_TYPE_UNSUPPORT + 1;
+ i < VIR_RESCTRL_MONITOR_TYPE_LAST; i++) {
+ if (qemuDomainGetStatsCpuResMonitorPerTag(mondata,
nmonitors, i,
Post by Wang Huaqiang
+ record, maxparams) < 0)
+ goto cleanup;
+ }
Similar comment here - this is only being called from
qemuDomainGetStatsCpu thus it should only pass
VIR_RESCTRL_MONITOR_TYPE_CACHE and not be run in a loop. If
eventually
memBW data was filled in - it would be printed next to cpu- stats data
and that doesn't necessarily make sense, does it?
Only for cache, OK.
Post by Wang Huaqiang
+
+ ret = 0;
+ for (i = 0; i < nmonitors; i++)
+ VIR_FREE(mondata[i].vcpus);
+ VIR_FREE(mondata);
+
+ return ret;
+}
+
+
static int
qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
driver
Post by Wang Huaqiang
ATTRIBUTE_UNUSED, {
if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
return -1;
+
+ if (qemuDomainGetStatsCpuResMonitor(dom, record, maxparams) <
0)
Post by Wang Huaqiang
+ return -1;
+
+ return 0;
This obviously would have some differences based on my comments from
patch15.
Got. Still remember what changes you made.
Rather than have you post patches 1-15 again just to fix 16, I'll push
1-15 and let you work on 16 and 17. We still have time before the next
release.
I am looking forward for your attitude toward whether I could regard
'memBW monitor'(MBM) as a kind of CPU resource in libvirt and report the
memory bandwidth statistics by command ' virsh domstats --cpu-total'?
I still think MBM might have big difference in comparing with DRAM
memory bandwidth, because the cache hierarchy has been significantly
changed since CPU platform Skylake-SP, in Skyalke-SP not all data to CPU
from DRAM is through L3 cache. Data might be read to L2 cache directly
from DRAM.
I am looking for answers regard the difference of MBM observed bandwidth
and DRAM bandwidth from internal team. I will make update once get
answers.
Besides once I push, we'll find out fairly quickly whether some other
arch has build/compile problems!
John
Thanks for review.
Huaqiang
Post by Wang Huaqiang
}
Wang Huaqiang
2018-11-12 13:31:46 UTC
Permalink
Refactoring qemuDomainGetStatsCpu, make it possible to add
more CPU statistics.

Signed-off-by: Wang Huaqiang <***@intel.com>
---
src/qemu/qemu_driver.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 09e04b8..89d46ee 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19699,11 +19699,9 @@ typedef enum {


static int
-qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
- virDomainObjPtr dom,
- virDomainStatsRecordPtr record,
- int *maxparams,
- unsigned int privflags ATTRIBUTE_UNUSED)
+qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
{
qemuDomainObjPrivatePtr priv = dom->privateData;
unsigned long long cpu_time = 0;
@@ -19739,6 +19737,19 @@ qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
return 0;
}

+
+static int
+qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
+ virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams,
+ unsigned int privflags ATTRIBUTE_UNUSED)
+{
+ if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
+ return -1;
+}
+
+
static int
qemuDomainGetStatsBalloon(virQEMUDriverPtr driver,
virDomainObjPtr dom,
--
2.7.4
John Ferlan
2018-11-13 23:46:13 UTC
Permalink
Post by Wang Huaqiang
Refactoring qemuDomainGetStatsCpu, make it possible to add
more CPU statistics.
---
src/qemu/qemu_driver.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 09e04b8..89d46ee 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -19699,11 +19699,9 @@ typedef enum {
static int
-qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
- virDomainObjPtr dom,
- virDomainStatsRecordPtr record,
- int *maxparams,
- unsigned int privflags ATTRIBUTE_UNUSED)
+qemuDomainGetStatsCpuCgroup(virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams)
{
qemuDomainObjPrivatePtr priv = dom->privateData;
unsigned long long cpu_time = 0;
@@ -19739,6 +19737,19 @@ qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
return 0;
}
+
+static int
+qemuDomainGetStatsCpu(virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
+ virDomainObjPtr dom,
+ virDomainStatsRecordPtr record,
+ int *maxparams,
+ unsigned int privflags ATTRIBUTE_UNUSED)
+{
+ if (qemuDomainGetStatsCpuCgroup(dom, record, maxparams) < 0)
+ return -1;
This should just be:

return qemuDomainGetStatsCpuCgroup(dom, record, maxparams);

yes, causes a merge conflict in next patch, but that one at least has
the return 0 this one would have needed...

Reviewed-by: John Ferlan <***@redhat.com>

John
Post by Wang Huaqiang
+}
+
+
static int
qemuDomainGetStatsBalloon(virQEMUDriverPtr driver,
virDomainObjPtr dom,
John Ferlan
2018-11-13 23:11:06 UTC
Permalink
Post by Wang Huaqiang
Cache Monitoring Technology (aka CMT) provides the capability
to report cache utilization information of system task.
This patch introduces the concept of resctrl monitor through
data structure virResctrlMonitor.
---
src/libvirt_private.syms | 1 +
src/util/virresctrl.c | 80 ++++++++++++++++++++++++++++++++++++++++++++----
src/util/virresctrl.h | 9 ++++++
3 files changed, 84 insertions(+), 6 deletions(-)
[...]
Post by Wang Huaqiang
# util/virrotatingfile.h
diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index 7339e9b..e3c84a3 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
[...]
Post by Wang Huaqiang
+ *
+ * =====Cache monitoring technology (CMT)=====
+ *
+ * Cache monitoring technology is used to perceive how many cache the process
+ * is using actually. virResctrlMonitor represents the resource control
+ * monitoring group, it is supported to monitor resource utilization
+ * information on granularity of vcpu.
+ *
+ * From hardware perspective, cache monitoring technology (CMT), memory
From a hardware
+ * bandwidth technology (MBM), as well as the CAT and MBA, are all orthogonal
+ * features. The monitor will be created under the scope of default resctrl
+ * group if no specific CAT or MBA entries are provided for the guest."
*/
Reviewed-by: John Ferlan <***@redhat.com>

John

[...]
Loading...